blob: bc850208f4b9493cb304524e9f3e0ce770fc4649 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Sizes used by the string-building code of this file:
 *   XML_PARSER_BUFFER_SIZE      head-room kept free at the end of a growing
 *                               buffer before it is enlarged
 *   XML_PARSER_BIG_BUFFER_SIZE  initial size of the translation buffer
 *                               allocated by xmlStringDecodeEntities()
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * Various global defaults for parsing
 *
 * On VMS the two longest names are defined under a truncated identifier
 * and mapped back with a #define, so the rest of the code keeps using the
 * full name (presumably because of an identifier length limit there).
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
#ifndef VMS
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#else
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
98
/*
 * NULL-terminated list of the processing instruction targets starting
 * with "xml" which are allowed by W3C specifications.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.  All of them expect a variable named ctxt, the current
 * parser context, to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     the pending ctxt->token if there is one, otherwise the current
 *           xmlChar of the input.  This should be used internally by the
 *           parser only to compare to ASCII values, otherwise it would
 *           break when running with UTF-8 encoding.
 *   RAW     the current xmlChar of the input buffer, or -1 while a token
 *           is pending in ctxt->token.
 *   NXT(n)  the n'th next xmlChar of the input buffer. Same as CUR it
 *           should be used only to compare on ASCII based substrings.
 *   SKIP(n) skip n xmlChars; must also be used only to skip ASCII defined
 *           strings within the parser.  It calls
 *           xmlParserHandlePEReference() when landing on a '%' and pops
 *           the input when it is exhausted.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    skip to the next character through xmlNextChar().
 *   NEXT1   skip exactly one xmlChar and try to refill the input when its
 *           end is reached; line and column counters are not updated.
 *   NEXTL(l) skip l xmlChars in the input buffer, updating the line and
 *           column counters and clearing any pending token.
 *   CUR_CHAR(l) the current character as returned by xmlCurrentChar(), l
 *           is set to the number of xmlChars used for the encoding.
 *   CUR_SCHAR(s, l) same but operates on the string s instead of the
 *           context input.
 *   COPY_BUF(l,b,i,v) copy the character v to the buffer b at index i and
 *           advance i; the plain single xmlChar store is used when l is 1,
 *           xmlCopyChar() otherwise.
 *   GROW, SHRINK handling of input buffers
 *   SKIP_BLANKS skip blanks through xmlSkipBlankChars().
 *
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare statement and COPY_BUF to
 *       an if/else; always brace them when used as the body of an if.
 *       Every macro argument is parenthesized in the expansion so that
 *       expressions can safely be passed.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v))
278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
387 int val = 0;
388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
496int
497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the variable buffer##_size and reallocates
 * buffer to that many xmlChars.  It expects both variables to exist in the
 * calling function.
 *
 * NOTE: on allocation failure the macro prints a message with perror()
 *       and executes return(NULL) in the calling function, so it can only
 *       be used in functions returning a pointer.  buffer has been
 *       overwritten with NULL at that point, so the previous block is not
 *       released here.
 */
#define growBuffer(buffer) {						\
    buffer##_size *= 2;							\
    if ((buffer = (xmlChar *)						\
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)))	\
        == NULL) {							\
        perror("realloc failed");					\
        return(NULL);							\
    }									\
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
Daniel Veillard2f362242001-03-02 17:36:21 +00001396 if (ctxt->keepBlanks)
1397 return(0);
1398
Owen Taylor3473f882001-02-23 17:55:21 +00001399 /*
1400 * Check for xml:space value.
1401 */
1402 if (*(ctxt->space) == 1)
1403 return(0);
1404
1405 /*
1406 * Check that the string is made of blanks
1407 */
1408 for (i = 0;i < len;i++)
1409 if (!(IS_BLANK(str[i]))) return(0);
1410
1411 /*
1412 * Look if the element is mixed content in the Dtd if available
1413 */
1414 if (ctxt->myDoc != NULL) {
1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1416 if (ret == 0) return(1);
1417 if (ret == 1) return(0);
1418 }
1419
1420 /*
1421 * Otherwise, heuristic :-\
1422 */
Owen Taylor3473f882001-02-23 17:55:21 +00001423 if (RAW != '<') return(0);
1424 if (ctxt->node == NULL) return(0);
1425 if ((ctxt->node->children == NULL) &&
1426 (RAW == '<') && (NXT(1) == '/')) return(0);
1427
1428 lastChild = xmlGetLastChild(ctxt->node);
1429 if (lastChild == NULL) {
1430 if (ctxt->node->content != NULL) return(0);
1431 } else if (xmlNodeIsText(lastChild))
1432 return(0);
1433 else if ((ctxt->node->children != NULL) &&
1434 (xmlNodeIsText(ctxt->node->children)))
1435 return(0);
1436 return(1);
1437}
1438
/*
 * Forward declarations for recursive behaviour.
 */
1442void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1443void xmlParseReference(xmlParserCtxtPtr ctxt);
1444
1445/************************************************************************
1446 * *
1447 * Extra stuff for namespace support *
1448 * Relates to http://www.w3.org/TR/WD-xml-names *
1449 * *
1450 ************************************************************************/
1451
1452/**
1453 * xmlSplitQName:
1454 * @ctxt: an XML parser context
1455 * @name: an XML parser context
1456 * @prefix: a xmlChar **
1457 *
1458 * parse an UTF8 encoded XML qualified name string
1459 *
1460 * [NS 5] QName ::= (Prefix ':')? LocalPart
1461 *
1462 * [NS 6] Prefix ::= NCName
1463 *
1464 * [NS 7] LocalPart ::= NCName
1465 *
1466 * Returns the local part, and prefix is updated
1467 * to get the Prefix if any.
1468 */
1469
1470xmlChar *
1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1472 xmlChar buf[XML_MAX_NAMELEN + 5];
1473 xmlChar *buffer = NULL;
1474 int len = 0;
1475 int max = XML_MAX_NAMELEN;
1476 xmlChar *ret = NULL;
1477 const xmlChar *cur = name;
1478 int c;
1479
1480 *prefix = NULL;
1481
1482#ifndef XML_XML_NAMESPACE
1483 /* xml: prefix is not really a namespace */
1484 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1485 (cur[2] == 'l') && (cur[3] == ':'))
1486 return(xmlStrdup(name));
1487#endif
1488
1489 /* nasty but valid */
1490 if (cur[0] == ':')
1491 return(xmlStrdup(name));
1492
1493 c = *cur++;
1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1495 buf[len++] = c;
1496 c = *cur++;
1497 }
1498 if (len >= max) {
1499 /*
1500 * Okay someone managed to make a huge name, so he's ready to pay
1501 * for the processing speed.
1502 */
1503 max = len * 2;
1504
1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1506 if (buffer == NULL) {
1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt->userData,
1509 "xmlSplitQName: out of memory\n");
1510 return(NULL);
1511 }
1512 memcpy(buffer, buf, len);
1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1514 if (len + 10 > max) {
1515 max *= 2;
1516 buffer = (xmlChar *) xmlRealloc(buffer,
1517 max * sizeof(xmlChar));
1518 if (buffer == NULL) {
1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1520 ctxt->sax->error(ctxt->userData,
1521 "xmlSplitQName: out of memory\n");
1522 return(NULL);
1523 }
1524 }
1525 buffer[len++] = c;
1526 c = *cur++;
1527 }
1528 buffer[len] = 0;
1529 }
1530
1531 if (buffer == NULL)
1532 ret = xmlStrndup(buf, len);
1533 else {
1534 ret = buffer;
1535 buffer = NULL;
1536 max = XML_MAX_NAMELEN;
1537 }
1538
1539
1540 if (c == ':') {
1541 c = *cur++;
1542 if (c == 0) return(ret);
1543 *prefix = ret;
1544 len = 0;
1545
1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1547 buf[len++] = c;
1548 c = *cur++;
1549 }
1550 if (len >= max) {
1551 /*
1552 * Okay someone managed to make a huge name, so he's ready to pay
1553 * for the processing speed.
1554 */
1555 max = len * 2;
1556
1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1558 if (buffer == NULL) {
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData,
1561 "xmlSplitQName: out of memory\n");
1562 return(NULL);
1563 }
1564 memcpy(buffer, buf, len);
1565 while (c != 0) { /* tested bigname2.xml */
1566 if (len + 10 > max) {
1567 max *= 2;
1568 buffer = (xmlChar *) xmlRealloc(buffer,
1569 max * sizeof(xmlChar));
1570 if (buffer == NULL) {
1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1572 ctxt->sax->error(ctxt->userData,
1573 "xmlSplitQName: out of memory\n");
1574 return(NULL);
1575 }
1576 }
1577 buffer[len++] = c;
1578 c = *cur++;
1579 }
1580 buffer[len] = 0;
1581 }
1582
1583 if (buffer == NULL)
1584 ret = xmlStrndup(buf, len);
1585 else {
1586 ret = buffer;
1587 }
1588 }
1589
1590 return(ret);
1591}
1592
1593/************************************************************************
1594 * *
1595 * The parser itself *
1596 * Relates to http://www.w3.org/TR/REC-xml *
1597 * *
1598 ************************************************************************/
1599
Daniel Veillard21a0f912001-02-25 19:54:14 +00001600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001601/**
1602 * xmlParseName:
1603 * @ctxt: an XML parser context
1604 *
1605 * parse an XML name.
1606 *
1607 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1608 * CombiningChar | Extender
1609 *
1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1611 *
1612 * [6] Names ::= Name (S Name)*
1613 *
1614 * Returns the Name parsed or NULL
1615 */
1616
1617xmlChar *
1618xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001619 const xmlChar *in;
1620 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 int count = 0;
1622
1623 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001624
1625 /*
1626 * Accelerator for simple ASCII names
1627 */
1628 in = ctxt->input->cur;
1629 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1630 ((*in >= 0x41) && (*in <= 0x5A)) ||
1631 (*in == '_') || (*in == ':')) {
1632 in++;
1633 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1634 ((*in >= 0x41) && (*in <= 0x5A)) ||
1635 ((*in >= 0x30) && (*in <= 0x39)) ||
1636 (*in == '_') || (*in == ':'))
1637 in++;
1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1639 count = in - ctxt->input->cur;
1640 ret = xmlStrndup(ctxt->input->cur, count);
1641 ctxt->input->cur = in;
1642 return(ret);
1643 }
1644 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001645 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001646}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001647
Daniel Veillard21a0f912001-02-25 19:54:14 +00001648xmlChar *
1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1650 xmlChar buf[XML_MAX_NAMELEN + 5];
1651 int len = 0, l;
1652 int c;
1653 int count = 0;
1654
1655 /*
1656 * Handler for more complex cases
1657 */
1658 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001659 c = CUR_CHAR(l);
1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1661 (!IS_LETTER(c) && (c != '_') &&
1662 (c != ':'))) {
1663 return(NULL);
1664 }
1665
1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1668 (c == '.') || (c == '-') ||
1669 (c == '_') || (c == ':') ||
1670 (IS_COMBINING(c)) ||
1671 (IS_EXTENDER(c)))) {
1672 if (count++ > 100) {
1673 count = 0;
1674 GROW;
1675 }
1676 COPY_BUF(l,buf,len,c);
1677 NEXTL(l);
1678 c = CUR_CHAR(l);
1679 if (len >= XML_MAX_NAMELEN) {
1680 /*
1681 * Okay someone managed to make a huge name, so he's ready to pay
1682 * for the processing speed.
1683 */
1684 xmlChar *buffer;
1685 int max = len * 2;
1686
1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688 if (buffer == NULL) {
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001691 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
1694 memcpy(buffer, buf, len);
1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1696 (c == '.') || (c == '-') ||
1697 (c == '_') || (c == ':') ||
1698 (IS_COMBINING(c)) ||
1699 (IS_EXTENDER(c))) {
1700 if (count++ > 100) {
1701 count = 0;
1702 GROW;
1703 }
1704 if (len + 10 > max) {
1705 max *= 2;
1706 buffer = (xmlChar *) xmlRealloc(buffer,
1707 max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001711 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001712 return(NULL);
1713 }
1714 }
1715 COPY_BUF(l,buffer,len,c);
1716 NEXTL(l);
1717 c = CUR_CHAR(l);
1718 }
1719 buffer[len] = 0;
1720 return(buffer);
1721 }
1722 }
1723 return(xmlStrndup(buf, len));
1724}
1725
1726/**
1727 * xmlParseStringName:
1728 * @ctxt: an XML parser context
1729 * @str: a pointer to the string pointer (IN/OUT)
1730 *
1731 * parse an XML name.
1732 *
1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1734 * CombiningChar | Extender
1735 *
1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1737 *
1738 * [6] Names ::= Name (S Name)*
1739 *
1740 * Returns the Name parsed or NULL. The str pointer
1741 * is updated to the current location in the string.
1742 */
1743
1744xmlChar *
1745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 const xmlChar *cur = *str;
1748 int len = 0, l;
1749 int c;
1750
1751 c = CUR_SCHAR(cur, l);
1752 if (!IS_LETTER(c) && (c != '_') &&
1753 (c != ':')) {
1754 return(NULL);
1755 }
1756
1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c))) {
1762 COPY_BUF(l,buf,len,c);
1763 cur += l;
1764 c = CUR_SCHAR(cur, l);
1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
1777 "xmlParseStringName: out of memory\n");
1778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (len + 10 > max) {
1787 max *= 2;
1788 buffer = (xmlChar *) xmlRealloc(buffer,
1789 max * sizeof(xmlChar));
1790 if (buffer == NULL) {
1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1792 ctxt->sax->error(ctxt->userData,
1793 "xmlParseStringName: out of memory\n");
1794 return(NULL);
1795 }
1796 }
1797 COPY_BUF(l,buffer,len,c);
1798 cur += l;
1799 c = CUR_SCHAR(cur, l);
1800 }
1801 buffer[len] = 0;
1802 *str = cur;
1803 return(buffer);
1804 }
1805 }
1806 *str = cur;
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseNmtoken:
1812 * @ctxt: an XML parser context
1813 *
1814 * parse an XML Nmtoken.
1815 *
1816 * [7] Nmtoken ::= (NameChar)+
1817 *
1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1819 *
1820 * Returns the Nmtoken parsed or NULL
1821 */
1822
1823xmlChar *
1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1825 xmlChar buf[XML_MAX_NAMELEN + 5];
1826 int len = 0, l;
1827 int c;
1828 int count = 0;
1829
1830 GROW;
1831 c = CUR_CHAR(l);
1832
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (count++ > 100) {
1839 count = 0;
1840 GROW;
1841 }
1842 COPY_BUF(l,buf,len,c);
1843 NEXTL(l);
1844 c = CUR_CHAR(l);
1845 if (len >= XML_MAX_NAMELEN) {
1846 /*
1847 * Okay someone managed to make a huge token, so he's ready to pay
1848 * for the processing speed.
1849 */
1850 xmlChar *buffer;
1851 int max = len * 2;
1852
1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1854 if (buffer == NULL) {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlParseNmtoken: out of memory\n");
1858 return(NULL);
1859 }
1860 memcpy(buffer, buf, len);
1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1862 (c == '.') || (c == '-') ||
1863 (c == '_') || (c == ':') ||
1864 (IS_COMBINING(c)) ||
1865 (IS_EXTENDER(c))) {
1866 if (count++ > 100) {
1867 count = 0;
1868 GROW;
1869 }
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001877 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 NEXTL(l);
1883 c = CUR_CHAR(l);
1884 }
1885 buffer[len] = 0;
1886 return(buffer);
1887 }
1888 }
1889 if (len == 0)
1890 return(NULL);
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseEntityValue:
1896 * @ctxt: an XML parser context
1897 * @orig: if non-NULL store a copy of the original entity value
1898 *
1899 * parse a value for ENTITY declarations
1900 *
1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1902 * "'" ([^%&'] | PEReference | Reference)* "'"
1903 *
1904 * Returns the EntityValue parsed with reference substitued or NULL
1905 */
1906
1907xmlChar *
1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1909 xmlChar *buf = NULL;
1910 int len = 0;
1911 int size = XML_PARSER_BUFFER_SIZE;
1912 int c, l;
1913 xmlChar stop;
1914 xmlChar *ret = NULL;
1915 const xmlChar *cur = NULL;
1916 xmlParserInputPtr input;
1917
1918 if (RAW == '"') stop = '"';
1919 else if (RAW == '\'') stop = '\'';
1920 else {
1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1924 ctxt->wellFormed = 0;
1925 ctxt->disableSAX = 1;
1926 return(NULL);
1927 }
1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1929 if (buf == NULL) {
1930 xmlGenericError(xmlGenericErrorContext,
1931 "malloc of %d byte failed\n", size);
1932 return(NULL);
1933 }
1934
1935 /*
1936 * The content of the entity definition is copied in a buffer.
1937 */
1938
1939 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1940 input = ctxt->input;
1941 GROW;
1942 NEXT;
1943 c = CUR_CHAR(l);
1944 /*
1945 * NOTE: 4.4.5 Included in Literal
1946 * When a parameter entity reference appears in a literal entity
1947 * value, ... a single or double quote character in the replacement
1948 * text is always treated as a normal data character and will not
1949 * terminate the literal.
1950 * In practice it means we stop the loop only when back at parsing
1951 * the initial entity and the quote is found
1952 */
1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1954 (ctxt->input != input))) {
1955 if (len + 5 >= size) {
1956 size *= 2;
1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1958 if (buf == NULL) {
1959 xmlGenericError(xmlGenericErrorContext,
1960 "realloc of %d byte failed\n", size);
1961 return(NULL);
1962 }
1963 }
1964 COPY_BUF(l,buf,len,c);
1965 NEXTL(l);
1966 /*
1967 * Pop-up of finished entities.
1968 */
1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1970 xmlPopInput(ctxt);
1971
1972 GROW;
1973 c = CUR_CHAR(l);
1974 if (c == 0) {
1975 GROW;
1976 c = CUR_CHAR(l);
1977 }
1978 }
1979 buf[len] = 0;
1980
1981 /*
1982 * Raise problem w.r.t. '&' and '%' being used in non-entities
1983 * reference constructs. Note Charref will be handled in
1984 * xmlStringDecodeEntities()
1985 */
1986 cur = buf;
1987 while (*cur != 0) { /* non input consuming */
1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1989 xmlChar *name;
1990 xmlChar tmp = *cur;
1991
1992 cur++;
1993 name = xmlParseStringName(ctxt, &cur);
1994 if ((name == NULL) || (*cur != ';')) {
1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "EntityValue: '%c' forbidden except for entities references\n",
1999 tmp);
2000 ctxt->wellFormed = 0;
2001 ctxt->disableSAX = 1;
2002 }
2003 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "EntityValue: PEReferences forbidden in internal subset\n",
2008 tmp);
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 }
2012 if (name != NULL)
2013 xmlFree(name);
2014 }
2015 cur++;
2016 }
2017
2018 /*
2019 * Then PEReference entities are substituted.
2020 */
2021 if (c != stop) {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 xmlFree(buf);
2028 } else {
2029 NEXT;
2030 /*
2031 * NOTE: 4.4.7 Bypassed
2032 * When a general entity reference appears in the EntityValue in
2033 * an entity declaration, it is bypassed and left as is.
2034 * so XML_SUBSTITUTE_REF is not set here.
2035 */
2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2037 0, 0, 0);
2038 if (orig != NULL)
2039 *orig = buf;
2040 else
2041 xmlFree(buf);
2042 }
2043
2044 return(ret);
2045}
2046
2047/**
2048 * xmlParseAttValue:
2049 * @ctxt: an XML parser context
2050 *
2051 * parse a value for an attribute
2052 * Note: the parser won't do substitution of entities here, this
2053 * will be handled later in xmlStringGetNodeList
2054 *
2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2056 * "'" ([^<&'] | Reference)* "'"
2057 *
2058 * 3.3.3 Attribute-Value Normalization:
2059 * Before the value of an attribute is passed to the application or
2060 * checked for validity, the XML processor must normalize it as follows:
2061 * - a character reference is processed by appending the referenced
2062 * character to the attribute value
2063 * - an entity reference is processed by recursively processing the
2064 * replacement text of the entity
2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2066 * appending #x20 to the normalized value, except that only a single
2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2068 * parsed entity or the literal entity value of an internal parsed entity
2069 * - other characters are processed by appending them to the normalized value
2070 * If the declared value is not CDATA, then the XML processor must further
2071 * process the normalized attribute value by discarding any leading and
2072 * trailing space (#x20) characters, and by replacing sequences of space
2073 * (#x20) characters by a single space (#x20) character.
2074 * All attributes for which no declaration has been read should be treated
2075 * by a non-validating parser as if declared CDATA.
2076 *
2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2078 */
2079
2080xmlChar *
2081xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2082 xmlChar limit = 0;
2083 xmlChar *buf = NULL;
2084 int len = 0;
2085 int buf_size = 0;
2086 int c, l;
2087 xmlChar *current = NULL;
2088 xmlEntityPtr ent;
2089
2090
2091 SHRINK;
2092 if (NXT(0) == '"') {
2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2094 limit = '"';
2095 NEXT;
2096 } else if (NXT(0) == '\'') {
2097 limit = '\'';
2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2099 NEXT;
2100 } else {
2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2104 ctxt->wellFormed = 0;
2105 ctxt->disableSAX = 1;
2106 return(NULL);
2107 }
2108
2109 /*
2110 * allocate a translation buffer.
2111 */
2112 buf_size = XML_PARSER_BUFFER_SIZE;
2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2114 if (buf == NULL) {
2115 perror("xmlParseAttValue: malloc failed");
2116 return(NULL);
2117 }
2118
2119 /*
2120 * Ok loop until we reach one of the ending char or a size limit.
2121 */
2122 c = CUR_CHAR(l);
2123 while (((NXT(0) != limit) && /* checked */
2124 (c != '<')) || (ctxt->token != 0)) {
2125 if (c == 0) break;
2126 if (ctxt->token == '&') {
2127 /*
2128 * The reparsing will be done in xmlStringGetNodeList()
2129 * called by the attribute() function in SAX.c
2130 */
2131 static xmlChar buffer[6] = "&#38;";
2132
2133 if (len > buf_size - 10) {
2134 growBuffer(buf);
2135 }
2136 current = &buffer[0];
2137 while (*current != 0) { /* non input consuming */
2138 buf[len++] = *current++;
2139 }
2140 ctxt->token = 0;
2141 } else if (c == '&') {
2142 if (NXT(1) == '#') {
2143 int val = xmlParseCharRef(ctxt);
2144 if (val == '&') {
2145 /*
2146 * The reparsing will be done in xmlStringGetNodeList()
2147 * called by the attribute() function in SAX.c
2148 */
2149 static xmlChar buffer[6] = "&#38;";
2150
2151 if (len > buf_size - 10) {
2152 growBuffer(buf);
2153 }
2154 current = &buffer[0];
2155 while (*current != 0) { /* non input consuming */
2156 buf[len++] = *current++;
2157 }
2158 } else {
2159 len += xmlCopyChar(0, &buf[len], val);
2160 }
2161 } else {
2162 ent = xmlParseEntityRef(ctxt);
2163 if ((ent != NULL) &&
2164 (ctxt->replaceEntities != 0)) {
2165 xmlChar *rep;
2166
2167 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2168 rep = xmlStringDecodeEntities(ctxt, ent->content,
2169 XML_SUBSTITUTE_REF, 0, 0, 0);
2170 if (rep != NULL) {
2171 current = rep;
2172 while (*current != 0) { /* non input consuming */
2173 buf[len++] = *current++;
2174 if (len > buf_size - 10) {
2175 growBuffer(buf);
2176 }
2177 }
2178 xmlFree(rep);
2179 }
2180 } else {
2181 if (ent->content != NULL)
2182 buf[len++] = ent->content[0];
2183 }
2184 } else if (ent != NULL) {
2185 int i = xmlStrlen(ent->name);
2186 const xmlChar *cur = ent->name;
2187
2188 /*
2189 * This may look absurd but is needed to detect
2190 * entities problems
2191 */
2192 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2193 (ent->content != NULL)) {
2194 xmlChar *rep;
2195 rep = xmlStringDecodeEntities(ctxt, ent->content,
2196 XML_SUBSTITUTE_REF, 0, 0, 0);
2197 if (rep != NULL)
2198 xmlFree(rep);
2199 }
2200
2201 /*
2202 * Just output the reference
2203 */
2204 buf[len++] = '&';
2205 if (len > buf_size - i - 10) {
2206 growBuffer(buf);
2207 }
2208 for (;i > 0;i--)
2209 buf[len++] = *cur++;
2210 buf[len++] = ';';
2211 }
2212 }
2213 } else {
2214 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2215 COPY_BUF(l,buf,len,0x20);
2216 if (len > buf_size - 10) {
2217 growBuffer(buf);
2218 }
2219 } else {
2220 COPY_BUF(l,buf,len,c);
2221 if (len > buf_size - 10) {
2222 growBuffer(buf);
2223 }
2224 }
2225 NEXTL(l);
2226 }
2227 GROW;
2228 c = CUR_CHAR(l);
2229 }
2230 buf[len++] = 0;
2231 if (RAW == '<') {
2232 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2234 ctxt->sax->error(ctxt->userData,
2235 "Unescaped '<' not allowed in attributes values\n");
2236 ctxt->wellFormed = 0;
2237 ctxt->disableSAX = 1;
2238 } else if (RAW != limit) {
2239 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2241 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2242 ctxt->wellFormed = 0;
2243 ctxt->disableSAX = 1;
2244 } else
2245 NEXT;
2246 return(buf);
2247}
2248
2249/**
2250 * xmlParseSystemLiteral:
2251 * @ctxt: an XML parser context
2252 *
2253 * parse an XML Literal
2254 *
2255 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2256 *
2257 * Returns the SystemLiteral parsed or NULL
2258 */
2259
2260xmlChar *
2261xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2262 xmlChar *buf = NULL;
2263 int len = 0;
2264 int size = XML_PARSER_BUFFER_SIZE;
2265 int cur, l;
2266 xmlChar stop;
2267 int state = ctxt->instate;
2268 int count = 0;
2269
2270 SHRINK;
2271 if (RAW == '"') {
2272 NEXT;
2273 stop = '"';
2274 } else if (RAW == '\'') {
2275 NEXT;
2276 stop = '\'';
2277 } else {
2278 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2280 ctxt->sax->error(ctxt->userData,
2281 "SystemLiteral \" or ' expected\n");
2282 ctxt->wellFormed = 0;
2283 ctxt->disableSAX = 1;
2284 return(NULL);
2285 }
2286
2287 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2288 if (buf == NULL) {
2289 xmlGenericError(xmlGenericErrorContext,
2290 "malloc of %d byte failed\n", size);
2291 return(NULL);
2292 }
2293 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2294 cur = CUR_CHAR(l);
2295 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2296 if (len + 5 >= size) {
2297 size *= 2;
2298 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2299 if (buf == NULL) {
2300 xmlGenericError(xmlGenericErrorContext,
2301 "realloc of %d byte failed\n", size);
2302 ctxt->instate = (xmlParserInputState) state;
2303 return(NULL);
2304 }
2305 }
2306 count++;
2307 if (count > 50) {
2308 GROW;
2309 count = 0;
2310 }
2311 COPY_BUF(l,buf,len,cur);
2312 NEXTL(l);
2313 cur = CUR_CHAR(l);
2314 if (cur == 0) {
2315 GROW;
2316 SHRINK;
2317 cur = CUR_CHAR(l);
2318 }
2319 }
2320 buf[len] = 0;
2321 ctxt->instate = (xmlParserInputState) state;
2322 if (!IS_CHAR(cur)) {
2323 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2325 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2326 ctxt->wellFormed = 0;
2327 ctxt->disableSAX = 1;
2328 } else {
2329 NEXT;
2330 }
2331 return(buf);
2332}
2333
2334/**
2335 * xmlParsePubidLiteral:
2336 * @ctxt: an XML parser context
2337 *
2338 * parse an XML public literal
2339 *
2340 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2341 *
2342 * Returns the PubidLiteral parsed or NULL.
2343 */
2344
2345xmlChar *
2346xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2347 xmlChar *buf = NULL;
2348 int len = 0;
2349 int size = XML_PARSER_BUFFER_SIZE;
2350 xmlChar cur;
2351 xmlChar stop;
2352 int count = 0;
2353
2354 SHRINK;
2355 if (RAW == '"') {
2356 NEXT;
2357 stop = '"';
2358 } else if (RAW == '\'') {
2359 NEXT;
2360 stop = '\'';
2361 } else {
2362 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2364 ctxt->sax->error(ctxt->userData,
2365 "SystemLiteral \" or ' expected\n");
2366 ctxt->wellFormed = 0;
2367 ctxt->disableSAX = 1;
2368 return(NULL);
2369 }
2370 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2371 if (buf == NULL) {
2372 xmlGenericError(xmlGenericErrorContext,
2373 "malloc of %d byte failed\n", size);
2374 return(NULL);
2375 }
2376 cur = CUR;
2377 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2378 if (len + 1 >= size) {
2379 size *= 2;
2380 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2381 if (buf == NULL) {
2382 xmlGenericError(xmlGenericErrorContext,
2383 "realloc of %d byte failed\n", size);
2384 return(NULL);
2385 }
2386 }
2387 buf[len++] = cur;
2388 count++;
2389 if (count > 50) {
2390 GROW;
2391 count = 0;
2392 }
2393 NEXT;
2394 cur = CUR;
2395 if (cur == 0) {
2396 GROW;
2397 SHRINK;
2398 cur = CUR;
2399 }
2400 }
2401 buf[len] = 0;
2402 if (cur != stop) {
2403 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2405 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2406 ctxt->wellFormed = 0;
2407 ctxt->disableSAX = 1;
2408 } else {
2409 NEXT;
2410 }
2411 return(buf);
2412}
2413
Daniel Veillard48b2f892001-02-25 16:11:03 +00002414void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002415/**
2416 * xmlParseCharData:
2417 * @ctxt: an XML parser context
2418 * @cdata: int indicating whether we are within a CDATA section
2419 *
2420 * parse a CharData section.
2421 * if we are within a CDATA section ']]>' marks an end of section.
2422 *
2423 * The right angle bracket (>) may be represented using the string "&gt;",
2424 * and must, for compatibility, be escaped using "&gt;" or a character
2425 * reference when it appears in the string "]]>" in content, when that
2426 * string is not marking the end of a CDATA section.
2427 *
2428 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2429 */
2430
2431void
2432xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002433 const xmlChar *in;
2434 int nbchar = 0;
2435
2436 SHRINK;
2437 GROW;
2438 /*
2439 * Accelerated common case where input don't need to be
2440 * modified before passing it to the handler.
2441 */
2442 if ((ctxt->token == 0) && (!cdata)) {
2443 in = ctxt->input->cur;
2444 do {
2445 while (((*in >= 0x20) && (*in != '<') &&
2446 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2447 in++;
2448 if (*in == 0xA) {
2449 ctxt->input->line++;
2450 continue; /* while */
2451 }
2452 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002453 if (nbchar > 0) {
2454 if (IS_BLANK(*ctxt->input->cur) &&
2455 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2456 if (ctxt->sax->ignorableWhitespace != NULL)
2457 ctxt->sax->ignorableWhitespace(ctxt->userData,
2458 ctxt->input->cur, nbchar);
2459 } else {
2460 if (ctxt->sax->characters != NULL)
2461 ctxt->sax->characters(ctxt->userData,
2462 ctxt->input->cur, nbchar);
2463 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002464 }
2465 ctxt->input->cur = in;
2466 if (*in == 0xD) {
2467 in++;
2468 if (*in == 0xA) {
2469 ctxt->input->cur = in;
2470 in++;
2471 ctxt->input->line++;
2472 continue; /* while */
2473 }
2474 in--;
2475 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002476 if (*in == '<') {
2477 return;
2478 }
2479 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002480 return;
2481 }
2482 SHRINK;
2483 GROW;
2484 in = ctxt->input->cur;
2485 } while ((*in >= 0x20) && (*in <= 0x7F));
2486 nbchar = 0;
2487 }
2488 xmlParseCharDataComplex(ctxt, cdata);
2489}
2490
2491void
2492xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002493 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2494 int nbchar = 0;
2495 int cur, l;
2496 int count = 0;
2497
2498 SHRINK;
2499 GROW;
2500 cur = CUR_CHAR(l);
2501 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2502 ((cur != '&') || (ctxt->token == '&')) &&
2503 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2504 if ((cur == ']') && (NXT(1) == ']') &&
2505 (NXT(2) == '>')) {
2506 if (cdata) break;
2507 else {
2508 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2510 ctxt->sax->error(ctxt->userData,
2511 "Sequence ']]>' not allowed in content\n");
2512 /* Should this be relaxed ??? I see a "must here */
2513 ctxt->wellFormed = 0;
2514 ctxt->disableSAX = 1;
2515 }
2516 }
2517 COPY_BUF(l,buf,nbchar,cur);
2518 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2519 /*
2520 * Ok the segment is to be consumed as chars.
2521 */
2522 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2523 if (areBlanks(ctxt, buf, nbchar)) {
2524 if (ctxt->sax->ignorableWhitespace != NULL)
2525 ctxt->sax->ignorableWhitespace(ctxt->userData,
2526 buf, nbchar);
2527 } else {
2528 if (ctxt->sax->characters != NULL)
2529 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2530 }
2531 }
2532 nbchar = 0;
2533 }
2534 count++;
2535 if (count > 50) {
2536 GROW;
2537 count = 0;
2538 }
2539 NEXTL(l);
2540 cur = CUR_CHAR(l);
2541 }
2542 if (nbchar != 0) {
2543 /*
2544 * Ok the segment is to be consumed as chars.
2545 */
2546 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2547 if (areBlanks(ctxt, buf, nbchar)) {
2548 if (ctxt->sax->ignorableWhitespace != NULL)
2549 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2550 } else {
2551 if (ctxt->sax->characters != NULL)
2552 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2553 }
2554 }
2555 }
2556}
2557
2558/**
2559 * xmlParseExternalID:
2560 * @ctxt: an XML parser context
2561 * @publicID: a xmlChar** receiving PubidLiteral
2562 * @strict: indicate whether we should restrict parsing to only
2563 * production [75], see NOTE below
2564 *
2565 * Parse an External ID or a Public ID
2566 *
2567 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2568 * 'PUBLIC' S PubidLiteral S SystemLiteral
2569 *
2570 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2571 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2572 *
2573 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2574 *
2575 * Returns the function returns SystemLiteral and in the second
2576 * case publicID receives PubidLiteral, is strict is off
2577 * it is possible to return NULL and have publicID set.
2578 */
2579
2580xmlChar *
2581xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2582 xmlChar *URI = NULL;
2583
2584 SHRINK;
2585 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2586 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2587 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2588 SKIP(6);
2589 if (!IS_BLANK(CUR)) {
2590 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2592 ctxt->sax->error(ctxt->userData,
2593 "Space required after 'SYSTEM'\n");
2594 ctxt->wellFormed = 0;
2595 ctxt->disableSAX = 1;
2596 }
2597 SKIP_BLANKS;
2598 URI = xmlParseSystemLiteral(ctxt);
2599 if (URI == NULL) {
2600 ctxt->errNo = XML_ERR_URI_REQUIRED;
2601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2602 ctxt->sax->error(ctxt->userData,
2603 "xmlParseExternalID: SYSTEM, no URI\n");
2604 ctxt->wellFormed = 0;
2605 ctxt->disableSAX = 1;
2606 }
2607 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2608 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2609 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2610 SKIP(6);
2611 if (!IS_BLANK(CUR)) {
2612 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2614 ctxt->sax->error(ctxt->userData,
2615 "Space required after 'PUBLIC'\n");
2616 ctxt->wellFormed = 0;
2617 ctxt->disableSAX = 1;
2618 }
2619 SKIP_BLANKS;
2620 *publicID = xmlParsePubidLiteral(ctxt);
2621 if (*publicID == NULL) {
2622 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2624 ctxt->sax->error(ctxt->userData,
2625 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2626 ctxt->wellFormed = 0;
2627 ctxt->disableSAX = 1;
2628 }
2629 if (strict) {
2630 /*
2631 * We don't handle [83] so "S SystemLiteral" is required.
2632 */
2633 if (!IS_BLANK(CUR)) {
2634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2636 ctxt->sax->error(ctxt->userData,
2637 "Space required after the Public Identifier\n");
2638 ctxt->wellFormed = 0;
2639 ctxt->disableSAX = 1;
2640 }
2641 } else {
2642 /*
2643 * We handle [83] so we return immediately, if
2644 * "S SystemLiteral" is not detected. From a purely parsing
2645 * point of view that's a nice mess.
2646 */
2647 const xmlChar *ptr;
2648 GROW;
2649
2650 ptr = CUR_PTR;
2651 if (!IS_BLANK(*ptr)) return(NULL);
2652
2653 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2654 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2655 }
2656 SKIP_BLANKS;
2657 URI = xmlParseSystemLiteral(ctxt);
2658 if (URI == NULL) {
2659 ctxt->errNo = XML_ERR_URI_REQUIRED;
2660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2661 ctxt->sax->error(ctxt->userData,
2662 "xmlParseExternalID: PUBLIC, no URI\n");
2663 ctxt->wellFormed = 0;
2664 ctxt->disableSAX = 1;
2665 }
2666 }
2667 return(URI);
2668}
2669
2670/**
2671 * xmlParseComment:
2672 * @ctxt: an XML parser context
2673 *
2674 * Skip an XML (SGML) comment <!-- .... -->
2675 * The spec says that "For compatibility, the string "--" (double-hyphen)
2676 * must not occur within comments. "
2677 *
2678 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2679 */
2680void
2681xmlParseComment(xmlParserCtxtPtr ctxt) {
2682 xmlChar *buf = NULL;
2683 int len;
2684 int size = XML_PARSER_BUFFER_SIZE;
2685 int q, ql;
2686 int r, rl;
2687 int cur, l;
2688 xmlParserInputState state;
2689 xmlParserInputPtr input = ctxt->input;
2690 int count = 0;
2691
2692 /*
2693 * Check that there is a comment right here.
2694 */
2695 if ((RAW != '<') || (NXT(1) != '!') ||
2696 (NXT(2) != '-') || (NXT(3) != '-')) return;
2697
2698 state = ctxt->instate;
2699 ctxt->instate = XML_PARSER_COMMENT;
2700 SHRINK;
2701 SKIP(4);
2702 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2703 if (buf == NULL) {
2704 xmlGenericError(xmlGenericErrorContext,
2705 "malloc of %d byte failed\n", size);
2706 ctxt->instate = state;
2707 return;
2708 }
2709 q = CUR_CHAR(ql);
2710 NEXTL(ql);
2711 r = CUR_CHAR(rl);
2712 NEXTL(rl);
2713 cur = CUR_CHAR(l);
2714 len = 0;
2715 while (IS_CHAR(cur) && /* checked */
2716 ((cur != '>') ||
2717 (r != '-') || (q != '-'))) {
2718 if ((r == '-') && (q == '-') && (len > 1)) {
2719 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2721 ctxt->sax->error(ctxt->userData,
2722 "Comment must not contain '--' (double-hyphen)`\n");
2723 ctxt->wellFormed = 0;
2724 ctxt->disableSAX = 1;
2725 }
2726 if (len + 5 >= size) {
2727 size *= 2;
2728 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2729 if (buf == NULL) {
2730 xmlGenericError(xmlGenericErrorContext,
2731 "realloc of %d byte failed\n", size);
2732 ctxt->instate = state;
2733 return;
2734 }
2735 }
2736 COPY_BUF(ql,buf,len,q);
2737 q = r;
2738 ql = rl;
2739 r = cur;
2740 rl = l;
2741
2742 count++;
2743 if (count > 50) {
2744 GROW;
2745 count = 0;
2746 }
2747 NEXTL(l);
2748 cur = CUR_CHAR(l);
2749 if (cur == 0) {
2750 SHRINK;
2751 GROW;
2752 cur = CUR_CHAR(l);
2753 }
2754 }
2755 buf[len] = 0;
2756 if (!IS_CHAR(cur)) {
2757 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2759 ctxt->sax->error(ctxt->userData,
2760 "Comment not terminated \n<!--%.50s\n", buf);
2761 ctxt->wellFormed = 0;
2762 ctxt->disableSAX = 1;
2763 xmlFree(buf);
2764 } else {
2765 if (input != ctxt->input) {
2766 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2768 ctxt->sax->error(ctxt->userData,
2769"Comment doesn't start and stop in the same entity\n");
2770 ctxt->wellFormed = 0;
2771 ctxt->disableSAX = 1;
2772 }
2773 NEXT;
2774 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2775 (!ctxt->disableSAX))
2776 ctxt->sax->comment(ctxt->userData, buf);
2777 xmlFree(buf);
2778 }
2779 ctxt->instate = state;
2780}
2781
2782/**
2783 * xmlParsePITarget:
2784 * @ctxt: an XML parser context
2785 *
2786 * parse the name of a PI
2787 *
2788 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2789 *
2790 * Returns the PITarget name or NULL
2791 */
2792
2793xmlChar *
2794xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2795 xmlChar *name;
2796
2797 name = xmlParseName(ctxt);
2798 if ((name != NULL) &&
2799 ((name[0] == 'x') || (name[0] == 'X')) &&
2800 ((name[1] == 'm') || (name[1] == 'M')) &&
2801 ((name[2] == 'l') || (name[2] == 'L'))) {
2802 int i;
2803 if ((name[0] == 'x') && (name[1] == 'm') &&
2804 (name[2] == 'l') && (name[3] == 0)) {
2805 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2807 ctxt->sax->error(ctxt->userData,
2808 "XML declaration allowed only at the start of the document\n");
2809 ctxt->wellFormed = 0;
2810 ctxt->disableSAX = 1;
2811 return(name);
2812 } else if (name[3] == 0) {
2813 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2815 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2816 ctxt->wellFormed = 0;
2817 ctxt->disableSAX = 1;
2818 return(name);
2819 }
2820 for (i = 0;;i++) {
2821 if (xmlW3CPIs[i] == NULL) break;
2822 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2823 return(name);
2824 }
2825 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2826 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2827 ctxt->sax->warning(ctxt->userData,
2828 "xmlParsePItarget: invalid name prefix 'xml'\n");
2829 }
2830 }
2831 return(name);
2832}
2833
2834/**
2835 * xmlParsePI:
2836 * @ctxt: an XML parser context
2837 *
2838 * parse an XML Processing Instruction.
2839 *
2840 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2841 *
2842 * The processing is transfered to SAX once parsed.
2843 */
2844
2845void
2846xmlParsePI(xmlParserCtxtPtr ctxt) {
2847 xmlChar *buf = NULL;
2848 int len = 0;
2849 int size = XML_PARSER_BUFFER_SIZE;
2850 int cur, l;
2851 xmlChar *target;
2852 xmlParserInputState state;
2853 int count = 0;
2854
2855 if ((RAW == '<') && (NXT(1) == '?')) {
2856 xmlParserInputPtr input = ctxt->input;
2857 state = ctxt->instate;
2858 ctxt->instate = XML_PARSER_PI;
2859 /*
2860 * this is a Processing Instruction.
2861 */
2862 SKIP(2);
2863 SHRINK;
2864
2865 /*
2866 * Parse the target name and check for special support like
2867 * namespace.
2868 */
2869 target = xmlParsePITarget(ctxt);
2870 if (target != NULL) {
2871 if ((RAW == '?') && (NXT(1) == '>')) {
2872 if (input != ctxt->input) {
2873 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2875 ctxt->sax->error(ctxt->userData,
2876 "PI declaration doesn't start and stop in the same entity\n");
2877 ctxt->wellFormed = 0;
2878 ctxt->disableSAX = 1;
2879 }
2880 SKIP(2);
2881
2882 /*
2883 * SAX: PI detected.
2884 */
2885 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2886 (ctxt->sax->processingInstruction != NULL))
2887 ctxt->sax->processingInstruction(ctxt->userData,
2888 target, NULL);
2889 ctxt->instate = state;
2890 xmlFree(target);
2891 return;
2892 }
2893 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2894 if (buf == NULL) {
2895 xmlGenericError(xmlGenericErrorContext,
2896 "malloc of %d byte failed\n", size);
2897 ctxt->instate = state;
2898 return;
2899 }
2900 cur = CUR;
2901 if (!IS_BLANK(cur)) {
2902 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2904 ctxt->sax->error(ctxt->userData,
2905 "xmlParsePI: PI %s space expected\n", target);
2906 ctxt->wellFormed = 0;
2907 ctxt->disableSAX = 1;
2908 }
2909 SKIP_BLANKS;
2910 cur = CUR_CHAR(l);
2911 while (IS_CHAR(cur) && /* checked */
2912 ((cur != '?') || (NXT(1) != '>'))) {
2913 if (len + 5 >= size) {
2914 size *= 2;
2915 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (buf == NULL) {
2917 xmlGenericError(xmlGenericErrorContext,
2918 "realloc of %d byte failed\n", size);
2919 ctxt->instate = state;
2920 return;
2921 }
2922 }
2923 count++;
2924 if (count > 50) {
2925 GROW;
2926 count = 0;
2927 }
2928 COPY_BUF(l,buf,len,cur);
2929 NEXTL(l);
2930 cur = CUR_CHAR(l);
2931 if (cur == 0) {
2932 SHRINK;
2933 GROW;
2934 cur = CUR_CHAR(l);
2935 }
2936 }
2937 buf[len] = 0;
2938 if (cur != '?') {
2939 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2941 ctxt->sax->error(ctxt->userData,
2942 "xmlParsePI: PI %s never end ...\n", target);
2943 ctxt->wellFormed = 0;
2944 ctxt->disableSAX = 1;
2945 } else {
2946 if (input != ctxt->input) {
2947 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2949 ctxt->sax->error(ctxt->userData,
2950 "PI declaration doesn't start and stop in the same entity\n");
2951 ctxt->wellFormed = 0;
2952 ctxt->disableSAX = 1;
2953 }
2954 SKIP(2);
2955
2956 /*
2957 * SAX: PI detected.
2958 */
2959 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2960 (ctxt->sax->processingInstruction != NULL))
2961 ctxt->sax->processingInstruction(ctxt->userData,
2962 target, buf);
2963 }
2964 xmlFree(buf);
2965 xmlFree(target);
2966 } else {
2967 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2969 ctxt->sax->error(ctxt->userData,
2970 "xmlParsePI : no target name\n");
2971 ctxt->wellFormed = 0;
2972 ctxt->disableSAX = 1;
2973 }
2974 ctxt->instate = state;
2975 }
2976}
2977
2978/**
2979 * xmlParseNotationDecl:
2980 * @ctxt: an XML parser context
2981 *
2982 * parse a notation declaration
2983 *
2984 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2985 *
2986 * Hence there is actually 3 choices:
2987 * 'PUBLIC' S PubidLiteral
2988 * 'PUBLIC' S PubidLiteral S SystemLiteral
2989 * and 'SYSTEM' S SystemLiteral
2990 *
2991 * See the NOTE on xmlParseExternalID().
2992 */
2993
2994void
2995xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
2996 xmlChar *name;
2997 xmlChar *Pubid;
2998 xmlChar *Systemid;
2999
3000 if ((RAW == '<') && (NXT(1) == '!') &&
3001 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3002 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3003 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3004 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3005 xmlParserInputPtr input = ctxt->input;
3006 SHRINK;
3007 SKIP(10);
3008 if (!IS_BLANK(CUR)) {
3009 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3011 ctxt->sax->error(ctxt->userData,
3012 "Space required after '<!NOTATION'\n");
3013 ctxt->wellFormed = 0;
3014 ctxt->disableSAX = 1;
3015 return;
3016 }
3017 SKIP_BLANKS;
3018
Daniel Veillard29631a82001-03-05 09:49:20 +00003019 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003020 if (name == NULL) {
3021 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3023 ctxt->sax->error(ctxt->userData,
3024 "NOTATION: Name expected here\n");
3025 ctxt->wellFormed = 0;
3026 ctxt->disableSAX = 1;
3027 return;
3028 }
3029 if (!IS_BLANK(CUR)) {
3030 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3032 ctxt->sax->error(ctxt->userData,
3033 "Space required after the NOTATION name'\n");
3034 ctxt->wellFormed = 0;
3035 ctxt->disableSAX = 1;
3036 return;
3037 }
3038 SKIP_BLANKS;
3039
3040 /*
3041 * Parse the IDs.
3042 */
3043 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3044 SKIP_BLANKS;
3045
3046 if (RAW == '>') {
3047 if (input != ctxt->input) {
3048 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3050 ctxt->sax->error(ctxt->userData,
3051"Notation declaration doesn't start and stop in the same entity\n");
3052 ctxt->wellFormed = 0;
3053 ctxt->disableSAX = 1;
3054 }
3055 NEXT;
3056 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3057 (ctxt->sax->notationDecl != NULL))
3058 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3059 } else {
3060 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063 "'>' required to close NOTATION declaration\n");
3064 ctxt->wellFormed = 0;
3065 ctxt->disableSAX = 1;
3066 }
3067 xmlFree(name);
3068 if (Systemid != NULL) xmlFree(Systemid);
3069 if (Pubid != NULL) xmlFree(Pubid);
3070 }
3071}
3072
3073/**
3074 * xmlParseEntityDecl:
3075 * @ctxt: an XML parser context
3076 *
3077 * parse <!ENTITY declarations
3078 *
3079 * [70] EntityDecl ::= GEDecl | PEDecl
3080 *
3081 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3082 *
3083 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3084 *
3085 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3086 *
3087 * [74] PEDef ::= EntityValue | ExternalID
3088 *
3089 * [76] NDataDecl ::= S 'NDATA' S Name
3090 *
3091 * [ VC: Notation Declared ]
3092 * The Name must match the declared name of a notation.
3093 */
3094
3095void
3096xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3097 xmlChar *name = NULL;
3098 xmlChar *value = NULL;
3099 xmlChar *URI = NULL, *literal = NULL;
3100 xmlChar *ndata = NULL;
3101 int isParameter = 0;
3102 xmlChar *orig = NULL;
3103
3104 GROW;
3105 if ((RAW == '<') && (NXT(1) == '!') &&
3106 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3107 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3108 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3109 xmlParserInputPtr input = ctxt->input;
3110 ctxt->instate = XML_PARSER_ENTITY_DECL;
3111 SHRINK;
3112 SKIP(8);
3113 if (!IS_BLANK(CUR)) {
3114 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3116 ctxt->sax->error(ctxt->userData,
3117 "Space required after '<!ENTITY'\n");
3118 ctxt->wellFormed = 0;
3119 ctxt->disableSAX = 1;
3120 }
3121 SKIP_BLANKS;
3122
3123 if (RAW == '%') {
3124 NEXT;
3125 if (!IS_BLANK(CUR)) {
3126 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3128 ctxt->sax->error(ctxt->userData,
3129 "Space required after '%'\n");
3130 ctxt->wellFormed = 0;
3131 ctxt->disableSAX = 1;
3132 }
3133 SKIP_BLANKS;
3134 isParameter = 1;
3135 }
3136
Daniel Veillard29631a82001-03-05 09:49:20 +00003137 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003138 if (name == NULL) {
3139 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3142 ctxt->wellFormed = 0;
3143 ctxt->disableSAX = 1;
3144 return;
3145 }
3146 if (!IS_BLANK(CUR)) {
3147 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3149 ctxt->sax->error(ctxt->userData,
3150 "Space required after the entity name\n");
3151 ctxt->wellFormed = 0;
3152 ctxt->disableSAX = 1;
3153 }
3154 SKIP_BLANKS;
3155
3156 /*
3157 * handle the various case of definitions...
3158 */
3159 if (isParameter) {
3160 if ((RAW == '"') || (RAW == '\'')) {
3161 value = xmlParseEntityValue(ctxt, &orig);
3162 if (value) {
3163 if ((ctxt->sax != NULL) &&
3164 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3165 ctxt->sax->entityDecl(ctxt->userData, name,
3166 XML_INTERNAL_PARAMETER_ENTITY,
3167 NULL, NULL, value);
3168 }
3169 } else {
3170 URI = xmlParseExternalID(ctxt, &literal, 1);
3171 if ((URI == NULL) && (literal == NULL)) {
3172 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3174 ctxt->sax->error(ctxt->userData,
3175 "Entity value required\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 }
3179 if (URI) {
3180 xmlURIPtr uri;
3181
3182 uri = xmlParseURI((const char *) URI);
3183 if (uri == NULL) {
3184 ctxt->errNo = XML_ERR_INVALID_URI;
3185 if ((ctxt->sax != NULL) &&
3186 (!ctxt->disableSAX) &&
3187 (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData,
3189 "Invalid URI: %s\n", URI);
3190 ctxt->wellFormed = 0;
3191 } else {
3192 if (uri->fragment != NULL) {
3193 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3194 if ((ctxt->sax != NULL) &&
3195 (!ctxt->disableSAX) &&
3196 (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData,
3198 "Fragment not allowed: %s\n", URI);
3199 ctxt->wellFormed = 0;
3200 } else {
3201 if ((ctxt->sax != NULL) &&
3202 (!ctxt->disableSAX) &&
3203 (ctxt->sax->entityDecl != NULL))
3204 ctxt->sax->entityDecl(ctxt->userData, name,
3205 XML_EXTERNAL_PARAMETER_ENTITY,
3206 literal, URI, NULL);
3207 }
3208 xmlFreeURI(uri);
3209 }
3210 }
3211 }
3212 } else {
3213 if ((RAW == '"') || (RAW == '\'')) {
3214 value = xmlParseEntityValue(ctxt, &orig);
3215 if ((ctxt->sax != NULL) &&
3216 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3217 ctxt->sax->entityDecl(ctxt->userData, name,
3218 XML_INTERNAL_GENERAL_ENTITY,
3219 NULL, NULL, value);
3220 } else {
3221 URI = xmlParseExternalID(ctxt, &literal, 1);
3222 if ((URI == NULL) && (literal == NULL)) {
3223 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3225 ctxt->sax->error(ctxt->userData,
3226 "Entity value required\n");
3227 ctxt->wellFormed = 0;
3228 ctxt->disableSAX = 1;
3229 }
3230 if (URI) {
3231 xmlURIPtr uri;
3232
3233 uri = xmlParseURI((const char *)URI);
3234 if (uri == NULL) {
3235 ctxt->errNo = XML_ERR_INVALID_URI;
3236 if ((ctxt->sax != NULL) &&
3237 (!ctxt->disableSAX) &&
3238 (ctxt->sax->error != NULL))
3239 ctxt->sax->error(ctxt->userData,
3240 "Invalid URI: %s\n", URI);
3241 ctxt->wellFormed = 0;
3242 } else {
3243 if (uri->fragment != NULL) {
3244 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3245 if ((ctxt->sax != NULL) &&
3246 (!ctxt->disableSAX) &&
3247 (ctxt->sax->error != NULL))
3248 ctxt->sax->error(ctxt->userData,
3249 "Fragment not allowed: %s\n", URI);
3250 ctxt->wellFormed = 0;
3251 }
3252 xmlFreeURI(uri);
3253 }
3254 }
3255 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3256 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3258 ctxt->sax->error(ctxt->userData,
3259 "Space required before 'NDATA'\n");
3260 ctxt->wellFormed = 0;
3261 ctxt->disableSAX = 1;
3262 }
3263 SKIP_BLANKS;
3264 if ((RAW == 'N') && (NXT(1) == 'D') &&
3265 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3266 (NXT(4) == 'A')) {
3267 SKIP(5);
3268 if (!IS_BLANK(CUR)) {
3269 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3271 ctxt->sax->error(ctxt->userData,
3272 "Space required after 'NDATA'\n");
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 }
3276 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003277 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003278 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3279 (ctxt->sax->unparsedEntityDecl != NULL))
3280 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3281 literal, URI, ndata);
3282 } else {
3283 if ((ctxt->sax != NULL) &&
3284 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3285 ctxt->sax->entityDecl(ctxt->userData, name,
3286 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3287 literal, URI, NULL);
3288 }
3289 }
3290 }
3291 SKIP_BLANKS;
3292 if (RAW != '>') {
3293 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3295 ctxt->sax->error(ctxt->userData,
3296 "xmlParseEntityDecl: entity %s not terminated\n", name);
3297 ctxt->wellFormed = 0;
3298 ctxt->disableSAX = 1;
3299 } else {
3300 if (input != ctxt->input) {
3301 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3303 ctxt->sax->error(ctxt->userData,
3304"Entity declaration doesn't start and stop in the same entity\n");
3305 ctxt->wellFormed = 0;
3306 ctxt->disableSAX = 1;
3307 }
3308 NEXT;
3309 }
3310 if (orig != NULL) {
3311 /*
3312 * Ugly mechanism to save the raw entity value.
3313 */
3314 xmlEntityPtr cur = NULL;
3315
3316 if (isParameter) {
3317 if ((ctxt->sax != NULL) &&
3318 (ctxt->sax->getParameterEntity != NULL))
3319 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3320 } else {
3321 if ((ctxt->sax != NULL) &&
3322 (ctxt->sax->getEntity != NULL))
3323 cur = ctxt->sax->getEntity(ctxt->userData, name);
3324 }
3325 if (cur != NULL) {
3326 if (cur->orig != NULL)
3327 xmlFree(orig);
3328 else
3329 cur->orig = orig;
3330 } else
3331 xmlFree(orig);
3332 }
3333 if (name != NULL) xmlFree(name);
3334 if (value != NULL) xmlFree(value);
3335 if (URI != NULL) xmlFree(URI);
3336 if (literal != NULL) xmlFree(literal);
3337 if (ndata != NULL) xmlFree(ndata);
3338 }
3339}
3340
3341/**
3342 * xmlParseDefaultDecl:
3343 * @ctxt: an XML parser context
3344 * @value: Receive a possible fixed default value for the attribute
3345 *
3346 * Parse an attribute default declaration
3347 *
3348 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3349 *
3350 * [ VC: Required Attribute ]
3351 * if the default declaration is the keyword #REQUIRED, then the
3352 * attribute must be specified for all elements of the type in the
3353 * attribute-list declaration.
3354 *
3355 * [ VC: Attribute Default Legal ]
3356 * The declared default value must meet the lexical constraints of
3357 * the declared attribute type c.f. xmlValidateAttributeDecl()
3358 *
3359 * [ VC: Fixed Attribute Default ]
3360 * if an attribute has a default value declared with the #FIXED
3361 * keyword, instances of that attribute must match the default value.
3362 *
3363 * [ WFC: No < in Attribute Values ]
3364 * handled in xmlParseAttValue()
3365 *
3366 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3367 * or XML_ATTRIBUTE_FIXED.
3368 */
3369
3370int
3371xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3372 int val;
3373 xmlChar *ret;
3374
3375 *value = NULL;
3376 if ((RAW == '#') && (NXT(1) == 'R') &&
3377 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3378 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3379 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3380 (NXT(8) == 'D')) {
3381 SKIP(9);
3382 return(XML_ATTRIBUTE_REQUIRED);
3383 }
3384 if ((RAW == '#') && (NXT(1) == 'I') &&
3385 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3386 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3387 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3388 SKIP(8);
3389 return(XML_ATTRIBUTE_IMPLIED);
3390 }
3391 val = XML_ATTRIBUTE_NONE;
3392 if ((RAW == '#') && (NXT(1) == 'F') &&
3393 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3394 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3395 SKIP(6);
3396 val = XML_ATTRIBUTE_FIXED;
3397 if (!IS_BLANK(CUR)) {
3398 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3400 ctxt->sax->error(ctxt->userData,
3401 "Space required after '#FIXED'\n");
3402 ctxt->wellFormed = 0;
3403 ctxt->disableSAX = 1;
3404 }
3405 SKIP_BLANKS;
3406 }
3407 ret = xmlParseAttValue(ctxt);
3408 ctxt->instate = XML_PARSER_DTD;
3409 if (ret == NULL) {
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "Attribute default value declaration error\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 } else
3416 *value = ret;
3417 return(val);
3418}
3419
3420/**
3421 * xmlParseNotationType:
3422 * @ctxt: an XML parser context
3423 *
3424 * parse an Notation attribute type.
3425 *
3426 * Note: the leading 'NOTATION' S part has already being parsed...
3427 *
3428 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3429 *
3430 * [ VC: Notation Attributes ]
3431 * Values of this type must match one of the notation names included
3432 * in the declaration; all notation names in the declaration must be declared.
3433 *
3434 * Returns: the notation attribute tree built while parsing
3435 */
3436
3437xmlEnumerationPtr
3438xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3439 xmlChar *name;
3440 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3441
3442 if (RAW != '(') {
3443 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3445 ctxt->sax->error(ctxt->userData,
3446 "'(' required to start 'NOTATION'\n");
3447 ctxt->wellFormed = 0;
3448 ctxt->disableSAX = 1;
3449 return(NULL);
3450 }
3451 SHRINK;
3452 do {
3453 NEXT;
3454 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003455 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003456 if (name == NULL) {
3457 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Name expected in NOTATION declaration\n");
3461 ctxt->wellFormed = 0;
3462 ctxt->disableSAX = 1;
3463 return(ret);
3464 }
3465 cur = xmlCreateEnumeration(name);
3466 xmlFree(name);
3467 if (cur == NULL) return(ret);
3468 if (last == NULL) ret = last = cur;
3469 else {
3470 last->next = cur;
3471 last = cur;
3472 }
3473 SKIP_BLANKS;
3474 } while (RAW == '|');
3475 if (RAW != ')') {
3476 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3478 ctxt->sax->error(ctxt->userData,
3479 "')' required to finish NOTATION declaration\n");
3480 ctxt->wellFormed = 0;
3481 ctxt->disableSAX = 1;
3482 if ((last != NULL) && (last != ret))
3483 xmlFreeEnumeration(last);
3484 return(ret);
3485 }
3486 NEXT;
3487 return(ret);
3488}
3489
3490/**
3491 * xmlParseEnumerationType:
3492 * @ctxt: an XML parser context
3493 *
3494 * parse an Enumeration attribute type.
3495 *
3496 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3497 *
3498 * [ VC: Enumeration ]
3499 * Values of this type must match one of the Nmtoken tokens in
3500 * the declaration
3501 *
3502 * Returns: the enumeration attribute tree built while parsing
3503 */
3504
3505xmlEnumerationPtr
3506xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3507 xmlChar *name;
3508 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3509
3510 if (RAW != '(') {
3511 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3513 ctxt->sax->error(ctxt->userData,
3514 "'(' required to start ATTLIST enumeration\n");
3515 ctxt->wellFormed = 0;
3516 ctxt->disableSAX = 1;
3517 return(NULL);
3518 }
3519 SHRINK;
3520 do {
3521 NEXT;
3522 SKIP_BLANKS;
3523 name = xmlParseNmtoken(ctxt);
3524 if (name == NULL) {
3525 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3527 ctxt->sax->error(ctxt->userData,
3528 "NmToken expected in ATTLIST enumeration\n");
3529 ctxt->wellFormed = 0;
3530 ctxt->disableSAX = 1;
3531 return(ret);
3532 }
3533 cur = xmlCreateEnumeration(name);
3534 xmlFree(name);
3535 if (cur == NULL) return(ret);
3536 if (last == NULL) ret = last = cur;
3537 else {
3538 last->next = cur;
3539 last = cur;
3540 }
3541 SKIP_BLANKS;
3542 } while (RAW == '|');
3543 if (RAW != ')') {
3544 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3546 ctxt->sax->error(ctxt->userData,
3547 "')' required to finish ATTLIST enumeration\n");
3548 ctxt->wellFormed = 0;
3549 ctxt->disableSAX = 1;
3550 return(ret);
3551 }
3552 NEXT;
3553 return(ret);
3554}
3555
3556/**
3557 * xmlParseEnumeratedType:
3558 * @ctxt: an XML parser context
3559 * @tree: the enumeration tree built while parsing
3560 *
3561 * parse an Enumerated attribute type.
3562 *
3563 * [57] EnumeratedType ::= NotationType | Enumeration
3564 *
3565 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3566 *
3567 *
3568 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3569 */
3570
3571int
3572xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3573 if ((RAW == 'N') && (NXT(1) == 'O') &&
3574 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3575 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3576 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3577 SKIP(8);
3578 if (!IS_BLANK(CUR)) {
3579 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3581 ctxt->sax->error(ctxt->userData,
3582 "Space required after 'NOTATION'\n");
3583 ctxt->wellFormed = 0;
3584 ctxt->disableSAX = 1;
3585 return(0);
3586 }
3587 SKIP_BLANKS;
3588 *tree = xmlParseNotationType(ctxt);
3589 if (*tree == NULL) return(0);
3590 return(XML_ATTRIBUTE_NOTATION);
3591 }
3592 *tree = xmlParseEnumerationType(ctxt);
3593 if (*tree == NULL) return(0);
3594 return(XML_ATTRIBUTE_ENUMERATION);
3595}
3596
3597/**
3598 * xmlParseAttributeType:
3599 * @ctxt: an XML parser context
3600 * @tree: the enumeration tree built while parsing
3601 *
3602 * parse the Attribute list def for an element
3603 *
3604 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3605 *
3606 * [55] StringType ::= 'CDATA'
3607 *
3608 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3609 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3610 *
3611 * Validity constraints for attribute values syntax are checked in
3612 * xmlValidateAttributeValue()
3613 *
3614 * [ VC: ID ]
3615 * Values of type ID must match the Name production. A name must not
3616 * appear more than once in an XML document as a value of this type;
3617 * i.e., ID values must uniquely identify the elements which bear them.
3618 *
3619 * [ VC: One ID per Element Type ]
3620 * No element type may have more than one ID attribute specified.
3621 *
3622 * [ VC: ID Attribute Default ]
3623 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3624 *
3625 * [ VC: IDREF ]
3626 * Values of type IDREF must match the Name production, and values
3627 * of type IDREFS must match Names; each IDREF Name must match the value
3628 * of an ID attribute on some element in the XML document; i.e. IDREF
3629 * values must match the value of some ID attribute.
3630 *
3631 * [ VC: Entity Name ]
3632 * Values of type ENTITY must match the Name production, values
3633 * of type ENTITIES must match Names; each Entity Name must match the
3634 * name of an unparsed entity declared in the DTD.
3635 *
3636 * [ VC: Name Token ]
3637 * Values of type NMTOKEN must match the Nmtoken production; values
3638 * of type NMTOKENS must match Nmtokens.
3639 *
3640 * Returns the attribute type
3641 */
3642int
3643xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3644 SHRINK;
3645 if ((RAW == 'C') && (NXT(1) == 'D') &&
3646 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3647 (NXT(4) == 'A')) {
3648 SKIP(5);
3649 return(XML_ATTRIBUTE_CDATA);
3650 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3651 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3652 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3653 SKIP(6);
3654 return(XML_ATTRIBUTE_IDREFS);
3655 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3656 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3657 (NXT(4) == 'F')) {
3658 SKIP(5);
3659 return(XML_ATTRIBUTE_IDREF);
3660 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3661 SKIP(2);
3662 return(XML_ATTRIBUTE_ID);
3663 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3664 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3665 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3666 SKIP(6);
3667 return(XML_ATTRIBUTE_ENTITY);
3668 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3669 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3670 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3671 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3672 SKIP(8);
3673 return(XML_ATTRIBUTE_ENTITIES);
3674 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3675 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3676 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3677 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3678 SKIP(8);
3679 return(XML_ATTRIBUTE_NMTOKENS);
3680 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3681 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3682 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3683 (NXT(6) == 'N')) {
3684 SKIP(7);
3685 return(XML_ATTRIBUTE_NMTOKEN);
3686 }
3687 return(xmlParseEnumeratedType(ctxt, tree));
3688}
3689
3690/**
3691 * xmlParseAttributeListDecl:
3692 * @ctxt: an XML parser context
3693 *
3694 * : parse the Attribute list def for an element
3695 *
3696 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3697 *
3698 * [53] AttDef ::= S Name S AttType S DefaultDecl
3699 *
3700 */
3701void
3702xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3703 xmlChar *elemName;
3704 xmlChar *attrName;
3705 xmlEnumerationPtr tree;
3706
3707 if ((RAW == '<') && (NXT(1) == '!') &&
3708 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3709 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3710 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3711 (NXT(8) == 'T')) {
3712 xmlParserInputPtr input = ctxt->input;
3713
3714 SKIP(9);
3715 if (!IS_BLANK(CUR)) {
3716 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3718 ctxt->sax->error(ctxt->userData,
3719 "Space required after '<!ATTLIST'\n");
3720 ctxt->wellFormed = 0;
3721 ctxt->disableSAX = 1;
3722 }
3723 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003724 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003725 if (elemName == NULL) {
3726 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "ATTLIST: no name for Element\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 return;
3733 }
3734 SKIP_BLANKS;
3735 GROW;
3736 while (RAW != '>') {
3737 const xmlChar *check = CUR_PTR;
3738 int type;
3739 int def;
3740 xmlChar *defaultValue = NULL;
3741
3742 GROW;
3743 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003744 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (attrName == NULL) {
3746 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "ATTLIST: no name for Attribute\n");
3750 ctxt->wellFormed = 0;
3751 ctxt->disableSAX = 1;
3752 break;
3753 }
3754 GROW;
3755 if (!IS_BLANK(CUR)) {
3756 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3758 ctxt->sax->error(ctxt->userData,
3759 "Space required after the attribute name\n");
3760 ctxt->wellFormed = 0;
3761 ctxt->disableSAX = 1;
3762 if (attrName != NULL)
3763 xmlFree(attrName);
3764 if (defaultValue != NULL)
3765 xmlFree(defaultValue);
3766 break;
3767 }
3768 SKIP_BLANKS;
3769
3770 type = xmlParseAttributeType(ctxt, &tree);
3771 if (type <= 0) {
3772 if (attrName != NULL)
3773 xmlFree(attrName);
3774 if (defaultValue != NULL)
3775 xmlFree(defaultValue);
3776 break;
3777 }
3778
3779 GROW;
3780 if (!IS_BLANK(CUR)) {
3781 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3783 ctxt->sax->error(ctxt->userData,
3784 "Space required after the attribute type\n");
3785 ctxt->wellFormed = 0;
3786 ctxt->disableSAX = 1;
3787 if (attrName != NULL)
3788 xmlFree(attrName);
3789 if (defaultValue != NULL)
3790 xmlFree(defaultValue);
3791 if (tree != NULL)
3792 xmlFreeEnumeration(tree);
3793 break;
3794 }
3795 SKIP_BLANKS;
3796
3797 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3798 if (def <= 0) {
3799 if (attrName != NULL)
3800 xmlFree(attrName);
3801 if (defaultValue != NULL)
3802 xmlFree(defaultValue);
3803 if (tree != NULL)
3804 xmlFreeEnumeration(tree);
3805 break;
3806 }
3807
3808 GROW;
3809 if (RAW != '>') {
3810 if (!IS_BLANK(CUR)) {
3811 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3813 ctxt->sax->error(ctxt->userData,
3814 "Space required after the attribute default value\n");
3815 ctxt->wellFormed = 0;
3816 ctxt->disableSAX = 1;
3817 if (attrName != NULL)
3818 xmlFree(attrName);
3819 if (defaultValue != NULL)
3820 xmlFree(defaultValue);
3821 if (tree != NULL)
3822 xmlFreeEnumeration(tree);
3823 break;
3824 }
3825 SKIP_BLANKS;
3826 }
3827 if (check == CUR_PTR) {
3828 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3830 ctxt->sax->error(ctxt->userData,
3831 "xmlParseAttributeListDecl: detected internal error\n");
3832 if (attrName != NULL)
3833 xmlFree(attrName);
3834 if (defaultValue != NULL)
3835 xmlFree(defaultValue);
3836 if (tree != NULL)
3837 xmlFreeEnumeration(tree);
3838 break;
3839 }
3840 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3841 (ctxt->sax->attributeDecl != NULL))
3842 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3843 type, def, defaultValue, tree);
3844 if (attrName != NULL)
3845 xmlFree(attrName);
3846 if (defaultValue != NULL)
3847 xmlFree(defaultValue);
3848 GROW;
3849 }
3850 if (RAW == '>') {
3851 if (input != ctxt->input) {
3852 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3854 ctxt->sax->error(ctxt->userData,
3855"Attribute list declaration doesn't start and stop in the same entity\n");
3856 ctxt->wellFormed = 0;
3857 ctxt->disableSAX = 1;
3858 }
3859 NEXT;
3860 }
3861
3862 xmlFree(elemName);
3863 }
3864}
3865
3866/**
3867 * xmlParseElementMixedContentDecl:
3868 * @ctxt: an XML parser context
3869 *
3870 * parse the declaration for a Mixed Element content
3871 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3872 *
3873 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3874 * '(' S? '#PCDATA' S? ')'
3875 *
3876 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3877 *
3878 * [ VC: No Duplicate Types ]
3879 * The same name must not appear more than once in a single
3880 * mixed-content declaration.
3881 *
3882 * returns: the list of the xmlElementContentPtr describing the element choices
3883 */
3884xmlElementContentPtr
3885xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3886 xmlElementContentPtr ret = NULL, cur = NULL, n;
3887 xmlChar *elem = NULL;
3888
3889 GROW;
3890 if ((RAW == '#') && (NXT(1) == 'P') &&
3891 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3892 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3893 (NXT(6) == 'A')) {
3894 SKIP(7);
3895 SKIP_BLANKS;
3896 SHRINK;
3897 if (RAW == ')') {
3898 ctxt->entity = ctxt->input;
3899 NEXT;
3900 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3901 if (RAW == '*') {
3902 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3903 NEXT;
3904 }
3905 return(ret);
3906 }
3907 if ((RAW == '(') || (RAW == '|')) {
3908 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3909 if (ret == NULL) return(NULL);
3910 }
3911 while (RAW == '|') {
3912 NEXT;
3913 if (elem == NULL) {
3914 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3915 if (ret == NULL) return(NULL);
3916 ret->c1 = cur;
3917 cur = ret;
3918 } else {
3919 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3920 if (n == NULL) return(NULL);
3921 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3922 cur->c2 = n;
3923 cur = n;
3924 xmlFree(elem);
3925 }
3926 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003927 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003928 if (elem == NULL) {
3929 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3931 ctxt->sax->error(ctxt->userData,
3932 "xmlParseElementMixedContentDecl : Name expected\n");
3933 ctxt->wellFormed = 0;
3934 ctxt->disableSAX = 1;
3935 xmlFreeElementContent(cur);
3936 return(NULL);
3937 }
3938 SKIP_BLANKS;
3939 GROW;
3940 }
3941 if ((RAW == ')') && (NXT(1) == '*')) {
3942 if (elem != NULL) {
3943 cur->c2 = xmlNewElementContent(elem,
3944 XML_ELEMENT_CONTENT_ELEMENT);
3945 xmlFree(elem);
3946 }
3947 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3948 ctxt->entity = ctxt->input;
3949 SKIP(2);
3950 } else {
3951 if (elem != NULL) xmlFree(elem);
3952 xmlFreeElementContent(ret);
3953 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3955 ctxt->sax->error(ctxt->userData,
3956 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3957 ctxt->wellFormed = 0;
3958 ctxt->disableSAX = 1;
3959 return(NULL);
3960 }
3961
3962 } else {
3963 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3965 ctxt->sax->error(ctxt->userData,
3966 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3967 ctxt->wellFormed = 0;
3968 ctxt->disableSAX = 1;
3969 }
3970 return(ret);
3971}
3972
3973/**
3974 * xmlParseElementChildrenContentDecl:
3975 * @ctxt: an XML parser context
3976 *
3977 * parse the declaration for a Mixed Element content
3978 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3979 *
3980 *
3981 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3982 *
3983 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3984 *
3985 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3986 *
3987 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3988 *
3989 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3990 * TODO Parameter-entity replacement text must be properly nested
3991 * with parenthetized groups. That is to say, if either of the
3992 * opening or closing parentheses in a choice, seq, or Mixed
3993 * construct is contained in the replacement text for a parameter
3994 * entity, both must be contained in the same replacement text. For
3995 * interoperability, if a parameter-entity reference appears in a
3996 * choice, seq, or Mixed construct, its replacement text should not
3997 * be empty, and neither the first nor last non-blank character of
3998 * the replacement text should be a connector (| or ,).
3999 *
4000 * returns: the tree of xmlElementContentPtr describing the element
4001 * hierarchy.
4002 */
4003xmlElementContentPtr
4004#ifdef VMS
4005xmlParseElementChildrenContentD
4006#else
4007xmlParseElementChildrenContentDecl
4008#endif
4009(xmlParserCtxtPtr ctxt) {
4010 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4011 xmlChar *elem;
4012 xmlChar type = 0;
4013
4014 SKIP_BLANKS;
4015 GROW;
4016 if (RAW == '(') {
4017 /* Recurse on first child */
4018 NEXT;
4019 SKIP_BLANKS;
4020 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4021 SKIP_BLANKS;
4022 GROW;
4023 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004024 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004025 if (elem == NULL) {
4026 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4030 ctxt->wellFormed = 0;
4031 ctxt->disableSAX = 1;
4032 return(NULL);
4033 }
4034 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4035 GROW;
4036 if (RAW == '?') {
4037 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4038 NEXT;
4039 } else if (RAW == '*') {
4040 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4041 NEXT;
4042 } else if (RAW == '+') {
4043 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4044 NEXT;
4045 } else {
4046 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4047 }
4048 xmlFree(elem);
4049 GROW;
4050 }
4051 SKIP_BLANKS;
4052 SHRINK;
4053 while (RAW != ')') {
4054 /*
4055 * Each loop we parse one separator and one element.
4056 */
4057 if (RAW == ',') {
4058 if (type == 0) type = CUR;
4059
4060 /*
4061 * Detect "Name | Name , Name" error
4062 */
4063 else if (type != CUR) {
4064 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4066 ctxt->sax->error(ctxt->userData,
4067 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4068 type);
4069 ctxt->wellFormed = 0;
4070 ctxt->disableSAX = 1;
4071 if ((op != NULL) && (op != ret))
4072 xmlFreeElementContent(op);
4073 if ((last != NULL) && (last != ret) &&
4074 (last != ret->c1) && (last != ret->c2))
4075 xmlFreeElementContent(last);
4076 if (ret != NULL)
4077 xmlFreeElementContent(ret);
4078 return(NULL);
4079 }
4080 NEXT;
4081
4082 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4083 if (op == NULL) {
4084 xmlFreeElementContent(ret);
4085 return(NULL);
4086 }
4087 if (last == NULL) {
4088 op->c1 = ret;
4089 ret = cur = op;
4090 } else {
4091 cur->c2 = op;
4092 op->c1 = last;
4093 cur =op;
4094 last = NULL;
4095 }
4096 } else if (RAW == '|') {
4097 if (type == 0) type = CUR;
4098
4099 /*
4100 * Detect "Name , Name | Name" error
4101 */
4102 else if (type != CUR) {
4103 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4105 ctxt->sax->error(ctxt->userData,
4106 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4107 type);
4108 ctxt->wellFormed = 0;
4109 ctxt->disableSAX = 1;
4110 if ((op != NULL) && (op != ret) && (op != last))
4111 xmlFreeElementContent(op);
4112 if ((last != NULL) && (last != ret) &&
4113 (last != ret->c1) && (last != ret->c2))
4114 xmlFreeElementContent(last);
4115 if (ret != NULL)
4116 xmlFreeElementContent(ret);
4117 return(NULL);
4118 }
4119 NEXT;
4120
4121 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4122 if (op == NULL) {
4123 if ((op != NULL) && (op != ret))
4124 xmlFreeElementContent(op);
4125 if ((last != NULL) && (last != ret) &&
4126 (last != ret->c1) && (last != ret->c2))
4127 xmlFreeElementContent(last);
4128 if (ret != NULL)
4129 xmlFreeElementContent(ret);
4130 return(NULL);
4131 }
4132 if (last == NULL) {
4133 op->c1 = ret;
4134 ret = cur = op;
4135 } else {
4136 cur->c2 = op;
4137 op->c1 = last;
4138 cur =op;
4139 last = NULL;
4140 }
4141 } else {
4142 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4144 ctxt->sax->error(ctxt->userData,
4145 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4146 ctxt->wellFormed = 0;
4147 ctxt->disableSAX = 1;
4148 if ((op != NULL) && (op != ret))
4149 xmlFreeElementContent(op);
4150 if ((last != NULL) && (last != ret) &&
4151 (last != ret->c1) && (last != ret->c2))
4152 xmlFreeElementContent(last);
4153 if (ret != NULL)
4154 xmlFreeElementContent(ret);
4155 return(NULL);
4156 }
4157 GROW;
4158 SKIP_BLANKS;
4159 GROW;
4160 if (RAW == '(') {
4161 /* Recurse on second child */
4162 NEXT;
4163 SKIP_BLANKS;
4164 last = xmlParseElementChildrenContentDecl(ctxt);
4165 SKIP_BLANKS;
4166 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004167 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004168 if (elem == NULL) {
4169 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4171 ctxt->sax->error(ctxt->userData,
4172 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4173 ctxt->wellFormed = 0;
4174 ctxt->disableSAX = 1;
4175 if ((op != NULL) && (op != ret))
4176 xmlFreeElementContent(op);
4177 if ((last != NULL) && (last != ret) &&
4178 (last != ret->c1) && (last != ret->c2))
4179 xmlFreeElementContent(last);
4180 if (ret != NULL)
4181 xmlFreeElementContent(ret);
4182 return(NULL);
4183 }
4184 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4185 xmlFree(elem);
4186 if (RAW == '?') {
4187 last->ocur = XML_ELEMENT_CONTENT_OPT;
4188 NEXT;
4189 } else if (RAW == '*') {
4190 last->ocur = XML_ELEMENT_CONTENT_MULT;
4191 NEXT;
4192 } else if (RAW == '+') {
4193 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4194 NEXT;
4195 } else {
4196 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4197 }
4198 }
4199 SKIP_BLANKS;
4200 GROW;
4201 }
4202 if ((cur != NULL) && (last != NULL)) {
4203 cur->c2 = last;
4204 }
4205 ctxt->entity = ctxt->input;
4206 NEXT;
4207 if (RAW == '?') {
4208 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4209 NEXT;
4210 } else if (RAW == '*') {
4211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4212 NEXT;
4213 } else if (RAW == '+') {
4214 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4215 NEXT;
4216 }
4217 return(ret);
4218}
4219
4220/**
4221 * xmlParseElementContentDecl:
4222 * @ctxt: an XML parser context
4223 * @name: the name of the element being defined.
4224 * @result: the Element Content pointer will be stored here if any
4225 *
4226 * parse the declaration for an Element content either Mixed or Children,
4227 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4228 *
4229 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4230 *
4231 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4232 */
4233
4234int
4235xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4236 xmlElementContentPtr *result) {
4237
4238 xmlElementContentPtr tree = NULL;
4239 xmlParserInputPtr input = ctxt->input;
4240 int res;
4241
4242 *result = NULL;
4243
4244 if (RAW != '(') {
4245 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4247 ctxt->sax->error(ctxt->userData,
4248 "xmlParseElementContentDecl : '(' expected\n");
4249 ctxt->wellFormed = 0;
4250 ctxt->disableSAX = 1;
4251 return(-1);
4252 }
4253 NEXT;
4254 GROW;
4255 SKIP_BLANKS;
4256 if ((RAW == '#') && (NXT(1) == 'P') &&
4257 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4258 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4259 (NXT(6) == 'A')) {
4260 tree = xmlParseElementMixedContentDecl(ctxt);
4261 res = XML_ELEMENT_TYPE_MIXED;
4262 } else {
4263 tree = xmlParseElementChildrenContentDecl(ctxt);
4264 res = XML_ELEMENT_TYPE_ELEMENT;
4265 }
4266 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4267 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4269 ctxt->sax->error(ctxt->userData,
4270"Element content declaration doesn't start and stop in the same entity\n");
4271 ctxt->wellFormed = 0;
4272 ctxt->disableSAX = 1;
4273 }
4274 SKIP_BLANKS;
4275 *result = tree;
4276 return(res);
4277}
4278
4279/**
4280 * xmlParseElementDecl:
4281 * @ctxt: an XML parser context
4282 *
4283 * parse an Element declaration.
4284 *
4285 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4286 *
4287 * [ VC: Unique Element Type Declaration ]
4288 * No element type may be declared more than once
4289 *
4290 * Returns the type of the element, or -1 in case of error
4291 */
4292int
4293xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4294 xmlChar *name;
4295 int ret = -1;
4296 xmlElementContentPtr content = NULL;
4297
4298 GROW;
4299 if ((RAW == '<') && (NXT(1) == '!') &&
4300 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4301 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4302 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4303 (NXT(8) == 'T')) {
4304 xmlParserInputPtr input = ctxt->input;
4305
4306 SKIP(9);
4307 if (!IS_BLANK(CUR)) {
4308 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4310 ctxt->sax->error(ctxt->userData,
4311 "Space required after 'ELEMENT'\n");
4312 ctxt->wellFormed = 0;
4313 ctxt->disableSAX = 1;
4314 }
4315 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004316 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004317 if (name == NULL) {
4318 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4320 ctxt->sax->error(ctxt->userData,
4321 "xmlParseElementDecl: no name for Element\n");
4322 ctxt->wellFormed = 0;
4323 ctxt->disableSAX = 1;
4324 return(-1);
4325 }
4326 while ((RAW == 0) && (ctxt->inputNr > 1))
4327 xmlPopInput(ctxt);
4328 if (!IS_BLANK(CUR)) {
4329 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "Space required after the element name\n");
4333 ctxt->wellFormed = 0;
4334 ctxt->disableSAX = 1;
4335 }
4336 SKIP_BLANKS;
4337 if ((RAW == 'E') && (NXT(1) == 'M') &&
4338 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4339 (NXT(4) == 'Y')) {
4340 SKIP(5);
4341 /*
4342 * Element must always be empty.
4343 */
4344 ret = XML_ELEMENT_TYPE_EMPTY;
4345 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4346 (NXT(2) == 'Y')) {
4347 SKIP(3);
4348 /*
4349 * Element is a generic container.
4350 */
4351 ret = XML_ELEMENT_TYPE_ANY;
4352 } else if (RAW == '(') {
4353 ret = xmlParseElementContentDecl(ctxt, name, &content);
4354 } else {
4355 /*
4356 * [ WFC: PEs in Internal Subset ] error handling.
4357 */
4358 if ((RAW == '%') && (ctxt->external == 0) &&
4359 (ctxt->inputNr == 1)) {
4360 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4362 ctxt->sax->error(ctxt->userData,
4363 "PEReference: forbidden within markup decl in internal subset\n");
4364 } else {
4365 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4367 ctxt->sax->error(ctxt->userData,
4368 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4369 }
4370 ctxt->wellFormed = 0;
4371 ctxt->disableSAX = 1;
4372 if (name != NULL) xmlFree(name);
4373 return(-1);
4374 }
4375
4376 SKIP_BLANKS;
4377 /*
4378 * Pop-up of finished entities.
4379 */
4380 while ((RAW == 0) && (ctxt->inputNr > 1))
4381 xmlPopInput(ctxt);
4382 SKIP_BLANKS;
4383
4384 if (RAW != '>') {
4385 ctxt->errNo = XML_ERR_GT_REQUIRED;
4386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4387 ctxt->sax->error(ctxt->userData,
4388 "xmlParseElementDecl: expected '>' at the end\n");
4389 ctxt->wellFormed = 0;
4390 ctxt->disableSAX = 1;
4391 } else {
4392 if (input != ctxt->input) {
4393 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4395 ctxt->sax->error(ctxt->userData,
4396"Element declaration doesn't start and stop in the same entity\n");
4397 ctxt->wellFormed = 0;
4398 ctxt->disableSAX = 1;
4399 }
4400
4401 NEXT;
4402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4403 (ctxt->sax->elementDecl != NULL))
4404 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4405 content);
4406 }
4407 if (content != NULL) {
4408 xmlFreeElementContent(content);
4409 }
4410 if (name != NULL) {
4411 xmlFree(name);
4412 }
4413 }
4414 return(ret);
4415}
4416
4417/**
4418 * xmlParseMarkupDecl:
4419 * @ctxt: an XML parser context
4420 *
4421 * parse Markup declarations
4422 *
4423 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4424 * NotationDecl | PI | Comment
4425 *
4426 * [ VC: Proper Declaration/PE Nesting ]
4427 * Parameter-entity replacement text must be properly nested with
4428 * markup declarations. That is to say, if either the first character
4429 * or the last character of a markup declaration (markupdecl above) is
4430 * contained in the replacement text for a parameter-entity reference,
4431 * both must be contained in the same replacement text.
4432 *
4433 * [ WFC: PEs in Internal Subset ]
4434 * In the internal DTD subset, parameter-entity references can occur
4435 * only where markup declarations can occur, not within markup declarations.
4436 * (This does not apply to references that occur in external parameter
4437 * entities or to the external subset.)
4438 */
4439void
4440xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4441 GROW;
4442 xmlParseElementDecl(ctxt);
4443 xmlParseAttributeListDecl(ctxt);
4444 xmlParseEntityDecl(ctxt);
4445 xmlParseNotationDecl(ctxt);
4446 xmlParsePI(ctxt);
4447 xmlParseComment(ctxt);
4448 /*
4449 * This is only for internal subset. On external entities,
4450 * the replacement is done before parsing stage
4451 */
4452 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4453 xmlParsePEReference(ctxt);
4454 ctxt->instate = XML_PARSER_DTD;
4455}
4456
4457/**
4458 * xmlParseTextDecl:
4459 * @ctxt: an XML parser context
4460 *
4461 * parse an XML declaration header for external entities
4462 *
4463 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4464 *
4465 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4466 */
4467
4468void
4469xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4470 xmlChar *version;
4471
4472 /*
4473 * We know that '<?xml' is here.
4474 */
4475 if ((RAW == '<') && (NXT(1) == '?') &&
4476 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4477 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4478 SKIP(5);
4479 } else {
4480 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4482 ctxt->sax->error(ctxt->userData,
4483 "Text declaration '<?xml' required\n");
4484 ctxt->wellFormed = 0;
4485 ctxt->disableSAX = 1;
4486
4487 return;
4488 }
4489
4490 if (!IS_BLANK(CUR)) {
4491 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4493 ctxt->sax->error(ctxt->userData,
4494 "Space needed after '<?xml'\n");
4495 ctxt->wellFormed = 0;
4496 ctxt->disableSAX = 1;
4497 }
4498 SKIP_BLANKS;
4499
4500 /*
4501 * We may have the VersionInfo here.
4502 */
4503 version = xmlParseVersionInfo(ctxt);
4504 if (version == NULL)
4505 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4506 ctxt->input->version = version;
4507
4508 /*
4509 * We must have the encoding declaration
4510 */
4511 if (!IS_BLANK(CUR)) {
4512 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4514 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4515 ctxt->wellFormed = 0;
4516 ctxt->disableSAX = 1;
4517 }
4518 xmlParseEncodingDecl(ctxt);
4519 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4520 /*
4521 * The XML REC instructs us to stop parsing right here
4522 */
4523 return;
4524 }
4525
4526 SKIP_BLANKS;
4527 if ((RAW == '?') && (NXT(1) == '>')) {
4528 SKIP(2);
4529 } else if (RAW == '>') {
4530 /* Deprecated old WD ... */
4531 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4533 ctxt->sax->error(ctxt->userData,
4534 "XML declaration must end-up with '?>'\n");
4535 ctxt->wellFormed = 0;
4536 ctxt->disableSAX = 1;
4537 NEXT;
4538 } else {
4539 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4541 ctxt->sax->error(ctxt->userData,
4542 "parsing XML declaration: '?>' expected\n");
4543 ctxt->wellFormed = 0;
4544 ctxt->disableSAX = 1;
4545 MOVETO_ENDTAG(CUR_PTR);
4546 NEXT;
4547 }
4548}
4549
4550/*
4551 * xmlParseConditionalSections
4552 * @ctxt: an XML parser context
4553 *
4554 * [61] conditionalSect ::= includeSect | ignoreSect
4555 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4556 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4557 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4558 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4559 */
4560
4561void
4562xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4563 SKIP(3);
4564 SKIP_BLANKS;
4565 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4566 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4567 (NXT(6) == 'E')) {
4568 SKIP(7);
4569 SKIP_BLANKS;
4570 if (RAW != '[') {
4571 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4573 ctxt->sax->error(ctxt->userData,
4574 "XML conditional section '[' expected\n");
4575 ctxt->wellFormed = 0;
4576 ctxt->disableSAX = 1;
4577 } else {
4578 NEXT;
4579 }
4580 if (xmlParserDebugEntities) {
4581 if ((ctxt->input != NULL) && (ctxt->input->filename))
4582 xmlGenericError(xmlGenericErrorContext,
4583 "%s(%d): ", ctxt->input->filename,
4584 ctxt->input->line);
4585 xmlGenericError(xmlGenericErrorContext,
4586 "Entering INCLUDE Conditional Section\n");
4587 }
4588
4589 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4590 (NXT(2) != '>'))) {
4591 const xmlChar *check = CUR_PTR;
4592 int cons = ctxt->input->consumed;
4593 int tok = ctxt->token;
4594
4595 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4596 xmlParseConditionalSections(ctxt);
4597 } else if (IS_BLANK(CUR)) {
4598 NEXT;
4599 } else if (RAW == '%') {
4600 xmlParsePEReference(ctxt);
4601 } else
4602 xmlParseMarkupDecl(ctxt);
4603
4604 /*
4605 * Pop-up of finished entities.
4606 */
4607 while ((RAW == 0) && (ctxt->inputNr > 1))
4608 xmlPopInput(ctxt);
4609
4610 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4611 (tok == ctxt->token)) {
4612 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4614 ctxt->sax->error(ctxt->userData,
4615 "Content error in the external subset\n");
4616 ctxt->wellFormed = 0;
4617 ctxt->disableSAX = 1;
4618 break;
4619 }
4620 }
4621 if (xmlParserDebugEntities) {
4622 if ((ctxt->input != NULL) && (ctxt->input->filename))
4623 xmlGenericError(xmlGenericErrorContext,
4624 "%s(%d): ", ctxt->input->filename,
4625 ctxt->input->line);
4626 xmlGenericError(xmlGenericErrorContext,
4627 "Leaving INCLUDE Conditional Section\n");
4628 }
4629
4630 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4631 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4632 int state;
4633 int instate;
4634 int depth = 0;
4635
4636 SKIP(6);
4637 SKIP_BLANKS;
4638 if (RAW != '[') {
4639 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4641 ctxt->sax->error(ctxt->userData,
4642 "XML conditional section '[' expected\n");
4643 ctxt->wellFormed = 0;
4644 ctxt->disableSAX = 1;
4645 } else {
4646 NEXT;
4647 }
4648 if (xmlParserDebugEntities) {
4649 if ((ctxt->input != NULL) && (ctxt->input->filename))
4650 xmlGenericError(xmlGenericErrorContext,
4651 "%s(%d): ", ctxt->input->filename,
4652 ctxt->input->line);
4653 xmlGenericError(xmlGenericErrorContext,
4654 "Entering IGNORE Conditional Section\n");
4655 }
4656
4657 /*
4658 * Parse up to the end of the conditionnal section
4659 * But disable SAX event generating DTD building in the meantime
4660 */
4661 state = ctxt->disableSAX;
4662 instate = ctxt->instate;
4663 ctxt->disableSAX = 1;
4664 ctxt->instate = XML_PARSER_IGNORE;
4665
4666 while (depth >= 0) {
4667 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4668 depth++;
4669 SKIP(3);
4670 continue;
4671 }
4672 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4673 if (--depth >= 0) SKIP(3);
4674 continue;
4675 }
4676 NEXT;
4677 continue;
4678 }
4679
4680 ctxt->disableSAX = state;
4681 ctxt->instate = instate;
4682
4683 if (xmlParserDebugEntities) {
4684 if ((ctxt->input != NULL) && (ctxt->input->filename))
4685 xmlGenericError(xmlGenericErrorContext,
4686 "%s(%d): ", ctxt->input->filename,
4687 ctxt->input->line);
4688 xmlGenericError(xmlGenericErrorContext,
4689 "Leaving IGNORE Conditional Section\n");
4690 }
4691
4692 } else {
4693 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4695 ctxt->sax->error(ctxt->userData,
4696 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4697 ctxt->wellFormed = 0;
4698 ctxt->disableSAX = 1;
4699 }
4700
4701 if (RAW == 0)
4702 SHRINK;
4703
4704 if (RAW == 0) {
4705 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4707 ctxt->sax->error(ctxt->userData,
4708 "XML conditional section not closed\n");
4709 ctxt->wellFormed = 0;
4710 ctxt->disableSAX = 1;
4711 } else {
4712 SKIP(3);
4713 }
4714}
4715
4716/**
4717 * xmlParseExternalSubset:
4718 * @ctxt: an XML parser context
4719 * @ExternalID: the external identifier
4720 * @SystemID: the system identifier (or URL)
4721 *
4722 * parse Markup declarations from an external subset
4723 *
4724 * [30] extSubset ::= textDecl? extSubsetDecl
4725 *
4726 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4727 */
4728void
4729xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4730 const xmlChar *SystemID) {
4731 GROW;
4732 if ((RAW == '<') && (NXT(1) == '?') &&
4733 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4734 (NXT(4) == 'l')) {
4735 xmlParseTextDecl(ctxt);
4736 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4737 /*
4738 * The XML REC instructs us to stop parsing right here
4739 */
4740 ctxt->instate = XML_PARSER_EOF;
4741 return;
4742 }
4743 }
4744 if (ctxt->myDoc == NULL) {
4745 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4746 }
4747 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4748 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4749
4750 ctxt->instate = XML_PARSER_DTD;
4751 ctxt->external = 1;
4752 while (((RAW == '<') && (NXT(1) == '?')) ||
4753 ((RAW == '<') && (NXT(1) == '!')) ||
4754 IS_BLANK(CUR)) {
4755 const xmlChar *check = CUR_PTR;
4756 int cons = ctxt->input->consumed;
4757 int tok = ctxt->token;
4758
4759 GROW;
4760 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4761 xmlParseConditionalSections(ctxt);
4762 } else if (IS_BLANK(CUR)) {
4763 NEXT;
4764 } else if (RAW == '%') {
4765 xmlParsePEReference(ctxt);
4766 } else
4767 xmlParseMarkupDecl(ctxt);
4768
4769 /*
4770 * Pop-up of finished entities.
4771 */
4772 while ((RAW == 0) && (ctxt->inputNr > 1))
4773 xmlPopInput(ctxt);
4774
4775 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4776 (tok == ctxt->token)) {
4777 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4779 ctxt->sax->error(ctxt->userData,
4780 "Content error in the external subset\n");
4781 ctxt->wellFormed = 0;
4782 ctxt->disableSAX = 1;
4783 break;
4784 }
4785 }
4786
4787 if (RAW != 0) {
4788 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4790 ctxt->sax->error(ctxt->userData,
4791 "Extra content at the end of the document\n");
4792 ctxt->wellFormed = 0;
4793 ctxt->disableSAX = 1;
4794 }
4795
4796}
4797
4798/**
4799 * xmlParseReference:
4800 * @ctxt: an XML parser context
4801 *
4802 * parse and handle entity references in content, depending on the SAX
4803 * interface, this may end-up in a call to character() if this is a
4804 * CharRef, a predefined entity, if there is no reference() callback.
4805 * or if the parser was asked to switch to that mode.
4806 *
4807 * [67] Reference ::= EntityRef | CharRef
4808 */
4809void
4810xmlParseReference(xmlParserCtxtPtr ctxt) {
4811 xmlEntityPtr ent;
4812 xmlChar *val;
4813 if (RAW != '&') return;
4814
4815 if (NXT(1) == '#') {
4816 int i = 0;
4817 xmlChar out[10];
4818 int hex = NXT(2);
4819 int val = xmlParseCharRef(ctxt);
4820
4821 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4822 /*
4823 * So we are using non-UTF-8 buffers
4824 * Check that the char fit on 8bits, if not
4825 * generate a CharRef.
4826 */
4827 if (val <= 0xFF) {
4828 out[0] = val;
4829 out[1] = 0;
4830 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4831 (!ctxt->disableSAX))
4832 ctxt->sax->characters(ctxt->userData, out, 1);
4833 } else {
4834 if ((hex == 'x') || (hex == 'X'))
4835 sprintf((char *)out, "#x%X", val);
4836 else
4837 sprintf((char *)out, "#%d", val);
4838 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4839 (!ctxt->disableSAX))
4840 ctxt->sax->reference(ctxt->userData, out);
4841 }
4842 } else {
4843 /*
4844 * Just encode the value in UTF-8
4845 */
4846 COPY_BUF(0 ,out, i, val);
4847 out[i] = 0;
4848 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4849 (!ctxt->disableSAX))
4850 ctxt->sax->characters(ctxt->userData, out, i);
4851 }
4852 } else {
4853 ent = xmlParseEntityRef(ctxt);
4854 if (ent == NULL) return;
4855 if ((ent->name != NULL) &&
4856 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4857 xmlNodePtr list = NULL;
4858 int ret;
4859
4860
4861 /*
4862 * The first reference to the entity trigger a parsing phase
4863 * where the ent->children is filled with the result from
4864 * the parsing.
4865 */
4866 if (ent->children == NULL) {
4867 xmlChar *value;
4868 value = ent->content;
4869
4870 /*
4871 * Check that this entity is well formed
4872 */
4873 if ((value != NULL) &&
4874 (value[1] == 0) && (value[0] == '<') &&
4875 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4876 /*
4877 * DONE: get definite answer on this !!!
4878 * Lots of entity decls are used to declare a single
4879 * char
4880 * <!ENTITY lt "<">
4881 * Which seems to be valid since
4882 * 2.4: The ampersand character (&) and the left angle
4883 * bracket (<) may appear in their literal form only
4884 * when used ... They are also legal within the literal
4885 * entity value of an internal entity declaration;i
4886 * see "4.3.2 Well-Formed Parsed Entities".
4887 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4888 * Looking at the OASIS test suite and James Clark
4889 * tests, this is broken. However the XML REC uses
4890 * it. Is the XML REC not well-formed ????
4891 * This is a hack to avoid this problem
4892 *
4893 * ANSWER: since lt gt amp .. are already defined,
4894 * this is a redefinition and hence the fact that the
4895 * contentis not well balanced is not a Wf error, this
4896 * is lousy but acceptable.
4897 */
4898 list = xmlNewDocText(ctxt->myDoc, value);
4899 if (list != NULL) {
4900 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4901 (ent->children == NULL)) {
4902 ent->children = list;
4903 ent->last = list;
4904 list->parent = (xmlNodePtr) ent;
4905 } else {
4906 xmlFreeNodeList(list);
4907 }
4908 } else if (list != NULL) {
4909 xmlFreeNodeList(list);
4910 }
4911 } else {
4912 /*
4913 * 4.3.2: An internal general parsed entity is well-formed
4914 * if its replacement text matches the production labeled
4915 * content.
4916 */
4917 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4918 ctxt->depth++;
4919 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4920 ctxt->sax, NULL, ctxt->depth,
4921 value, &list);
4922 ctxt->depth--;
4923 } else if (ent->etype ==
4924 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4925 ctxt->depth++;
4926 ret = xmlParseExternalEntity(ctxt->myDoc,
4927 ctxt->sax, NULL, ctxt->depth,
4928 ent->URI, ent->ExternalID, &list);
4929 ctxt->depth--;
4930 } else {
4931 ret = -1;
4932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4933 ctxt->sax->error(ctxt->userData,
4934 "Internal: invalid entity type\n");
4935 }
4936 if (ret == XML_ERR_ENTITY_LOOP) {
4937 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4939 ctxt->sax->error(ctxt->userData,
4940 "Detected entity reference loop\n");
4941 ctxt->wellFormed = 0;
4942 ctxt->disableSAX = 1;
4943 } else if ((ret == 0) && (list != NULL)) {
4944 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4945 (ent->children == NULL)) {
4946 ent->children = list;
4947 while (list != NULL) {
4948 list->parent = (xmlNodePtr) ent;
4949 if (list->next == NULL)
4950 ent->last = list;
4951 list = list->next;
4952 }
4953 } else {
4954 xmlFreeNodeList(list);
4955 }
4956 } else if (ret > 0) {
4957 ctxt->errNo = ret;
4958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4959 ctxt->sax->error(ctxt->userData,
4960 "Entity value required\n");
4961 ctxt->wellFormed = 0;
4962 ctxt->disableSAX = 1;
4963 } else if (list != NULL) {
4964 xmlFreeNodeList(list);
4965 }
4966 }
4967 }
4968 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4969 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4970 /*
4971 * Create a node.
4972 */
4973 ctxt->sax->reference(ctxt->userData, ent->name);
4974 return;
4975 } else if (ctxt->replaceEntities) {
4976 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4977 /*
4978 * Seems we are generating the DOM content, do
4979 * a simple tree copy
4980 */
4981 xmlNodePtr new;
4982 new = xmlCopyNodeList(ent->children);
4983
4984 xmlAddChildList(ctxt->node, new);
4985 /*
4986 * This is to avoid a nasty side effect, see
4987 * characters() in SAX.c
4988 */
4989 ctxt->nodemem = 0;
4990 ctxt->nodelen = 0;
4991 return;
4992 } else {
4993 /*
4994 * Probably running in SAX mode
4995 */
4996 xmlParserInputPtr input;
4997
4998 input = xmlNewEntityInputStream(ctxt, ent);
4999 xmlPushInput(ctxt, input);
5000 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5001 (RAW == '<') && (NXT(1) == '?') &&
5002 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5003 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5004 xmlParseTextDecl(ctxt);
5005 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5006 /*
5007 * The XML REC instructs us to stop parsing right here
5008 */
5009 ctxt->instate = XML_PARSER_EOF;
5010 return;
5011 }
5012 if (input->standalone == 1) {
5013 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5015 ctxt->sax->error(ctxt->userData,
5016 "external parsed entities cannot be standalone\n");
5017 ctxt->wellFormed = 0;
5018 ctxt->disableSAX = 1;
5019 }
5020 }
5021 return;
5022 }
5023 }
5024 } else {
5025 val = ent->content;
5026 if (val == NULL) return;
5027 /*
5028 * inline the entity.
5029 */
5030 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5031 (!ctxt->disableSAX))
5032 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5033 }
5034 }
5035}
5036
5037/**
5038 * xmlParseEntityRef:
5039 * @ctxt: an XML parser context
5040 *
5041 * parse ENTITY references declarations
5042 *
5043 * [68] EntityRef ::= '&' Name ';'
5044 *
5045 * [ WFC: Entity Declared ]
5046 * In a document without any DTD, a document with only an internal DTD
5047 * subset which contains no parameter entity references, or a document
5048 * with "standalone='yes'", the Name given in the entity reference
5049 * must match that in an entity declaration, except that well-formed
5050 * documents need not declare any of the following entities: amp, lt,
5051 * gt, apos, quot. The declaration of a parameter entity must precede
5052 * any reference to it. Similarly, the declaration of a general entity
5053 * must precede any reference to it which appears in a default value in an
5054 * attribute-list declaration. Note that if entities are declared in the
5055 * external subset or in external parameter entities, a non-validating
5056 * processor is not obligated to read and process their declarations;
5057 * for such documents, the rule that an entity must be declared is a
5058 * well-formedness constraint only if standalone='yes'.
5059 *
5060 * [ WFC: Parsed Entity ]
5061 * An entity reference must not contain the name of an unparsed entity
5062 *
5063 * Returns the xmlEntityPtr if found, or NULL otherwise.
5064 */
5065xmlEntityPtr
5066xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5067 xmlChar *name;
5068 xmlEntityPtr ent = NULL;
5069
5070 GROW;
5071
5072 if (RAW == '&') {
5073 NEXT;
5074 name = xmlParseName(ctxt);
5075 if (name == NULL) {
5076 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5078 ctxt->sax->error(ctxt->userData,
5079 "xmlParseEntityRef: no name\n");
5080 ctxt->wellFormed = 0;
5081 ctxt->disableSAX = 1;
5082 } else {
5083 if (RAW == ';') {
5084 NEXT;
5085 /*
5086 * Ask first SAX for entity resolution, otherwise try the
5087 * predefined set.
5088 */
5089 if (ctxt->sax != NULL) {
5090 if (ctxt->sax->getEntity != NULL)
5091 ent = ctxt->sax->getEntity(ctxt->userData, name);
5092 if (ent == NULL)
5093 ent = xmlGetPredefinedEntity(name);
5094 }
5095 /*
5096 * [ WFC: Entity Declared ]
5097 * In a document without any DTD, a document with only an
5098 * internal DTD subset which contains no parameter entity
5099 * references, or a document with "standalone='yes'", the
5100 * Name given in the entity reference must match that in an
5101 * entity declaration, except that well-formed documents
5102 * need not declare any of the following entities: amp, lt,
5103 * gt, apos, quot.
5104 * The declaration of a parameter entity must precede any
5105 * reference to it.
5106 * Similarly, the declaration of a general entity must
5107 * precede any reference to it which appears in a default
5108 * value in an attribute-list declaration. Note that if
5109 * entities are declared in the external subset or in
5110 * external parameter entities, a non-validating processor
5111 * is not obligated to read and process their declarations;
5112 * for such documents, the rule that an entity must be
5113 * declared is a well-formedness constraint only if
5114 * standalone='yes'.
5115 */
5116 if (ent == NULL) {
5117 if ((ctxt->standalone == 1) ||
5118 ((ctxt->hasExternalSubset == 0) &&
5119 (ctxt->hasPErefs == 0))) {
5120 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "Entity '%s' not defined\n", name);
5124 ctxt->wellFormed = 0;
5125 ctxt->disableSAX = 1;
5126 } else {
5127 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5128 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5129 ctxt->sax->warning(ctxt->userData,
5130 "Entity '%s' not defined\n", name);
5131 }
5132 }
5133
5134 /*
5135 * [ WFC: Parsed Entity ]
5136 * An entity reference must not contain the name of an
5137 * unparsed entity
5138 */
5139 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5140 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5142 ctxt->sax->error(ctxt->userData,
5143 "Entity reference to unparsed entity %s\n", name);
5144 ctxt->wellFormed = 0;
5145 ctxt->disableSAX = 1;
5146 }
5147
5148 /*
5149 * [ WFC: No External Entity References ]
5150 * Attribute values cannot contain direct or indirect
5151 * entity references to external entities.
5152 */
5153 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5154 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5155 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5157 ctxt->sax->error(ctxt->userData,
5158 "Attribute references external entity '%s'\n", name);
5159 ctxt->wellFormed = 0;
5160 ctxt->disableSAX = 1;
5161 }
5162 /*
5163 * [ WFC: No < in Attribute Values ]
5164 * The replacement text of any entity referred to directly or
5165 * indirectly in an attribute value (other than "&lt;") must
5166 * not contain a <.
5167 */
5168 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5169 (ent != NULL) &&
5170 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5171 (ent->content != NULL) &&
5172 (xmlStrchr(ent->content, '<'))) {
5173 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5175 ctxt->sax->error(ctxt->userData,
5176 "'<' in entity '%s' is not allowed in attributes values\n", name);
5177 ctxt->wellFormed = 0;
5178 ctxt->disableSAX = 1;
5179 }
5180
5181 /*
5182 * Internal check, no parameter entities here ...
5183 */
5184 else {
5185 switch (ent->etype) {
5186 case XML_INTERNAL_PARAMETER_ENTITY:
5187 case XML_EXTERNAL_PARAMETER_ENTITY:
5188 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5190 ctxt->sax->error(ctxt->userData,
5191 "Attempt to reference the parameter entity '%s'\n", name);
5192 ctxt->wellFormed = 0;
5193 ctxt->disableSAX = 1;
5194 break;
5195 default:
5196 break;
5197 }
5198 }
5199
5200 /*
5201 * [ WFC: No Recursion ]
5202 * A parsed entity must not contain a recursive reference
5203 * to itself, either directly or indirectly.
5204 * Done somewhere else
5205 */
5206
5207 } else {
5208 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5210 ctxt->sax->error(ctxt->userData,
5211 "xmlParseEntityRef: expecting ';'\n");
5212 ctxt->wellFormed = 0;
5213 ctxt->disableSAX = 1;
5214 }
5215 xmlFree(name);
5216 }
5217 }
5218 return(ent);
5219}
5220
5221/**
5222 * xmlParseStringEntityRef:
5223 * @ctxt: an XML parser context
5224 * @str: a pointer to an index in the string
5225 *
5226 * parse ENTITY references declarations, but this version parses it from
5227 * a string value.
5228 *
5229 * [68] EntityRef ::= '&' Name ';'
5230 *
5231 * [ WFC: Entity Declared ]
5232 * In a document without any DTD, a document with only an internal DTD
5233 * subset which contains no parameter entity references, or a document
5234 * with "standalone='yes'", the Name given in the entity reference
5235 * must match that in an entity declaration, except that well-formed
5236 * documents need not declare any of the following entities: amp, lt,
5237 * gt, apos, quot. The declaration of a parameter entity must precede
5238 * any reference to it. Similarly, the declaration of a general entity
5239 * must precede any reference to it which appears in a default value in an
5240 * attribute-list declaration. Note that if entities are declared in the
5241 * external subset or in external parameter entities, a non-validating
5242 * processor is not obligated to read and process their declarations;
5243 * for such documents, the rule that an entity must be declared is a
5244 * well-formedness constraint only if standalone='yes'.
5245 *
5246 * [ WFC: Parsed Entity ]
5247 * An entity reference must not contain the name of an unparsed entity
5248 *
5249 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5250 * is updated to the current location in the string.
5251 */
5252xmlEntityPtr
5253xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5254 xmlChar *name;
5255 const xmlChar *ptr;
5256 xmlChar cur;
5257 xmlEntityPtr ent = NULL;
5258
5259 if ((str == NULL) || (*str == NULL))
5260 return(NULL);
5261 ptr = *str;
5262 cur = *ptr;
5263 if (cur == '&') {
5264 ptr++;
5265 cur = *ptr;
5266 name = xmlParseStringName(ctxt, &ptr);
5267 if (name == NULL) {
5268 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5270 ctxt->sax->error(ctxt->userData,
5271 "xmlParseEntityRef: no name\n");
5272 ctxt->wellFormed = 0;
5273 ctxt->disableSAX = 1;
5274 } else {
5275 if (*ptr == ';') {
5276 ptr++;
5277 /*
5278 * Ask first SAX for entity resolution, otherwise try the
5279 * predefined set.
5280 */
5281 if (ctxt->sax != NULL) {
5282 if (ctxt->sax->getEntity != NULL)
5283 ent = ctxt->sax->getEntity(ctxt->userData, name);
5284 if (ent == NULL)
5285 ent = xmlGetPredefinedEntity(name);
5286 }
5287 /*
5288 * [ WFC: Entity Declared ]
5289 * In a document without any DTD, a document with only an
5290 * internal DTD subset which contains no parameter entity
5291 * references, or a document with "standalone='yes'", the
5292 * Name given in the entity reference must match that in an
5293 * entity declaration, except that well-formed documents
5294 * need not declare any of the following entities: amp, lt,
5295 * gt, apos, quot.
5296 * The declaration of a parameter entity must precede any
5297 * reference to it.
5298 * Similarly, the declaration of a general entity must
5299 * precede any reference to it which appears in a default
5300 * value in an attribute-list declaration. Note that if
5301 * entities are declared in the external subset or in
5302 * external parameter entities, a non-validating processor
5303 * is not obligated to read and process their declarations;
5304 * for such documents, the rule that an entity must be
5305 * declared is a well-formedness constraint only if
5306 * standalone='yes'.
5307 */
5308 if (ent == NULL) {
5309 if ((ctxt->standalone == 1) ||
5310 ((ctxt->hasExternalSubset == 0) &&
5311 (ctxt->hasPErefs == 0))) {
5312 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5314 ctxt->sax->error(ctxt->userData,
5315 "Entity '%s' not defined\n", name);
5316 ctxt->wellFormed = 0;
5317 ctxt->disableSAX = 1;
5318 } else {
5319 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5321 ctxt->sax->warning(ctxt->userData,
5322 "Entity '%s' not defined\n", name);
5323 }
5324 }
5325
5326 /*
5327 * [ WFC: Parsed Entity ]
5328 * An entity reference must not contain the name of an
5329 * unparsed entity
5330 */
5331 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5332 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5334 ctxt->sax->error(ctxt->userData,
5335 "Entity reference to unparsed entity %s\n", name);
5336 ctxt->wellFormed = 0;
5337 ctxt->disableSAX = 1;
5338 }
5339
5340 /*
5341 * [ WFC: No External Entity References ]
5342 * Attribute values cannot contain direct or indirect
5343 * entity references to external entities.
5344 */
5345 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5346 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5347 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5349 ctxt->sax->error(ctxt->userData,
5350 "Attribute references external entity '%s'\n", name);
5351 ctxt->wellFormed = 0;
5352 ctxt->disableSAX = 1;
5353 }
5354 /*
5355 * [ WFC: No < in Attribute Values ]
5356 * The replacement text of any entity referred to directly or
5357 * indirectly in an attribute value (other than "&lt;") must
5358 * not contain a <.
5359 */
5360 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5361 (ent != NULL) &&
5362 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5363 (ent->content != NULL) &&
5364 (xmlStrchr(ent->content, '<'))) {
5365 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5367 ctxt->sax->error(ctxt->userData,
5368 "'<' in entity '%s' is not allowed in attributes values\n", name);
5369 ctxt->wellFormed = 0;
5370 ctxt->disableSAX = 1;
5371 }
5372
5373 /*
5374 * Internal check, no parameter entities here ...
5375 */
5376 else {
5377 switch (ent->etype) {
5378 case XML_INTERNAL_PARAMETER_ENTITY:
5379 case XML_EXTERNAL_PARAMETER_ENTITY:
5380 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5382 ctxt->sax->error(ctxt->userData,
5383 "Attempt to reference the parameter entity '%s'\n", name);
5384 ctxt->wellFormed = 0;
5385 ctxt->disableSAX = 1;
5386 break;
5387 default:
5388 break;
5389 }
5390 }
5391
5392 /*
5393 * [ WFC: No Recursion ]
5394 * A parsed entity must not contain a recursive reference
5395 * to itself, either directly or indirectly.
5396 * Done somewhwere else
5397 */
5398
5399 } else {
5400 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5402 ctxt->sax->error(ctxt->userData,
5403 "xmlParseEntityRef: expecting ';'\n");
5404 ctxt->wellFormed = 0;
5405 ctxt->disableSAX = 1;
5406 }
5407 xmlFree(name);
5408 }
5409 }
5410 *str = ptr;
5411 return(ent);
5412}
5413
5414/**
5415 * xmlParsePEReference:
5416 * @ctxt: an XML parser context
5417 *
5418 * parse PEReference declarations
5419 * The entity content is handled directly by pushing it's content as
5420 * a new input stream.
5421 *
5422 * [69] PEReference ::= '%' Name ';'
5423 *
5424 * [ WFC: No Recursion ]
5425 * A parsed entity must not contain a recursive
5426 * reference to itself, either directly or indirectly.
5427 *
5428 * [ WFC: Entity Declared ]
5429 * In a document without any DTD, a document with only an internal DTD
5430 * subset which contains no parameter entity references, or a document
5431 * with "standalone='yes'", ... ... The declaration of a parameter
5432 * entity must precede any reference to it...
5433 *
5434 * [ VC: Entity Declared ]
5435 * In a document with an external subset or external parameter entities
5436 * with "standalone='no'", ... ... The declaration of a parameter entity
5437 * must precede any reference to it...
5438 *
5439 * [ WFC: In DTD ]
5440 * Parameter-entity references may only appear in the DTD.
5441 * NOTE: misleading but this is handled.
5442 */
5443void
5444xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5445 xmlChar *name;
5446 xmlEntityPtr entity = NULL;
5447 xmlParserInputPtr input;
5448
5449 if (RAW == '%') {
5450 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005451 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005452 if (name == NULL) {
5453 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5455 ctxt->sax->error(ctxt->userData,
5456 "xmlParsePEReference: no name\n");
5457 ctxt->wellFormed = 0;
5458 ctxt->disableSAX = 1;
5459 } else {
5460 if (RAW == ';') {
5461 NEXT;
5462 if ((ctxt->sax != NULL) &&
5463 (ctxt->sax->getParameterEntity != NULL))
5464 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5465 name);
5466 if (entity == NULL) {
5467 /*
5468 * [ WFC: Entity Declared ]
5469 * In a document without any DTD, a document with only an
5470 * internal DTD subset which contains no parameter entity
5471 * references, or a document with "standalone='yes'", ...
5472 * ... The declaration of a parameter entity must precede
5473 * any reference to it...
5474 */
5475 if ((ctxt->standalone == 1) ||
5476 ((ctxt->hasExternalSubset == 0) &&
5477 (ctxt->hasPErefs == 0))) {
5478 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5479 if ((!ctxt->disableSAX) &&
5480 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5481 ctxt->sax->error(ctxt->userData,
5482 "PEReference: %%%s; not found\n", name);
5483 ctxt->wellFormed = 0;
5484 ctxt->disableSAX = 1;
5485 } else {
5486 /*
5487 * [ VC: Entity Declared ]
5488 * In a document with an external subset or external
5489 * parameter entities with "standalone='no'", ...
5490 * ... The declaration of a parameter entity must precede
5491 * any reference to it...
5492 */
5493 if ((!ctxt->disableSAX) &&
5494 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5495 ctxt->sax->warning(ctxt->userData,
5496 "PEReference: %%%s; not found\n", name);
5497 ctxt->valid = 0;
5498 }
5499 } else {
5500 /*
5501 * Internal checking in case the entity quest barfed
5502 */
5503 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5504 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5505 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5506 ctxt->sax->warning(ctxt->userData,
5507 "Internal: %%%s; is not a parameter entity\n", name);
5508 } else {
5509 /*
5510 * TODO !!!
5511 * handle the extra spaces added before and after
5512 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5513 */
5514 input = xmlNewEntityInputStream(ctxt, entity);
5515 xmlPushInput(ctxt, input);
5516 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5517 (RAW == '<') && (NXT(1) == '?') &&
5518 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5519 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5520 xmlParseTextDecl(ctxt);
5521 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5522 /*
5523 * The XML REC instructs us to stop parsing
5524 * right here
5525 */
5526 ctxt->instate = XML_PARSER_EOF;
5527 xmlFree(name);
5528 return;
5529 }
5530 }
5531 if (ctxt->token == 0)
5532 ctxt->token = ' ';
5533 }
5534 }
5535 ctxt->hasPErefs = 1;
5536 } else {
5537 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5539 ctxt->sax->error(ctxt->userData,
5540 "xmlParsePEReference: expecting ';'\n");
5541 ctxt->wellFormed = 0;
5542 ctxt->disableSAX = 1;
5543 }
5544 xmlFree(name);
5545 }
5546 }
5547}
5548
5549/**
5550 * xmlParseStringPEReference:
5551 * @ctxt: an XML parser context
5552 * @str: a pointer to an index in the string
5553 *
5554 * parse PEReference declarations
5555 *
5556 * [69] PEReference ::= '%' Name ';'
5557 *
5558 * [ WFC: No Recursion ]
5559 * A parsed entity must not contain a recursive
5560 * reference to itself, either directly or indirectly.
5561 *
5562 * [ WFC: Entity Declared ]
5563 * In a document without any DTD, a document with only an internal DTD
5564 * subset which contains no parameter entity references, or a document
5565 * with "standalone='yes'", ... ... The declaration of a parameter
5566 * entity must precede any reference to it...
5567 *
5568 * [ VC: Entity Declared ]
5569 * In a document with an external subset or external parameter entities
5570 * with "standalone='no'", ... ... The declaration of a parameter entity
5571 * must precede any reference to it...
5572 *
5573 * [ WFC: In DTD ]
5574 * Parameter-entity references may only appear in the DTD.
5575 * NOTE: misleading but this is handled.
5576 *
5577 * Returns the string of the entity content.
5578 * str is updated to the current value of the index
5579 */
5580xmlEntityPtr
5581xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5582 const xmlChar *ptr;
5583 xmlChar cur;
5584 xmlChar *name;
5585 xmlEntityPtr entity = NULL;
5586
5587 if ((str == NULL) || (*str == NULL)) return(NULL);
5588 ptr = *str;
5589 cur = *ptr;
5590 if (cur == '%') {
5591 ptr++;
5592 cur = *ptr;
5593 name = xmlParseStringName(ctxt, &ptr);
5594 if (name == NULL) {
5595 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5597 ctxt->sax->error(ctxt->userData,
5598 "xmlParseStringPEReference: no name\n");
5599 ctxt->wellFormed = 0;
5600 ctxt->disableSAX = 1;
5601 } else {
5602 cur = *ptr;
5603 if (cur == ';') {
5604 ptr++;
5605 cur = *ptr;
5606 if ((ctxt->sax != NULL) &&
5607 (ctxt->sax->getParameterEntity != NULL))
5608 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5609 name);
5610 if (entity == NULL) {
5611 /*
5612 * [ WFC: Entity Declared ]
5613 * In a document without any DTD, a document with only an
5614 * internal DTD subset which contains no parameter entity
5615 * references, or a document with "standalone='yes'", ...
5616 * ... The declaration of a parameter entity must precede
5617 * any reference to it...
5618 */
5619 if ((ctxt->standalone == 1) ||
5620 ((ctxt->hasExternalSubset == 0) &&
5621 (ctxt->hasPErefs == 0))) {
5622 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5624 ctxt->sax->error(ctxt->userData,
5625 "PEReference: %%%s; not found\n", name);
5626 ctxt->wellFormed = 0;
5627 ctxt->disableSAX = 1;
5628 } else {
5629 /*
5630 * [ VC: Entity Declared ]
5631 * In a document with an external subset or external
5632 * parameter entities with "standalone='no'", ...
5633 * ... The declaration of a parameter entity must
5634 * precede any reference to it...
5635 */
5636 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5637 ctxt->sax->warning(ctxt->userData,
5638 "PEReference: %%%s; not found\n", name);
5639 ctxt->valid = 0;
5640 }
5641 } else {
5642 /*
5643 * Internal checking in case the entity quest barfed
5644 */
5645 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5646 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5647 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5648 ctxt->sax->warning(ctxt->userData,
5649 "Internal: %%%s; is not a parameter entity\n", name);
5650 }
5651 }
5652 ctxt->hasPErefs = 1;
5653 } else {
5654 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5656 ctxt->sax->error(ctxt->userData,
5657 "xmlParseStringPEReference: expecting ';'\n");
5658 ctxt->wellFormed = 0;
5659 ctxt->disableSAX = 1;
5660 }
5661 xmlFree(name);
5662 }
5663 }
5664 *str = ptr;
5665 return(entity);
5666}
5667
5668/**
5669 * xmlParseDocTypeDecl:
5670 * @ctxt: an XML parser context
5671 *
5672 * parse a DOCTYPE declaration
5673 *
5674 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5675 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5676 *
5677 * [ VC: Root Element Type ]
5678 * The Name in the document type declaration must match the element
5679 * type of the root element.
5680 */
5681
5682void
5683xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5684 xmlChar *name = NULL;
5685 xmlChar *ExternalID = NULL;
5686 xmlChar *URI = NULL;
5687
5688 /*
5689 * We know that '<!DOCTYPE' has been detected.
5690 */
5691 SKIP(9);
5692
5693 SKIP_BLANKS;
5694
5695 /*
5696 * Parse the DOCTYPE name.
5697 */
5698 name = xmlParseName(ctxt);
5699 if (name == NULL) {
5700 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5702 ctxt->sax->error(ctxt->userData,
5703 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5704 ctxt->wellFormed = 0;
5705 ctxt->disableSAX = 1;
5706 }
5707 ctxt->intSubName = name;
5708
5709 SKIP_BLANKS;
5710
5711 /*
5712 * Check for SystemID and ExternalID
5713 */
5714 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5715
5716 if ((URI != NULL) || (ExternalID != NULL)) {
5717 ctxt->hasExternalSubset = 1;
5718 }
5719 ctxt->extSubURI = URI;
5720 ctxt->extSubSystem = ExternalID;
5721
5722 SKIP_BLANKS;
5723
5724 /*
5725 * Create and update the internal subset.
5726 */
5727 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5728 (!ctxt->disableSAX))
5729 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5730
5731 /*
5732 * Is there any internal subset declarations ?
5733 * they are handled separately in xmlParseInternalSubset()
5734 */
5735 if (RAW == '[')
5736 return;
5737
5738 /*
5739 * We should be at the end of the DOCTYPE declaration.
5740 */
5741 if (RAW != '>') {
5742 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5744 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5745 ctxt->wellFormed = 0;
5746 ctxt->disableSAX = 1;
5747 }
5748 NEXT;
5749}
5750
5751/**
5752 * xmlParseInternalsubset:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse the internal subset declaration
5756 *
5757 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5758 */
5759
5760void
5761xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5762 /*
5763 * Is there any DTD definition ?
5764 */
5765 if (RAW == '[') {
5766 ctxt->instate = XML_PARSER_DTD;
5767 NEXT;
5768 /*
5769 * Parse the succession of Markup declarations and
5770 * PEReferences.
5771 * Subsequence (markupdecl | PEReference | S)*
5772 */
5773 while (RAW != ']') {
5774 const xmlChar *check = CUR_PTR;
5775 int cons = ctxt->input->consumed;
5776
5777 SKIP_BLANKS;
5778 xmlParseMarkupDecl(ctxt);
5779 xmlParsePEReference(ctxt);
5780
5781 /*
5782 * Pop-up of finished entities.
5783 */
5784 while ((RAW == 0) && (ctxt->inputNr > 1))
5785 xmlPopInput(ctxt);
5786
5787 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5788 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5790 ctxt->sax->error(ctxt->userData,
5791 "xmlParseInternalSubset: error detected in Markup declaration\n");
5792 ctxt->wellFormed = 0;
5793 ctxt->disableSAX = 1;
5794 break;
5795 }
5796 }
5797 if (RAW == ']') {
5798 NEXT;
5799 SKIP_BLANKS;
5800 }
5801 }
5802
5803 /*
5804 * We should be at the end of the DOCTYPE declaration.
5805 */
5806 if (RAW != '>') {
5807 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5809 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5810 ctxt->wellFormed = 0;
5811 ctxt->disableSAX = 1;
5812 }
5813 NEXT;
5814}
5815
5816/**
5817 * xmlParseAttribute:
5818 * @ctxt: an XML parser context
5819 * @value: a xmlChar ** used to store the value of the attribute
5820 *
5821 * parse an attribute
5822 *
5823 * [41] Attribute ::= Name Eq AttValue
5824 *
5825 * [ WFC: No External Entity References ]
5826 * Attribute values cannot contain direct or indirect entity references
5827 * to external entities.
5828 *
5829 * [ WFC: No < in Attribute Values ]
5830 * The replacement text of any entity referred to directly or indirectly in
5831 * an attribute value (other than "&lt;") must not contain a <.
5832 *
5833 * [ VC: Attribute Value Type ]
5834 * The attribute must have been declared; the value must be of the type
5835 * declared for it.
5836 *
5837 * [25] Eq ::= S? '=' S?
5838 *
5839 * With namespace:
5840 *
5841 * [NS 11] Attribute ::= QName Eq AttValue
5842 *
5843 * Also the case QName == xmlns:??? is handled independently as a namespace
5844 * definition.
5845 *
5846 * Returns the attribute name, and the value in *value.
5847 */
5848
5849xmlChar *
5850xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5851 xmlChar *name, *val;
5852
5853 *value = NULL;
5854 name = xmlParseName(ctxt);
5855 if (name == NULL) {
5856 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5858 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5859 ctxt->wellFormed = 0;
5860 ctxt->disableSAX = 1;
5861 return(NULL);
5862 }
5863
5864 /*
5865 * read the value
5866 */
5867 SKIP_BLANKS;
5868 if (RAW == '=') {
5869 NEXT;
5870 SKIP_BLANKS;
5871 val = xmlParseAttValue(ctxt);
5872 ctxt->instate = XML_PARSER_CONTENT;
5873 } else {
5874 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5876 ctxt->sax->error(ctxt->userData,
5877 "Specification mandate value for attribute %s\n", name);
5878 ctxt->wellFormed = 0;
5879 ctxt->disableSAX = 1;
5880 xmlFree(name);
5881 return(NULL);
5882 }
5883
5884 /*
5885 * Check that xml:lang conforms to the specification
5886 * No more registered as an error, just generate a warning now
5887 * since this was deprecated in XML second edition
5888 */
5889 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5890 if (!xmlCheckLanguageID(val)) {
5891 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5892 ctxt->sax->warning(ctxt->userData,
5893 "Malformed value for xml:lang : %s\n", val);
5894 }
5895 }
5896
5897 /*
5898 * Check that xml:space conforms to the specification
5899 */
5900 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5901 if (xmlStrEqual(val, BAD_CAST "default"))
5902 *(ctxt->space) = 0;
5903 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5904 *(ctxt->space) = 1;
5905 else {
5906 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5908 ctxt->sax->error(ctxt->userData,
5909"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5910 val);
5911 ctxt->wellFormed = 0;
5912 ctxt->disableSAX = 1;
5913 }
5914 }
5915
5916 *value = val;
5917 return(name);
5918}
5919
5920/**
5921 * xmlParseStartTag:
5922 * @ctxt: an XML parser context
5923 *
5924 * parse a start of tag either for rule element or
5925 * EmptyElement. In both case we don't parse the tag closing chars.
5926 *
5927 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5928 *
5929 * [ WFC: Unique Att Spec ]
5930 * No attribute name may appear more than once in the same start-tag or
5931 * empty-element tag.
5932 *
5933 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5934 *
5935 * [ WFC: Unique Att Spec ]
5936 * No attribute name may appear more than once in the same start-tag or
5937 * empty-element tag.
5938 *
5939 * With namespace:
5940 *
5941 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5942 *
5943 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5944 *
5945 * Returns the element name parsed
5946 */
5947
5948xmlChar *
5949xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5950 xmlChar *name;
5951 xmlChar *attname;
5952 xmlChar *attvalue;
5953 const xmlChar **atts = NULL;
5954 int nbatts = 0;
5955 int maxatts = 0;
5956 int i;
5957
5958 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005959 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005960
5961 name = xmlParseName(ctxt);
5962 if (name == NULL) {
5963 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5965 ctxt->sax->error(ctxt->userData,
5966 "xmlParseStartTag: invalid element name\n");
5967 ctxt->wellFormed = 0;
5968 ctxt->disableSAX = 1;
5969 return(NULL);
5970 }
5971
5972 /*
5973 * Now parse the attributes, it ends up with the ending
5974 *
5975 * (S Attribute)* S?
5976 */
5977 SKIP_BLANKS;
5978 GROW;
5979
Daniel Veillard21a0f912001-02-25 19:54:14 +00005980 while ((RAW != '>') &&
5981 ((RAW != '/') || (NXT(1) != '>')) &&
5982 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005983 const xmlChar *q = CUR_PTR;
5984 int cons = ctxt->input->consumed;
5985
5986 attname = xmlParseAttribute(ctxt, &attvalue);
5987 if ((attname != NULL) && (attvalue != NULL)) {
5988 /*
5989 * [ WFC: Unique Att Spec ]
5990 * No attribute name may appear more than once in the same
5991 * start-tag or empty-element tag.
5992 */
5993 for (i = 0; i < nbatts;i += 2) {
5994 if (xmlStrEqual(atts[i], attname)) {
5995 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
5996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5997 ctxt->sax->error(ctxt->userData,
5998 "Attribute %s redefined\n",
5999 attname);
6000 ctxt->wellFormed = 0;
6001 ctxt->disableSAX = 1;
6002 xmlFree(attname);
6003 xmlFree(attvalue);
6004 goto failed;
6005 }
6006 }
6007
6008 /*
6009 * Add the pair to atts
6010 */
6011 if (atts == NULL) {
6012 maxatts = 10;
6013 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6014 if (atts == NULL) {
6015 xmlGenericError(xmlGenericErrorContext,
6016 "malloc of %ld byte failed\n",
6017 maxatts * (long)sizeof(xmlChar *));
6018 return(NULL);
6019 }
6020 } else if (nbatts + 4 > maxatts) {
6021 maxatts *= 2;
6022 atts = (const xmlChar **) xmlRealloc((void *) atts,
6023 maxatts * sizeof(xmlChar *));
6024 if (atts == NULL) {
6025 xmlGenericError(xmlGenericErrorContext,
6026 "realloc of %ld byte failed\n",
6027 maxatts * (long)sizeof(xmlChar *));
6028 return(NULL);
6029 }
6030 }
6031 atts[nbatts++] = attname;
6032 atts[nbatts++] = attvalue;
6033 atts[nbatts] = NULL;
6034 atts[nbatts + 1] = NULL;
6035 } else {
6036 if (attname != NULL)
6037 xmlFree(attname);
6038 if (attvalue != NULL)
6039 xmlFree(attvalue);
6040 }
6041
6042failed:
6043
6044 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6045 break;
6046 if (!IS_BLANK(RAW)) {
6047 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6049 ctxt->sax->error(ctxt->userData,
6050 "attributes construct error\n");
6051 ctxt->wellFormed = 0;
6052 ctxt->disableSAX = 1;
6053 }
6054 SKIP_BLANKS;
6055 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6056 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6058 ctxt->sax->error(ctxt->userData,
6059 "xmlParseStartTag: problem parsing attributes\n");
6060 ctxt->wellFormed = 0;
6061 ctxt->disableSAX = 1;
6062 break;
6063 }
6064 GROW;
6065 }
6066
6067 /*
6068 * SAX: Start of Element !
6069 */
6070 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6071 (!ctxt->disableSAX))
6072 ctxt->sax->startElement(ctxt->userData, name, atts);
6073
6074 if (atts != NULL) {
6075 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6076 xmlFree((void *) atts);
6077 }
6078 return(name);
6079}
6080
6081/**
6082 * xmlParseEndTag:
6083 * @ctxt: an XML parser context
6084 *
6085 * parse an end of tag
6086 *
6087 * [42] ETag ::= '</' Name S? '>'
6088 *
6089 * With namespace
6090 *
6091 * [NS 9] ETag ::= '</' QName S? '>'
6092 */
6093
6094void
6095xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6096 xmlChar *name;
6097 xmlChar *oldname;
6098
6099 GROW;
6100 if ((RAW != '<') || (NXT(1) != '/')) {
6101 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6103 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6104 ctxt->wellFormed = 0;
6105 ctxt->disableSAX = 1;
6106 return;
6107 }
6108 SKIP(2);
6109
6110 name = xmlParseName(ctxt);
6111
6112 /*
6113 * We should definitely be at the ending "S? '>'" part
6114 */
6115 GROW;
6116 SKIP_BLANKS;
6117 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6118 ctxt->errNo = XML_ERR_GT_REQUIRED;
6119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6120 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6121 ctxt->wellFormed = 0;
6122 ctxt->disableSAX = 1;
6123 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006124 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006125
6126 /*
6127 * [ WFC: Element Type Match ]
6128 * The Name in an element's end-tag must match the element type in the
6129 * start-tag.
6130 *
6131 */
6132 if ((name == NULL) || (ctxt->name == NULL) ||
6133 (!xmlStrEqual(name, ctxt->name))) {
6134 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6136 if ((name != NULL) && (ctxt->name != NULL)) {
6137 ctxt->sax->error(ctxt->userData,
6138 "Opening and ending tag mismatch: %s and %s\n",
6139 ctxt->name, name);
6140 } else if (ctxt->name != NULL) {
6141 ctxt->sax->error(ctxt->userData,
6142 "Ending tag eror for: %s\n", ctxt->name);
6143 } else {
6144 ctxt->sax->error(ctxt->userData,
6145 "Ending tag error: internal error ???\n");
6146 }
6147
6148 }
6149 ctxt->wellFormed = 0;
6150 ctxt->disableSAX = 1;
6151 }
6152
6153 /*
6154 * SAX: End of Tag
6155 */
6156 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6157 (!ctxt->disableSAX))
6158 ctxt->sax->endElement(ctxt->userData, name);
6159
6160 if (name != NULL)
6161 xmlFree(name);
6162 oldname = namePop(ctxt);
6163 spacePop(ctxt);
6164 if (oldname != NULL) {
6165#ifdef DEBUG_STACK
6166 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6167#endif
6168 xmlFree(oldname);
6169 }
6170 return;
6171}
6172
6173/**
6174 * xmlParseCDSect:
6175 * @ctxt: an XML parser context
6176 *
6177 * Parse escaped pure raw content.
6178 *
6179 * [18] CDSect ::= CDStart CData CDEnd
6180 *
6181 * [19] CDStart ::= '<![CDATA['
6182 *
6183 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6184 *
6185 * [21] CDEnd ::= ']]>'
6186 */
6187void
6188xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6189 xmlChar *buf = NULL;
6190 int len = 0;
6191 int size = XML_PARSER_BUFFER_SIZE;
6192 int r, rl;
6193 int s, sl;
6194 int cur, l;
6195 int count = 0;
6196
6197 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6198 (NXT(2) == '[') && (NXT(3) == 'C') &&
6199 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6200 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6201 (NXT(8) == '[')) {
6202 SKIP(9);
6203 } else
6204 return;
6205
6206 ctxt->instate = XML_PARSER_CDATA_SECTION;
6207 r = CUR_CHAR(rl);
6208 if (!IS_CHAR(r)) {
6209 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6211 ctxt->sax->error(ctxt->userData,
6212 "CData section not finished\n");
6213 ctxt->wellFormed = 0;
6214 ctxt->disableSAX = 1;
6215 ctxt->instate = XML_PARSER_CONTENT;
6216 return;
6217 }
6218 NEXTL(rl);
6219 s = CUR_CHAR(sl);
6220 if (!IS_CHAR(s)) {
6221 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6223 ctxt->sax->error(ctxt->userData,
6224 "CData section not finished\n");
6225 ctxt->wellFormed = 0;
6226 ctxt->disableSAX = 1;
6227 ctxt->instate = XML_PARSER_CONTENT;
6228 return;
6229 }
6230 NEXTL(sl);
6231 cur = CUR_CHAR(l);
6232 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6233 if (buf == NULL) {
6234 xmlGenericError(xmlGenericErrorContext,
6235 "malloc of %d byte failed\n", size);
6236 return;
6237 }
6238 while (IS_CHAR(cur) &&
6239 ((r != ']') || (s != ']') || (cur != '>'))) {
6240 if (len + 5 >= size) {
6241 size *= 2;
6242 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6243 if (buf == NULL) {
6244 xmlGenericError(xmlGenericErrorContext,
6245 "realloc of %d byte failed\n", size);
6246 return;
6247 }
6248 }
6249 COPY_BUF(rl,buf,len,r);
6250 r = s;
6251 rl = sl;
6252 s = cur;
6253 sl = l;
6254 count++;
6255 if (count > 50) {
6256 GROW;
6257 count = 0;
6258 }
6259 NEXTL(l);
6260 cur = CUR_CHAR(l);
6261 }
6262 buf[len] = 0;
6263 ctxt->instate = XML_PARSER_CONTENT;
6264 if (cur != '>') {
6265 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6267 ctxt->sax->error(ctxt->userData,
6268 "CData section not finished\n%.50s\n", buf);
6269 ctxt->wellFormed = 0;
6270 ctxt->disableSAX = 1;
6271 xmlFree(buf);
6272 return;
6273 }
6274 NEXTL(l);
6275
6276 /*
6277 * Ok the buffer is to be consumed as cdata.
6278 */
6279 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6280 if (ctxt->sax->cdataBlock != NULL)
6281 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6282 }
6283 xmlFree(buf);
6284}
6285
6286/**
6287 * xmlParseContent:
6288 * @ctxt: an XML parser context
6289 *
6290 * Parse a content:
6291 *
6292 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6293 */
6294
6295void
6296xmlParseContent(xmlParserCtxtPtr ctxt) {
6297 GROW;
6298 while (((RAW != 0) || (ctxt->token != 0)) &&
6299 ((RAW != '<') || (NXT(1) != '/'))) {
6300 const xmlChar *test = CUR_PTR;
6301 int cons = ctxt->input->consumed;
6302 xmlChar tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006303 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006304
6305 /*
6306 * Handle possible processed charrefs.
6307 */
6308 if (ctxt->token != 0) {
6309 xmlParseCharData(ctxt, 0);
6310 }
6311 /*
6312 * First case : a Processing Instruction.
6313 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006314 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006315 xmlParsePI(ctxt);
6316 }
6317
6318 /*
6319 * Second case : a CDSection
6320 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006321 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006322 (NXT(2) == '[') && (NXT(3) == 'C') &&
6323 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6324 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6325 (NXT(8) == '[')) {
6326 xmlParseCDSect(ctxt);
6327 }
6328
6329 /*
6330 * Third case : a comment
6331 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006332 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006333 (NXT(2) == '-') && (NXT(3) == '-')) {
6334 xmlParseComment(ctxt);
6335 ctxt->instate = XML_PARSER_CONTENT;
6336 }
6337
6338 /*
6339 * Fourth case : a sub-element.
6340 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006341 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006342 xmlParseElement(ctxt);
6343 }
6344
6345 /*
6346 * Fifth case : a reference. If if has not been resolved,
6347 * parsing returns it's Name, create the node
6348 */
6349
Daniel Veillard21a0f912001-02-25 19:54:14 +00006350 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006351 xmlParseReference(ctxt);
6352 }
6353
6354 /*
6355 * Last case, text. Note that References are handled directly.
6356 */
6357 else {
6358 xmlParseCharData(ctxt, 0);
6359 }
6360
6361 GROW;
6362 /*
6363 * Pop-up of finished entities.
6364 */
6365 while ((RAW == 0) && (ctxt->inputNr > 1))
6366 xmlPopInput(ctxt);
6367 SHRINK;
6368
6369 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6370 (tok == ctxt->token)) {
6371 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6373 ctxt->sax->error(ctxt->userData,
6374 "detected an error in element content\n");
6375 ctxt->wellFormed = 0;
6376 ctxt->disableSAX = 1;
6377 ctxt->instate = XML_PARSER_EOF;
6378 break;
6379 }
6380 }
6381}
6382
6383/**
6384 * xmlParseElement:
6385 * @ctxt: an XML parser context
6386 *
6387 * parse an XML element, this is highly recursive
6388 *
6389 * [39] element ::= EmptyElemTag | STag content ETag
6390 *
6391 * [ WFC: Element Type Match ]
6392 * The Name in an element's end-tag must match the element type in the
6393 * start-tag.
6394 *
6395 * [ VC: Element Valid ]
6396 * An element is valid if there is a declaration matching elementdecl
6397 * where the Name matches the element type and one of the following holds:
6398 * - The declaration matches EMPTY and the element has no content.
6399 * - The declaration matches children and the sequence of child elements
6400 * belongs to the language generated by the regular expression in the
6401 * content model, with optional white space (characters matching the
6402 * nonterminal S) between each pair of child elements.
6403 * - The declaration matches Mixed and the content consists of character
6404 * data and child elements whose types match names in the content model.
6405 * - The declaration matches ANY, and the types of any child elements have
6406 * been declared.
6407 */
6408
6409void
6410xmlParseElement(xmlParserCtxtPtr ctxt) {
6411 const xmlChar *openTag = CUR_PTR;
6412 xmlChar *name;
6413 xmlChar *oldname;
6414 xmlParserNodeInfo node_info;
6415 xmlNodePtr ret;
6416
6417 /* Capture start position */
6418 if (ctxt->record_info) {
6419 node_info.begin_pos = ctxt->input->consumed +
6420 (CUR_PTR - ctxt->input->base);
6421 node_info.begin_line = ctxt->input->line;
6422 }
6423
6424 if (ctxt->spaceNr == 0)
6425 spacePush(ctxt, -1);
6426 else
6427 spacePush(ctxt, *ctxt->space);
6428
6429 name = xmlParseStartTag(ctxt);
6430 if (name == NULL) {
6431 spacePop(ctxt);
6432 return;
6433 }
6434 namePush(ctxt, name);
6435 ret = ctxt->node;
6436
6437 /*
6438 * [ VC: Root Element Type ]
6439 * The Name in the document type declaration must match the element
6440 * type of the root element.
6441 */
6442 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6443 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6444 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6445
6446 /*
6447 * Check for an Empty Element.
6448 */
6449 if ((RAW == '/') && (NXT(1) == '>')) {
6450 SKIP(2);
6451 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6452 (!ctxt->disableSAX))
6453 ctxt->sax->endElement(ctxt->userData, name);
6454 oldname = namePop(ctxt);
6455 spacePop(ctxt);
6456 if (oldname != NULL) {
6457#ifdef DEBUG_STACK
6458 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6459#endif
6460 xmlFree(oldname);
6461 }
6462 if ( ret != NULL && ctxt->record_info ) {
6463 node_info.end_pos = ctxt->input->consumed +
6464 (CUR_PTR - ctxt->input->base);
6465 node_info.end_line = ctxt->input->line;
6466 node_info.node = ret;
6467 xmlParserAddNodeInfo(ctxt, &node_info);
6468 }
6469 return;
6470 }
6471 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006472 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006473 } else {
6474 ctxt->errNo = XML_ERR_GT_REQUIRED;
6475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6476 ctxt->sax->error(ctxt->userData,
6477 "Couldn't find end of Start Tag\n%.30s\n",
6478 openTag);
6479 ctxt->wellFormed = 0;
6480 ctxt->disableSAX = 1;
6481
6482 /*
6483 * end of parsing of this node.
6484 */
6485 nodePop(ctxt);
6486 oldname = namePop(ctxt);
6487 spacePop(ctxt);
6488 if (oldname != NULL) {
6489#ifdef DEBUG_STACK
6490 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6491#endif
6492 xmlFree(oldname);
6493 }
6494
6495 /*
6496 * Capture end position and add node
6497 */
6498 if ( ret != NULL && ctxt->record_info ) {
6499 node_info.end_pos = ctxt->input->consumed +
6500 (CUR_PTR - ctxt->input->base);
6501 node_info.end_line = ctxt->input->line;
6502 node_info.node = ret;
6503 xmlParserAddNodeInfo(ctxt, &node_info);
6504 }
6505 return;
6506 }
6507
6508 /*
6509 * Parse the content of the element:
6510 */
6511 xmlParseContent(ctxt);
6512 if (!IS_CHAR(RAW)) {
6513 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6515 ctxt->sax->error(ctxt->userData,
6516 "Premature end of data in tag %.30s\n", openTag);
6517 ctxt->wellFormed = 0;
6518 ctxt->disableSAX = 1;
6519
6520 /*
6521 * end of parsing of this node.
6522 */
6523 nodePop(ctxt);
6524 oldname = namePop(ctxt);
6525 spacePop(ctxt);
6526 if (oldname != NULL) {
6527#ifdef DEBUG_STACK
6528 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6529#endif
6530 xmlFree(oldname);
6531 }
6532 return;
6533 }
6534
6535 /*
6536 * parse the end of tag: '</' should be here.
6537 */
6538 xmlParseEndTag(ctxt);
6539
6540 /*
6541 * Capture end position and add node
6542 */
6543 if ( ret != NULL && ctxt->record_info ) {
6544 node_info.end_pos = ctxt->input->consumed +
6545 (CUR_PTR - ctxt->input->base);
6546 node_info.end_line = ctxt->input->line;
6547 node_info.node = ret;
6548 xmlParserAddNodeInfo(ctxt, &node_info);
6549 }
6550}
6551
6552/**
6553 * xmlParseVersionNum:
6554 * @ctxt: an XML parser context
6555 *
6556 * parse the XML version value.
6557 *
6558 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6559 *
6560 * Returns the string giving the XML version number, or NULL
6561 */
6562xmlChar *
6563xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6564 xmlChar *buf = NULL;
6565 int len = 0;
6566 int size = 10;
6567 xmlChar cur;
6568
6569 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6570 if (buf == NULL) {
6571 xmlGenericError(xmlGenericErrorContext,
6572 "malloc of %d byte failed\n", size);
6573 return(NULL);
6574 }
6575 cur = CUR;
6576 while (((cur >= 'a') && (cur <= 'z')) ||
6577 ((cur >= 'A') && (cur <= 'Z')) ||
6578 ((cur >= '0') && (cur <= '9')) ||
6579 (cur == '_') || (cur == '.') ||
6580 (cur == ':') || (cur == '-')) {
6581 if (len + 1 >= size) {
6582 size *= 2;
6583 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6584 if (buf == NULL) {
6585 xmlGenericError(xmlGenericErrorContext,
6586 "realloc of %d byte failed\n", size);
6587 return(NULL);
6588 }
6589 }
6590 buf[len++] = cur;
6591 NEXT;
6592 cur=CUR;
6593 }
6594 buf[len] = 0;
6595 return(buf);
6596}
6597
6598/**
6599 * xmlParseVersionInfo:
6600 * @ctxt: an XML parser context
6601 *
6602 * parse the XML version.
6603 *
6604 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6605 *
6606 * [25] Eq ::= S? '=' S?
6607 *
6608 * Returns the version string, e.g. "1.0"
6609 */
6610
6611xmlChar *
6612xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6613 xmlChar *version = NULL;
6614 const xmlChar *q;
6615
6616 if ((RAW == 'v') && (NXT(1) == 'e') &&
6617 (NXT(2) == 'r') && (NXT(3) == 's') &&
6618 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6619 (NXT(6) == 'n')) {
6620 SKIP(7);
6621 SKIP_BLANKS;
6622 if (RAW != '=') {
6623 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6625 ctxt->sax->error(ctxt->userData,
6626 "xmlParseVersionInfo : expected '='\n");
6627 ctxt->wellFormed = 0;
6628 ctxt->disableSAX = 1;
6629 return(NULL);
6630 }
6631 NEXT;
6632 SKIP_BLANKS;
6633 if (RAW == '"') {
6634 NEXT;
6635 q = CUR_PTR;
6636 version = xmlParseVersionNum(ctxt);
6637 if (RAW != '"') {
6638 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6640 ctxt->sax->error(ctxt->userData,
6641 "String not closed\n%.50s\n", q);
6642 ctxt->wellFormed = 0;
6643 ctxt->disableSAX = 1;
6644 } else
6645 NEXT;
6646 } else if (RAW == '\''){
6647 NEXT;
6648 q = CUR_PTR;
6649 version = xmlParseVersionNum(ctxt);
6650 if (RAW != '\'') {
6651 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6653 ctxt->sax->error(ctxt->userData,
6654 "String not closed\n%.50s\n", q);
6655 ctxt->wellFormed = 0;
6656 ctxt->disableSAX = 1;
6657 } else
6658 NEXT;
6659 } else {
6660 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6662 ctxt->sax->error(ctxt->userData,
6663 "xmlParseVersionInfo : expected ' or \"\n");
6664 ctxt->wellFormed = 0;
6665 ctxt->disableSAX = 1;
6666 }
6667 }
6668 return(version);
6669}
6670
6671/**
6672 * xmlParseEncName:
6673 * @ctxt: an XML parser context
6674 *
6675 * parse the XML encoding name
6676 *
6677 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6678 *
6679 * Returns the encoding name value or NULL
6680 */
6681xmlChar *
6682xmlParseEncName(xmlParserCtxtPtr ctxt) {
6683 xmlChar *buf = NULL;
6684 int len = 0;
6685 int size = 10;
6686 xmlChar cur;
6687
6688 cur = CUR;
6689 if (((cur >= 'a') && (cur <= 'z')) ||
6690 ((cur >= 'A') && (cur <= 'Z'))) {
6691 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6692 if (buf == NULL) {
6693 xmlGenericError(xmlGenericErrorContext,
6694 "malloc of %d byte failed\n", size);
6695 return(NULL);
6696 }
6697
6698 buf[len++] = cur;
6699 NEXT;
6700 cur = CUR;
6701 while (((cur >= 'a') && (cur <= 'z')) ||
6702 ((cur >= 'A') && (cur <= 'Z')) ||
6703 ((cur >= '0') && (cur <= '9')) ||
6704 (cur == '.') || (cur == '_') ||
6705 (cur == '-')) {
6706 if (len + 1 >= size) {
6707 size *= 2;
6708 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6709 if (buf == NULL) {
6710 xmlGenericError(xmlGenericErrorContext,
6711 "realloc of %d byte failed\n", size);
6712 return(NULL);
6713 }
6714 }
6715 buf[len++] = cur;
6716 NEXT;
6717 cur = CUR;
6718 if (cur == 0) {
6719 SHRINK;
6720 GROW;
6721 cur = CUR;
6722 }
6723 }
6724 buf[len] = 0;
6725 } else {
6726 ctxt->errNo = XML_ERR_ENCODING_NAME;
6727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6728 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6729 ctxt->wellFormed = 0;
6730 ctxt->disableSAX = 1;
6731 }
6732 return(buf);
6733}
6734
6735/**
6736 * xmlParseEncodingDecl:
6737 * @ctxt: an XML parser context
6738 *
6739 * parse the XML encoding declaration
6740 *
6741 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6742 *
6743 * this setups the conversion filters.
6744 *
6745 * Returns the encoding value or NULL
6746 */
6747
6748xmlChar *
6749xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6750 xmlChar *encoding = NULL;
6751 const xmlChar *q;
6752
6753 SKIP_BLANKS;
6754 if ((RAW == 'e') && (NXT(1) == 'n') &&
6755 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6756 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6757 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6758 SKIP(8);
6759 SKIP_BLANKS;
6760 if (RAW != '=') {
6761 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6763 ctxt->sax->error(ctxt->userData,
6764 "xmlParseEncodingDecl : expected '='\n");
6765 ctxt->wellFormed = 0;
6766 ctxt->disableSAX = 1;
6767 return(NULL);
6768 }
6769 NEXT;
6770 SKIP_BLANKS;
6771 if (RAW == '"') {
6772 NEXT;
6773 q = CUR_PTR;
6774 encoding = xmlParseEncName(ctxt);
6775 if (RAW != '"') {
6776 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6778 ctxt->sax->error(ctxt->userData,
6779 "String not closed\n%.50s\n", q);
6780 ctxt->wellFormed = 0;
6781 ctxt->disableSAX = 1;
6782 } else
6783 NEXT;
6784 } else if (RAW == '\''){
6785 NEXT;
6786 q = CUR_PTR;
6787 encoding = xmlParseEncName(ctxt);
6788 if (RAW != '\'') {
6789 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6791 ctxt->sax->error(ctxt->userData,
6792 "String not closed\n%.50s\n", q);
6793 ctxt->wellFormed = 0;
6794 ctxt->disableSAX = 1;
6795 } else
6796 NEXT;
6797 } else if (RAW == '"'){
6798 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6800 ctxt->sax->error(ctxt->userData,
6801 "xmlParseEncodingDecl : expected ' or \"\n");
6802 ctxt->wellFormed = 0;
6803 ctxt->disableSAX = 1;
6804 }
6805 if (encoding != NULL) {
6806 xmlCharEncoding enc;
6807 xmlCharEncodingHandlerPtr handler;
6808
6809 if (ctxt->input->encoding != NULL)
6810 xmlFree((xmlChar *) ctxt->input->encoding);
6811 ctxt->input->encoding = encoding;
6812
6813 enc = xmlParseCharEncoding((const char *) encoding);
6814 /*
6815 * registered set of known encodings
6816 */
6817 if (enc != XML_CHAR_ENCODING_ERROR) {
6818 xmlSwitchEncoding(ctxt, enc);
6819 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6820 xmlFree(encoding);
6821 return(NULL);
6822 }
6823 } else {
6824 /*
6825 * fallback for unknown encodings
6826 */
6827 handler = xmlFindCharEncodingHandler((const char *) encoding);
6828 if (handler != NULL) {
6829 xmlSwitchToEncoding(ctxt, handler);
6830 } else {
6831 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833 ctxt->sax->error(ctxt->userData,
6834 "Unsupported encoding %s\n", encoding);
6835 return(NULL);
6836 }
6837 }
6838 }
6839 }
6840 return(encoding);
6841}
6842
6843/**
6844 * xmlParseSDDecl:
6845 * @ctxt: an XML parser context
6846 *
6847 * parse the XML standalone declaration
6848 *
6849 * [32] SDDecl ::= S 'standalone' Eq
6850 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6851 *
6852 * [ VC: Standalone Document Declaration ]
6853 * TODO The standalone document declaration must have the value "no"
6854 * if any external markup declarations contain declarations of:
6855 * - attributes with default values, if elements to which these
6856 * attributes apply appear in the document without specifications
6857 * of values for these attributes, or
6858 * - entities (other than amp, lt, gt, apos, quot), if references
6859 * to those entities appear in the document, or
6860 * - attributes with values subject to normalization, where the
6861 * attribute appears in the document with a value which will change
6862 * as a result of normalization, or
6863 * - element types with element content, if white space occurs directly
6864 * within any instance of those types.
6865 *
6866 * Returns 1 if standalone, 0 otherwise
6867 */
6868
6869int
6870xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6871 int standalone = -1;
6872
6873 SKIP_BLANKS;
6874 if ((RAW == 's') && (NXT(1) == 't') &&
6875 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6876 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6877 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6878 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6879 SKIP(10);
6880 SKIP_BLANKS;
6881 if (RAW != '=') {
6882 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6884 ctxt->sax->error(ctxt->userData,
6885 "XML standalone declaration : expected '='\n");
6886 ctxt->wellFormed = 0;
6887 ctxt->disableSAX = 1;
6888 return(standalone);
6889 }
6890 NEXT;
6891 SKIP_BLANKS;
6892 if (RAW == '\''){
6893 NEXT;
6894 if ((RAW == 'n') && (NXT(1) == 'o')) {
6895 standalone = 0;
6896 SKIP(2);
6897 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6898 (NXT(2) == 's')) {
6899 standalone = 1;
6900 SKIP(3);
6901 } else {
6902 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6904 ctxt->sax->error(ctxt->userData,
6905 "standalone accepts only 'yes' or 'no'\n");
6906 ctxt->wellFormed = 0;
6907 ctxt->disableSAX = 1;
6908 }
6909 if (RAW != '\'') {
6910 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912 ctxt->sax->error(ctxt->userData, "String not closed\n");
6913 ctxt->wellFormed = 0;
6914 ctxt->disableSAX = 1;
6915 } else
6916 NEXT;
6917 } else if (RAW == '"'){
6918 NEXT;
6919 if ((RAW == 'n') && (NXT(1) == 'o')) {
6920 standalone = 0;
6921 SKIP(2);
6922 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6923 (NXT(2) == 's')) {
6924 standalone = 1;
6925 SKIP(3);
6926 } else {
6927 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6929 ctxt->sax->error(ctxt->userData,
6930 "standalone accepts only 'yes' or 'no'\n");
6931 ctxt->wellFormed = 0;
6932 ctxt->disableSAX = 1;
6933 }
6934 if (RAW != '"') {
6935 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6937 ctxt->sax->error(ctxt->userData, "String not closed\n");
6938 ctxt->wellFormed = 0;
6939 ctxt->disableSAX = 1;
6940 } else
6941 NEXT;
6942 } else {
6943 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6945 ctxt->sax->error(ctxt->userData,
6946 "Standalone value not found\n");
6947 ctxt->wellFormed = 0;
6948 ctxt->disableSAX = 1;
6949 }
6950 }
6951 return(standalone);
6952}
6953
6954/**
6955 * xmlParseXMLDecl:
6956 * @ctxt: an XML parser context
6957 *
6958 * parse an XML declaration header
6959 *
6960 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6961 */
6962
6963void
6964xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6965 xmlChar *version;
6966
6967 /*
6968 * We know that '<?xml' is here.
6969 */
6970 SKIP(5);
6971
6972 if (!IS_BLANK(RAW)) {
6973 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6975 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6976 ctxt->wellFormed = 0;
6977 ctxt->disableSAX = 1;
6978 }
6979 SKIP_BLANKS;
6980
6981 /*
6982 * We should have the VersionInfo here.
6983 */
6984 version = xmlParseVersionInfo(ctxt);
6985 if (version == NULL)
6986 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6987 ctxt->version = xmlStrdup(version);
6988 xmlFree(version);
6989
6990 /*
6991 * We may have the encoding declaration
6992 */
6993 if (!IS_BLANK(RAW)) {
6994 if ((RAW == '?') && (NXT(1) == '>')) {
6995 SKIP(2);
6996 return;
6997 }
6998 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7000 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7001 ctxt->wellFormed = 0;
7002 ctxt->disableSAX = 1;
7003 }
7004 xmlParseEncodingDecl(ctxt);
7005 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7006 /*
7007 * The XML REC instructs us to stop parsing right here
7008 */
7009 return;
7010 }
7011
7012 /*
7013 * We may have the standalone status.
7014 */
7015 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7016 if ((RAW == '?') && (NXT(1) == '>')) {
7017 SKIP(2);
7018 return;
7019 }
7020 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7023 ctxt->wellFormed = 0;
7024 ctxt->disableSAX = 1;
7025 }
7026 SKIP_BLANKS;
7027 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7028
7029 SKIP_BLANKS;
7030 if ((RAW == '?') && (NXT(1) == '>')) {
7031 SKIP(2);
7032 } else if (RAW == '>') {
7033 /* Deprecated old WD ... */
7034 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7036 ctxt->sax->error(ctxt->userData,
7037 "XML declaration must end-up with '?>'\n");
7038 ctxt->wellFormed = 0;
7039 ctxt->disableSAX = 1;
7040 NEXT;
7041 } else {
7042 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData,
7045 "parsing XML declaration: '?>' expected\n");
7046 ctxt->wellFormed = 0;
7047 ctxt->disableSAX = 1;
7048 MOVETO_ENDTAG(CUR_PTR);
7049 NEXT;
7050 }
7051}
7052
7053/**
7054 * xmlParseMisc:
7055 * @ctxt: an XML parser context
7056 *
7057 * parse an XML Misc* optionnal field.
7058 *
7059 * [27] Misc ::= Comment | PI | S
7060 */
7061
7062void
7063xmlParseMisc(xmlParserCtxtPtr ctxt) {
7064 while (((RAW == '<') && (NXT(1) == '?')) ||
7065 ((RAW == '<') && (NXT(1) == '!') &&
7066 (NXT(2) == '-') && (NXT(3) == '-')) ||
7067 IS_BLANK(CUR)) {
7068 if ((RAW == '<') && (NXT(1) == '?')) {
7069 xmlParsePI(ctxt);
7070 } else if (IS_BLANK(CUR)) {
7071 NEXT;
7072 } else
7073 xmlParseComment(ctxt);
7074 }
7075}
7076
7077/**
7078 * xmlParseDocument:
7079 * @ctxt: an XML parser context
7080 *
7081 * parse an XML document (and build a tree if using the standard SAX
7082 * interface).
7083 *
7084 * [1] document ::= prolog element Misc*
7085 *
7086 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7087 *
7088 * Returns 0, -1 in case of error. the parser context is augmented
7089 * as a result of the parsing.
7090 */
7091
7092int
7093xmlParseDocument(xmlParserCtxtPtr ctxt) {
7094 xmlChar start[4];
7095 xmlCharEncoding enc;
7096
7097 xmlInitParser();
7098
7099 GROW;
7100
7101 /*
7102 * SAX: beginning of the document processing.
7103 */
7104 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7105 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7106
7107 /*
7108 * Get the 4 first bytes and decode the charset
7109 * if enc != XML_CHAR_ENCODING_NONE
7110 * plug some encoding conversion routines.
7111 */
7112 start[0] = RAW;
7113 start[1] = NXT(1);
7114 start[2] = NXT(2);
7115 start[3] = NXT(3);
7116 enc = xmlDetectCharEncoding(start, 4);
7117 if (enc != XML_CHAR_ENCODING_NONE) {
7118 xmlSwitchEncoding(ctxt, enc);
7119 }
7120
7121
7122 if (CUR == 0) {
7123 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7125 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7126 ctxt->wellFormed = 0;
7127 ctxt->disableSAX = 1;
7128 }
7129
7130 /*
7131 * Check for the XMLDecl in the Prolog.
7132 */
7133 GROW;
7134 if ((RAW == '<') && (NXT(1) == '?') &&
7135 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7136 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7137
7138 /*
7139 * Note that we will switch encoding on the fly.
7140 */
7141 xmlParseXMLDecl(ctxt);
7142 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7143 /*
7144 * The XML REC instructs us to stop parsing right here
7145 */
7146 return(-1);
7147 }
7148 ctxt->standalone = ctxt->input->standalone;
7149 SKIP_BLANKS;
7150 } else {
7151 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7152 }
7153 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7154 ctxt->sax->startDocument(ctxt->userData);
7155
7156 /*
7157 * The Misc part of the Prolog
7158 */
7159 GROW;
7160 xmlParseMisc(ctxt);
7161
7162 /*
7163 * Then possibly doc type declaration(s) and more Misc
7164 * (doctypedecl Misc*)?
7165 */
7166 GROW;
7167 if ((RAW == '<') && (NXT(1) == '!') &&
7168 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7169 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7170 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7171 (NXT(8) == 'E')) {
7172
7173 ctxt->inSubset = 1;
7174 xmlParseDocTypeDecl(ctxt);
7175 if (RAW == '[') {
7176 ctxt->instate = XML_PARSER_DTD;
7177 xmlParseInternalSubset(ctxt);
7178 }
7179
7180 /*
7181 * Create and update the external subset.
7182 */
7183 ctxt->inSubset = 2;
7184 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7185 (!ctxt->disableSAX))
7186 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7187 ctxt->extSubSystem, ctxt->extSubURI);
7188 ctxt->inSubset = 0;
7189
7190
7191 ctxt->instate = XML_PARSER_PROLOG;
7192 xmlParseMisc(ctxt);
7193 }
7194
7195 /*
7196 * Time to start parsing the tree itself
7197 */
7198 GROW;
7199 if (RAW != '<') {
7200 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7202 ctxt->sax->error(ctxt->userData,
7203 "Start tag expected, '<' not found\n");
7204 ctxt->wellFormed = 0;
7205 ctxt->disableSAX = 1;
7206 ctxt->instate = XML_PARSER_EOF;
7207 } else {
7208 ctxt->instate = XML_PARSER_CONTENT;
7209 xmlParseElement(ctxt);
7210 ctxt->instate = XML_PARSER_EPILOG;
7211
7212
7213 /*
7214 * The Misc part at the end
7215 */
7216 xmlParseMisc(ctxt);
7217
7218 if (RAW != 0) {
7219 ctxt->errNo = XML_ERR_DOCUMENT_END;
7220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7221 ctxt->sax->error(ctxt->userData,
7222 "Extra content at the end of the document\n");
7223 ctxt->wellFormed = 0;
7224 ctxt->disableSAX = 1;
7225 }
7226 ctxt->instate = XML_PARSER_EOF;
7227 }
7228
7229 /*
7230 * SAX: end of the document processing.
7231 */
7232 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7233 (!ctxt->disableSAX))
7234 ctxt->sax->endDocument(ctxt->userData);
7235
7236 if (! ctxt->wellFormed) return(-1);
7237 return(0);
7238}
7239
7240/**
7241 * xmlParseExtParsedEnt:
7242 * @ctxt: an XML parser context
7243 *
7244 * parse a genreral parsed entity
7245 * An external general parsed entity is well-formed if it matches the
7246 * production labeled extParsedEnt.
7247 *
7248 * [78] extParsedEnt ::= TextDecl? content
7249 *
7250 * Returns 0, -1 in case of error. the parser context is augmented
7251 * as a result of the parsing.
7252 */
7253
7254int
7255xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7256 xmlChar start[4];
7257 xmlCharEncoding enc;
7258
7259 xmlDefaultSAXHandlerInit();
7260
7261 GROW;
7262
7263 /*
7264 * SAX: beginning of the document processing.
7265 */
7266 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7267 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7268
7269 /*
7270 * Get the 4 first bytes and decode the charset
7271 * if enc != XML_CHAR_ENCODING_NONE
7272 * plug some encoding conversion routines.
7273 */
7274 start[0] = RAW;
7275 start[1] = NXT(1);
7276 start[2] = NXT(2);
7277 start[3] = NXT(3);
7278 enc = xmlDetectCharEncoding(start, 4);
7279 if (enc != XML_CHAR_ENCODING_NONE) {
7280 xmlSwitchEncoding(ctxt, enc);
7281 }
7282
7283
7284 if (CUR == 0) {
7285 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7288 ctxt->wellFormed = 0;
7289 ctxt->disableSAX = 1;
7290 }
7291
7292 /*
7293 * Check for the XMLDecl in the Prolog.
7294 */
7295 GROW;
7296 if ((RAW == '<') && (NXT(1) == '?') &&
7297 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7298 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7299
7300 /*
7301 * Note that we will switch encoding on the fly.
7302 */
7303 xmlParseXMLDecl(ctxt);
7304 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7305 /*
7306 * The XML REC instructs us to stop parsing right here
7307 */
7308 return(-1);
7309 }
7310 SKIP_BLANKS;
7311 } else {
7312 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7313 }
7314 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7315 ctxt->sax->startDocument(ctxt->userData);
7316
7317 /*
7318 * Doing validity checking on chunk doesn't make sense
7319 */
7320 ctxt->instate = XML_PARSER_CONTENT;
7321 ctxt->validate = 0;
7322 ctxt->loadsubset = 0;
7323 ctxt->depth = 0;
7324
7325 xmlParseContent(ctxt);
7326
7327 if ((RAW == '<') && (NXT(1) == '/')) {
7328 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7330 ctxt->sax->error(ctxt->userData,
7331 "chunk is not well balanced\n");
7332 ctxt->wellFormed = 0;
7333 ctxt->disableSAX = 1;
7334 } else if (RAW != 0) {
7335 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7337 ctxt->sax->error(ctxt->userData,
7338 "extra content at the end of well balanced chunk\n");
7339 ctxt->wellFormed = 0;
7340 ctxt->disableSAX = 1;
7341 }
7342
7343 /*
7344 * SAX: end of the document processing.
7345 */
7346 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7347 (!ctxt->disableSAX))
7348 ctxt->sax->endDocument(ctxt->userData);
7349
7350 if (! ctxt->wellFormed) return(-1);
7351 return(0);
7352}
7353
7354/************************************************************************
7355 * *
7356 * Progressive parsing interfaces *
7357 * *
7358 ************************************************************************/
7359
7360/**
7361 * xmlParseLookupSequence:
7362 * @ctxt: an XML parser context
7363 * @first: the first char to lookup
7364 * @next: the next char to lookup or zero
7365 * @third: the next char to lookup or zero
7366 *
7367 * Try to find if a sequence (first, next, third) or just (first next) or
7368 * (first) is available in the input stream.
7369 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7370 * to avoid rescanning sequences of bytes, it DOES change the state of the
7371 * parser, do not use liberally.
7372 *
7373 * Returns the index to the current parsing point if the full sequence
7374 * is available, -1 otherwise.
7375 */
7376int
7377xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7378 xmlChar next, xmlChar third) {
7379 int base, len;
7380 xmlParserInputPtr in;
7381 const xmlChar *buf;
7382
7383 in = ctxt->input;
7384 if (in == NULL) return(-1);
7385 base = in->cur - in->base;
7386 if (base < 0) return(-1);
7387 if (ctxt->checkIndex > base)
7388 base = ctxt->checkIndex;
7389 if (in->buf == NULL) {
7390 buf = in->base;
7391 len = in->length;
7392 } else {
7393 buf = in->buf->buffer->content;
7394 len = in->buf->buffer->use;
7395 }
7396 /* take into account the sequence length */
7397 if (third) len -= 2;
7398 else if (next) len --;
7399 for (;base < len;base++) {
7400 if (buf[base] == first) {
7401 if (third != 0) {
7402 if ((buf[base + 1] != next) ||
7403 (buf[base + 2] != third)) continue;
7404 } else if (next != 0) {
7405 if (buf[base + 1] != next) continue;
7406 }
7407 ctxt->checkIndex = 0;
7408#ifdef DEBUG_PUSH
7409 if (next == 0)
7410 xmlGenericError(xmlGenericErrorContext,
7411 "PP: lookup '%c' found at %d\n",
7412 first, base);
7413 else if (third == 0)
7414 xmlGenericError(xmlGenericErrorContext,
7415 "PP: lookup '%c%c' found at %d\n",
7416 first, next, base);
7417 else
7418 xmlGenericError(xmlGenericErrorContext,
7419 "PP: lookup '%c%c%c' found at %d\n",
7420 first, next, third, base);
7421#endif
7422 return(base - (in->cur - in->base));
7423 }
7424 }
7425 ctxt->checkIndex = base;
7426#ifdef DEBUG_PUSH
7427 if (next == 0)
7428 xmlGenericError(xmlGenericErrorContext,
7429 "PP: lookup '%c' failed\n", first);
7430 else if (third == 0)
7431 xmlGenericError(xmlGenericErrorContext,
7432 "PP: lookup '%c%c' failed\n", first, next);
7433 else
7434 xmlGenericError(xmlGenericErrorContext,
7435 "PP: lookup '%c%c%c' failed\n", first, next, third);
7436#endif
7437 return(-1);
7438}
7439
7440/**
7441 * xmlParseTryOrFinish:
7442 * @ctxt: an XML parser context
7443 * @terminate: last chunk indicator
7444 *
7445 * Try to progress on parsing
7446 *
7447 * Returns zero if no parsing was possible
7448 */
7449int
7450xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7451 int ret = 0;
7452 int avail;
7453 xmlChar cur, next;
7454
7455#ifdef DEBUG_PUSH
7456 switch (ctxt->instate) {
7457 case XML_PARSER_EOF:
7458 xmlGenericError(xmlGenericErrorContext,
7459 "PP: try EOF\n"); break;
7460 case XML_PARSER_START:
7461 xmlGenericError(xmlGenericErrorContext,
7462 "PP: try START\n"); break;
7463 case XML_PARSER_MISC:
7464 xmlGenericError(xmlGenericErrorContext,
7465 "PP: try MISC\n");break;
7466 case XML_PARSER_COMMENT:
7467 xmlGenericError(xmlGenericErrorContext,
7468 "PP: try COMMENT\n");break;
7469 case XML_PARSER_PROLOG:
7470 xmlGenericError(xmlGenericErrorContext,
7471 "PP: try PROLOG\n");break;
7472 case XML_PARSER_START_TAG:
7473 xmlGenericError(xmlGenericErrorContext,
7474 "PP: try START_TAG\n");break;
7475 case XML_PARSER_CONTENT:
7476 xmlGenericError(xmlGenericErrorContext,
7477 "PP: try CONTENT\n");break;
7478 case XML_PARSER_CDATA_SECTION:
7479 xmlGenericError(xmlGenericErrorContext,
7480 "PP: try CDATA_SECTION\n");break;
7481 case XML_PARSER_END_TAG:
7482 xmlGenericError(xmlGenericErrorContext,
7483 "PP: try END_TAG\n");break;
7484 case XML_PARSER_ENTITY_DECL:
7485 xmlGenericError(xmlGenericErrorContext,
7486 "PP: try ENTITY_DECL\n");break;
7487 case XML_PARSER_ENTITY_VALUE:
7488 xmlGenericError(xmlGenericErrorContext,
7489 "PP: try ENTITY_VALUE\n");break;
7490 case XML_PARSER_ATTRIBUTE_VALUE:
7491 xmlGenericError(xmlGenericErrorContext,
7492 "PP: try ATTRIBUTE_VALUE\n");break;
7493 case XML_PARSER_DTD:
7494 xmlGenericError(xmlGenericErrorContext,
7495 "PP: try DTD\n");break;
7496 case XML_PARSER_EPILOG:
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: try EPILOG\n");break;
7499 case XML_PARSER_PI:
7500 xmlGenericError(xmlGenericErrorContext,
7501 "PP: try PI\n");break;
7502 case XML_PARSER_IGNORE:
7503 xmlGenericError(xmlGenericErrorContext,
7504 "PP: try IGNORE\n");break;
7505 }
7506#endif
7507
7508 while (1) {
7509 /*
7510 * Pop-up of finished entities.
7511 */
7512 while ((RAW == 0) && (ctxt->inputNr > 1))
7513 xmlPopInput(ctxt);
7514
7515 if (ctxt->input ==NULL) break;
7516 if (ctxt->input->buf == NULL)
7517 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7518 else
7519 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7520 if (avail < 1)
7521 goto done;
7522 switch (ctxt->instate) {
7523 case XML_PARSER_EOF:
7524 /*
7525 * Document parsing is done !
7526 */
7527 goto done;
7528 case XML_PARSER_START:
7529 /*
7530 * Very first chars read from the document flow.
7531 */
7532 cur = ctxt->input->cur[0];
7533 if (IS_BLANK(cur)) {
7534 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7535 ctxt->sax->setDocumentLocator(ctxt->userData,
7536 &xmlDefaultSAXLocator);
7537 ctxt->errNo = XML_ERR_DOCUMENT_START;
7538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7539 ctxt->sax->error(ctxt->userData,
7540 "Extra spaces at the beginning of the document are not allowed\n");
7541 ctxt->wellFormed = 0;
7542 ctxt->disableSAX = 1;
7543 SKIP_BLANKS;
7544 ret++;
7545 if (ctxt->input->buf == NULL)
7546 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7547 else
7548 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7549 }
7550 if (avail < 2)
7551 goto done;
7552
7553 cur = ctxt->input->cur[0];
7554 next = ctxt->input->cur[1];
7555 if (cur == 0) {
7556 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7557 ctxt->sax->setDocumentLocator(ctxt->userData,
7558 &xmlDefaultSAXLocator);
7559 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7561 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7562 ctxt->wellFormed = 0;
7563 ctxt->disableSAX = 1;
7564 ctxt->instate = XML_PARSER_EOF;
7565#ifdef DEBUG_PUSH
7566 xmlGenericError(xmlGenericErrorContext,
7567 "PP: entering EOF\n");
7568#endif
7569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7570 ctxt->sax->endDocument(ctxt->userData);
7571 goto done;
7572 }
7573 if ((cur == '<') && (next == '?')) {
7574 /* PI or XML decl */
7575 if (avail < 5) return(ret);
7576 if ((!terminate) &&
7577 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7578 return(ret);
7579 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7580 ctxt->sax->setDocumentLocator(ctxt->userData,
7581 &xmlDefaultSAXLocator);
7582 if ((ctxt->input->cur[2] == 'x') &&
7583 (ctxt->input->cur[3] == 'm') &&
7584 (ctxt->input->cur[4] == 'l') &&
7585 (IS_BLANK(ctxt->input->cur[5]))) {
7586 ret += 5;
7587#ifdef DEBUG_PUSH
7588 xmlGenericError(xmlGenericErrorContext,
7589 "PP: Parsing XML Decl\n");
7590#endif
7591 xmlParseXMLDecl(ctxt);
7592 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7593 /*
7594 * The XML REC instructs us to stop parsing right
7595 * here
7596 */
7597 ctxt->instate = XML_PARSER_EOF;
7598 return(0);
7599 }
7600 ctxt->standalone = ctxt->input->standalone;
7601 if ((ctxt->encoding == NULL) &&
7602 (ctxt->input->encoding != NULL))
7603 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7604 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7605 (!ctxt->disableSAX))
7606 ctxt->sax->startDocument(ctxt->userData);
7607 ctxt->instate = XML_PARSER_MISC;
7608#ifdef DEBUG_PUSH
7609 xmlGenericError(xmlGenericErrorContext,
7610 "PP: entering MISC\n");
7611#endif
7612 } else {
7613 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7614 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7615 (!ctxt->disableSAX))
7616 ctxt->sax->startDocument(ctxt->userData);
7617 ctxt->instate = XML_PARSER_MISC;
7618#ifdef DEBUG_PUSH
7619 xmlGenericError(xmlGenericErrorContext,
7620 "PP: entering MISC\n");
7621#endif
7622 }
7623 } else {
7624 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7625 ctxt->sax->setDocumentLocator(ctxt->userData,
7626 &xmlDefaultSAXLocator);
7627 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7628 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7629 (!ctxt->disableSAX))
7630 ctxt->sax->startDocument(ctxt->userData);
7631 ctxt->instate = XML_PARSER_MISC;
7632#ifdef DEBUG_PUSH
7633 xmlGenericError(xmlGenericErrorContext,
7634 "PP: entering MISC\n");
7635#endif
7636 }
7637 break;
7638 case XML_PARSER_MISC:
7639 SKIP_BLANKS;
7640 if (ctxt->input->buf == NULL)
7641 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7642 else
7643 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7644 if (avail < 2)
7645 goto done;
7646 cur = ctxt->input->cur[0];
7647 next = ctxt->input->cur[1];
7648 if ((cur == '<') && (next == '?')) {
7649 if ((!terminate) &&
7650 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7651 goto done;
7652#ifdef DEBUG_PUSH
7653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: Parsing PI\n");
7655#endif
7656 xmlParsePI(ctxt);
7657 } else if ((cur == '<') && (next == '!') &&
7658 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7659 if ((!terminate) &&
7660 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7661 goto done;
7662#ifdef DEBUG_PUSH
7663 xmlGenericError(xmlGenericErrorContext,
7664 "PP: Parsing Comment\n");
7665#endif
7666 xmlParseComment(ctxt);
7667 ctxt->instate = XML_PARSER_MISC;
7668 } else if ((cur == '<') && (next == '!') &&
7669 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7670 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7671 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7672 (ctxt->input->cur[8] == 'E')) {
7673 if ((!terminate) &&
7674 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7675 goto done;
7676#ifdef DEBUG_PUSH
7677 xmlGenericError(xmlGenericErrorContext,
7678 "PP: Parsing internal subset\n");
7679#endif
7680 ctxt->inSubset = 1;
7681 xmlParseDocTypeDecl(ctxt);
7682 if (RAW == '[') {
7683 ctxt->instate = XML_PARSER_DTD;
7684#ifdef DEBUG_PUSH
7685 xmlGenericError(xmlGenericErrorContext,
7686 "PP: entering DTD\n");
7687#endif
7688 } else {
7689 /*
7690 * Create and update the external subset.
7691 */
7692 ctxt->inSubset = 2;
7693 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7694 (ctxt->sax->externalSubset != NULL))
7695 ctxt->sax->externalSubset(ctxt->userData,
7696 ctxt->intSubName, ctxt->extSubSystem,
7697 ctxt->extSubURI);
7698 ctxt->inSubset = 0;
7699 ctxt->instate = XML_PARSER_PROLOG;
7700#ifdef DEBUG_PUSH
7701 xmlGenericError(xmlGenericErrorContext,
7702 "PP: entering PROLOG\n");
7703#endif
7704 }
7705 } else if ((cur == '<') && (next == '!') &&
7706 (avail < 9)) {
7707 goto done;
7708 } else {
7709 ctxt->instate = XML_PARSER_START_TAG;
7710#ifdef DEBUG_PUSH
7711 xmlGenericError(xmlGenericErrorContext,
7712 "PP: entering START_TAG\n");
7713#endif
7714 }
7715 break;
7716 case XML_PARSER_IGNORE:
7717 xmlGenericError(xmlGenericErrorContext,
7718 "PP: internal error, state == IGNORE");
7719 ctxt->instate = XML_PARSER_DTD;
7720#ifdef DEBUG_PUSH
7721 xmlGenericError(xmlGenericErrorContext,
7722 "PP: entering DTD\n");
7723#endif
7724 break;
7725 case XML_PARSER_PROLOG:
7726 SKIP_BLANKS;
7727 if (ctxt->input->buf == NULL)
7728 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7729 else
7730 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7731 if (avail < 2)
7732 goto done;
7733 cur = ctxt->input->cur[0];
7734 next = ctxt->input->cur[1];
7735 if ((cur == '<') && (next == '?')) {
7736 if ((!terminate) &&
7737 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7738 goto done;
7739#ifdef DEBUG_PUSH
7740 xmlGenericError(xmlGenericErrorContext,
7741 "PP: Parsing PI\n");
7742#endif
7743 xmlParsePI(ctxt);
7744 } else if ((cur == '<') && (next == '!') &&
7745 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7746 if ((!terminate) &&
7747 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7748 goto done;
7749#ifdef DEBUG_PUSH
7750 xmlGenericError(xmlGenericErrorContext,
7751 "PP: Parsing Comment\n");
7752#endif
7753 xmlParseComment(ctxt);
7754 ctxt->instate = XML_PARSER_PROLOG;
7755 } else if ((cur == '<') && (next == '!') &&
7756 (avail < 4)) {
7757 goto done;
7758 } else {
7759 ctxt->instate = XML_PARSER_START_TAG;
7760#ifdef DEBUG_PUSH
7761 xmlGenericError(xmlGenericErrorContext,
7762 "PP: entering START_TAG\n");
7763#endif
7764 }
7765 break;
7766 case XML_PARSER_EPILOG:
7767 SKIP_BLANKS;
7768 if (ctxt->input->buf == NULL)
7769 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7770 else
7771 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7772 if (avail < 2)
7773 goto done;
7774 cur = ctxt->input->cur[0];
7775 next = ctxt->input->cur[1];
7776 if ((cur == '<') && (next == '?')) {
7777 if ((!terminate) &&
7778 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7779 goto done;
7780#ifdef DEBUG_PUSH
7781 xmlGenericError(xmlGenericErrorContext,
7782 "PP: Parsing PI\n");
7783#endif
7784 xmlParsePI(ctxt);
7785 ctxt->instate = XML_PARSER_EPILOG;
7786 } else if ((cur == '<') && (next == '!') &&
7787 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7788 if ((!terminate) &&
7789 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7790 goto done;
7791#ifdef DEBUG_PUSH
7792 xmlGenericError(xmlGenericErrorContext,
7793 "PP: Parsing Comment\n");
7794#endif
7795 xmlParseComment(ctxt);
7796 ctxt->instate = XML_PARSER_EPILOG;
7797 } else if ((cur == '<') && (next == '!') &&
7798 (avail < 4)) {
7799 goto done;
7800 } else {
7801 ctxt->errNo = XML_ERR_DOCUMENT_END;
7802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7803 ctxt->sax->error(ctxt->userData,
7804 "Extra content at the end of the document\n");
7805 ctxt->wellFormed = 0;
7806 ctxt->disableSAX = 1;
7807 ctxt->instate = XML_PARSER_EOF;
7808#ifdef DEBUG_PUSH
7809 xmlGenericError(xmlGenericErrorContext,
7810 "PP: entering EOF\n");
7811#endif
7812 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7813 (!ctxt->disableSAX))
7814 ctxt->sax->endDocument(ctxt->userData);
7815 goto done;
7816 }
7817 break;
7818 case XML_PARSER_START_TAG: {
7819 xmlChar *name, *oldname;
7820
7821 if ((avail < 2) && (ctxt->inputNr == 1))
7822 goto done;
7823 cur = ctxt->input->cur[0];
7824 if (cur != '<') {
7825 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7827 ctxt->sax->error(ctxt->userData,
7828 "Start tag expect, '<' not found\n");
7829 ctxt->wellFormed = 0;
7830 ctxt->disableSAX = 1;
7831 ctxt->instate = XML_PARSER_EOF;
7832#ifdef DEBUG_PUSH
7833 xmlGenericError(xmlGenericErrorContext,
7834 "PP: entering EOF\n");
7835#endif
7836 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7837 (!ctxt->disableSAX))
7838 ctxt->sax->endDocument(ctxt->userData);
7839 goto done;
7840 }
7841 if ((!terminate) &&
7842 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7843 goto done;
7844 if (ctxt->spaceNr == 0)
7845 spacePush(ctxt, -1);
7846 else
7847 spacePush(ctxt, *ctxt->space);
7848 name = xmlParseStartTag(ctxt);
7849 if (name == NULL) {
7850 spacePop(ctxt);
7851 ctxt->instate = XML_PARSER_EOF;
7852#ifdef DEBUG_PUSH
7853 xmlGenericError(xmlGenericErrorContext,
7854 "PP: entering EOF\n");
7855#endif
7856 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7857 (!ctxt->disableSAX))
7858 ctxt->sax->endDocument(ctxt->userData);
7859 goto done;
7860 }
7861 namePush(ctxt, xmlStrdup(name));
7862
7863 /*
7864 * [ VC: Root Element Type ]
7865 * The Name in the document type declaration must match
7866 * the element type of the root element.
7867 */
7868 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7869 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7870 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7871
7872 /*
7873 * Check for an Empty Element.
7874 */
7875 if ((RAW == '/') && (NXT(1) == '>')) {
7876 SKIP(2);
7877 if ((ctxt->sax != NULL) &&
7878 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7879 ctxt->sax->endElement(ctxt->userData, name);
7880 xmlFree(name);
7881 oldname = namePop(ctxt);
7882 spacePop(ctxt);
7883 if (oldname != NULL) {
7884#ifdef DEBUG_STACK
7885 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7886#endif
7887 xmlFree(oldname);
7888 }
7889 if (ctxt->name == NULL) {
7890 ctxt->instate = XML_PARSER_EPILOG;
7891#ifdef DEBUG_PUSH
7892 xmlGenericError(xmlGenericErrorContext,
7893 "PP: entering EPILOG\n");
7894#endif
7895 } else {
7896 ctxt->instate = XML_PARSER_CONTENT;
7897#ifdef DEBUG_PUSH
7898 xmlGenericError(xmlGenericErrorContext,
7899 "PP: entering CONTENT\n");
7900#endif
7901 }
7902 break;
7903 }
7904 if (RAW == '>') {
7905 NEXT;
7906 } else {
7907 ctxt->errNo = XML_ERR_GT_REQUIRED;
7908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7909 ctxt->sax->error(ctxt->userData,
7910 "Couldn't find end of Start Tag %s\n",
7911 name);
7912 ctxt->wellFormed = 0;
7913 ctxt->disableSAX = 1;
7914
7915 /*
7916 * end of parsing of this node.
7917 */
7918 nodePop(ctxt);
7919 oldname = namePop(ctxt);
7920 spacePop(ctxt);
7921 if (oldname != NULL) {
7922#ifdef DEBUG_STACK
7923 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7924#endif
7925 xmlFree(oldname);
7926 }
7927 }
7928 xmlFree(name);
7929 ctxt->instate = XML_PARSER_CONTENT;
7930#ifdef DEBUG_PUSH
7931 xmlGenericError(xmlGenericErrorContext,
7932 "PP: entering CONTENT\n");
7933#endif
7934 break;
7935 }
7936 case XML_PARSER_CONTENT: {
7937 const xmlChar *test;
7938 int cons;
7939 xmlChar tok;
7940
7941 /*
7942 * Handle preparsed entities and charRef
7943 */
7944 if (ctxt->token != 0) {
7945 xmlChar cur[2] = { 0 , 0 } ;
7946
7947 cur[0] = (xmlChar) ctxt->token;
7948 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7949 (ctxt->sax->characters != NULL))
7950 ctxt->sax->characters(ctxt->userData, cur, 1);
7951 ctxt->token = 0;
7952 }
7953 if ((avail < 2) && (ctxt->inputNr == 1))
7954 goto done;
7955 cur = ctxt->input->cur[0];
7956 next = ctxt->input->cur[1];
7957
7958 test = CUR_PTR;
7959 cons = ctxt->input->consumed;
7960 tok = ctxt->token;
7961 if ((cur == '<') && (next == '?')) {
7962 if ((!terminate) &&
7963 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7964 goto done;
7965#ifdef DEBUG_PUSH
7966 xmlGenericError(xmlGenericErrorContext,
7967 "PP: Parsing PI\n");
7968#endif
7969 xmlParsePI(ctxt);
7970 } else if ((cur == '<') && (next == '!') &&
7971 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7972 if ((!terminate) &&
7973 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7974 goto done;
7975#ifdef DEBUG_PUSH
7976 xmlGenericError(xmlGenericErrorContext,
7977 "PP: Parsing Comment\n");
7978#endif
7979 xmlParseComment(ctxt);
7980 ctxt->instate = XML_PARSER_CONTENT;
7981 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7982 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7983 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7984 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7985 (ctxt->input->cur[8] == '[')) {
7986 SKIP(9);
7987 ctxt->instate = XML_PARSER_CDATA_SECTION;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: entering CDATA_SECTION\n");
7991#endif
7992 break;
7993 } else if ((cur == '<') && (next == '!') &&
7994 (avail < 9)) {
7995 goto done;
7996 } else if ((cur == '<') && (next == '/')) {
7997 ctxt->instate = XML_PARSER_END_TAG;
7998#ifdef DEBUG_PUSH
7999 xmlGenericError(xmlGenericErrorContext,
8000 "PP: entering END_TAG\n");
8001#endif
8002 break;
8003 } else if (cur == '<') {
8004 ctxt->instate = XML_PARSER_START_TAG;
8005#ifdef DEBUG_PUSH
8006 xmlGenericError(xmlGenericErrorContext,
8007 "PP: entering START_TAG\n");
8008#endif
8009 break;
8010 } else if (cur == '&') {
8011 if ((!terminate) &&
8012 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8013 goto done;
8014#ifdef DEBUG_PUSH
8015 xmlGenericError(xmlGenericErrorContext,
8016 "PP: Parsing Reference\n");
8017#endif
8018 xmlParseReference(ctxt);
8019 } else {
8020 /* TODO Avoid the extra copy, handle directly !!! */
8021 /*
8022 * Goal of the following test is:
8023 * - minimize calls to the SAX 'character' callback
8024 * when they are mergeable
8025 * - handle an problem for isBlank when we only parse
8026 * a sequence of blank chars and the next one is
8027 * not available to check against '<' presence.
8028 * - tries to homogenize the differences in SAX
8029 * callbacks beween the push and pull versions
8030 * of the parser.
8031 */
8032 if ((ctxt->inputNr == 1) &&
8033 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8034 if ((!terminate) &&
8035 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8036 goto done;
8037 }
8038 ctxt->checkIndex = 0;
8039#ifdef DEBUG_PUSH
8040 xmlGenericError(xmlGenericErrorContext,
8041 "PP: Parsing char data\n");
8042#endif
8043 xmlParseCharData(ctxt, 0);
8044 }
8045 /*
8046 * Pop-up of finished entities.
8047 */
8048 while ((RAW == 0) && (ctxt->inputNr > 1))
8049 xmlPopInput(ctxt);
8050 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8051 (tok == ctxt->token)) {
8052 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8054 ctxt->sax->error(ctxt->userData,
8055 "detected an error in element content\n");
8056 ctxt->wellFormed = 0;
8057 ctxt->disableSAX = 1;
8058 ctxt->instate = XML_PARSER_EOF;
8059 break;
8060 }
8061 break;
8062 }
8063 case XML_PARSER_CDATA_SECTION: {
8064 /*
8065 * The Push mode need to have the SAX callback for
8066 * cdataBlock merge back contiguous callbacks.
8067 */
8068 int base;
8069
8070 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8071 if (base < 0) {
8072 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8073 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8074 if (ctxt->sax->cdataBlock != NULL)
8075 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8076 XML_PARSER_BIG_BUFFER_SIZE);
8077 }
8078 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8079 ctxt->checkIndex = 0;
8080 }
8081 goto done;
8082 } else {
8083 if ((ctxt->sax != NULL) && (base > 0) &&
8084 (!ctxt->disableSAX)) {
8085 if (ctxt->sax->cdataBlock != NULL)
8086 ctxt->sax->cdataBlock(ctxt->userData,
8087 ctxt->input->cur, base);
8088 }
8089 SKIP(base + 3);
8090 ctxt->checkIndex = 0;
8091 ctxt->instate = XML_PARSER_CONTENT;
8092#ifdef DEBUG_PUSH
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: entering CONTENT\n");
8095#endif
8096 }
8097 break;
8098 }
8099 case XML_PARSER_END_TAG:
8100 if (avail < 2)
8101 goto done;
8102 if ((!terminate) &&
8103 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8104 goto done;
8105 xmlParseEndTag(ctxt);
8106 if (ctxt->name == NULL) {
8107 ctxt->instate = XML_PARSER_EPILOG;
8108#ifdef DEBUG_PUSH
8109 xmlGenericError(xmlGenericErrorContext,
8110 "PP: entering EPILOG\n");
8111#endif
8112 } else {
8113 ctxt->instate = XML_PARSER_CONTENT;
8114#ifdef DEBUG_PUSH
8115 xmlGenericError(xmlGenericErrorContext,
8116 "PP: entering CONTENT\n");
8117#endif
8118 }
8119 break;
8120 case XML_PARSER_DTD: {
8121 /*
8122 * Sorry but progressive parsing of the internal subset
8123 * is not expected to be supported. We first check that
8124 * the full content of the internal subset is available and
8125 * the parsing is launched only at that point.
8126 * Internal subset ends up with "']' S? '>'" in an unescaped
8127 * section and not in a ']]>' sequence which are conditional
8128 * sections (whoever argued to keep that crap in XML deserve
8129 * a place in hell !).
8130 */
8131 int base, i;
8132 xmlChar *buf;
8133 xmlChar quote = 0;
8134
8135 base = ctxt->input->cur - ctxt->input->base;
8136 if (base < 0) return(0);
8137 if (ctxt->checkIndex > base)
8138 base = ctxt->checkIndex;
8139 buf = ctxt->input->buf->buffer->content;
8140 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8141 base++) {
8142 if (quote != 0) {
8143 if (buf[base] == quote)
8144 quote = 0;
8145 continue;
8146 }
8147 if (buf[base] == '"') {
8148 quote = '"';
8149 continue;
8150 }
8151 if (buf[base] == '\'') {
8152 quote = '\'';
8153 continue;
8154 }
8155 if (buf[base] == ']') {
8156 if ((unsigned int) base +1 >=
8157 ctxt->input->buf->buffer->use)
8158 break;
8159 if (buf[base + 1] == ']') {
8160 /* conditional crap, skip both ']' ! */
8161 base++;
8162 continue;
8163 }
8164 for (i = 0;
8165 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8166 i++) {
8167 if (buf[base + i] == '>')
8168 goto found_end_int_subset;
8169 }
8170 break;
8171 }
8172 }
8173 /*
8174 * We didn't found the end of the Internal subset
8175 */
8176 if (quote == 0)
8177 ctxt->checkIndex = base;
8178#ifdef DEBUG_PUSH
8179 if (next == 0)
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: lookup of int subset end filed\n");
8182#endif
8183 goto done;
8184
8185found_end_int_subset:
8186 xmlParseInternalSubset(ctxt);
8187 ctxt->inSubset = 2;
8188 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8189 (ctxt->sax->externalSubset != NULL))
8190 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8191 ctxt->extSubSystem, ctxt->extSubURI);
8192 ctxt->inSubset = 0;
8193 ctxt->instate = XML_PARSER_PROLOG;
8194 ctxt->checkIndex = 0;
8195#ifdef DEBUG_PUSH
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: entering PROLOG\n");
8198#endif
8199 break;
8200 }
8201 case XML_PARSER_COMMENT:
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: internal error, state == COMMENT\n");
8204 ctxt->instate = XML_PARSER_CONTENT;
8205#ifdef DEBUG_PUSH
8206 xmlGenericError(xmlGenericErrorContext,
8207 "PP: entering CONTENT\n");
8208#endif
8209 break;
8210 case XML_PARSER_PI:
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: internal error, state == PI\n");
8213 ctxt->instate = XML_PARSER_CONTENT;
8214#ifdef DEBUG_PUSH
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: entering CONTENT\n");
8217#endif
8218 break;
8219 case XML_PARSER_ENTITY_DECL:
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: internal error, state == ENTITY_DECL\n");
8222 ctxt->instate = XML_PARSER_DTD;
8223#ifdef DEBUG_PUSH
8224 xmlGenericError(xmlGenericErrorContext,
8225 "PP: entering DTD\n");
8226#endif
8227 break;
8228 case XML_PARSER_ENTITY_VALUE:
8229 xmlGenericError(xmlGenericErrorContext,
8230 "PP: internal error, state == ENTITY_VALUE\n");
8231 ctxt->instate = XML_PARSER_CONTENT;
8232#ifdef DEBUG_PUSH
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: entering DTD\n");
8235#endif
8236 break;
8237 case XML_PARSER_ATTRIBUTE_VALUE:
8238 xmlGenericError(xmlGenericErrorContext,
8239 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8240 ctxt->instate = XML_PARSER_START_TAG;
8241#ifdef DEBUG_PUSH
8242 xmlGenericError(xmlGenericErrorContext,
8243 "PP: entering START_TAG\n");
8244#endif
8245 break;
8246 case XML_PARSER_SYSTEM_LITERAL:
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: internal error, state == SYSTEM_LITERAL\n");
8249 ctxt->instate = XML_PARSER_START_TAG;
8250#ifdef DEBUG_PUSH
8251 xmlGenericError(xmlGenericErrorContext,
8252 "PP: entering START_TAG\n");
8253#endif
8254 break;
8255 }
8256 }
8257done:
8258#ifdef DEBUG_PUSH
8259 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8260#endif
8261 return(ret);
8262}
8263
8264/**
8265 * xmlParseTry:
8266 * @ctxt: an XML parser context
8267 *
8268 * Try to progress on parsing
8269 *
8270 * Returns zero if no parsing was possible
8271 */
8272int
8273xmlParseTry(xmlParserCtxtPtr ctxt) {
8274 return(xmlParseTryOrFinish(ctxt, 0));
8275}
8276
8277/**
8278 * xmlParseChunk:
8279 * @ctxt: an XML parser context
8280 * @chunk: an char array
8281 * @size: the size in byte of the chunk
8282 * @terminate: last chunk indicator
8283 *
8284 * Parse a Chunk of memory
8285 *
8286 * Returns zero if no error, the xmlParserErrors otherwise.
8287 */
8288int
8289xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8290 int terminate) {
8291 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8292 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8293 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8294 int cur = ctxt->input->cur - ctxt->input->base;
8295
8296 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8297 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8298 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008299 ctxt->input->end =
8300 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008301#ifdef DEBUG_PUSH
8302 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8303#endif
8304
8305 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8306 xmlParseTryOrFinish(ctxt, terminate);
8307 } else if (ctxt->instate != XML_PARSER_EOF) {
8308 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8309 xmlParserInputBufferPtr in = ctxt->input->buf;
8310 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8311 (in->raw != NULL)) {
8312 int nbchars;
8313
8314 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8315 if (nbchars < 0) {
8316 xmlGenericError(xmlGenericErrorContext,
8317 "xmlParseChunk: encoder error\n");
8318 return(XML_ERR_INVALID_ENCODING);
8319 }
8320 }
8321 }
8322 }
8323 xmlParseTryOrFinish(ctxt, terminate);
8324 if (terminate) {
8325 /*
8326 * Check for termination
8327 */
8328 if ((ctxt->instate != XML_PARSER_EOF) &&
8329 (ctxt->instate != XML_PARSER_EPILOG)) {
8330 ctxt->errNo = XML_ERR_DOCUMENT_END;
8331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8332 ctxt->sax->error(ctxt->userData,
8333 "Extra content at the end of the document\n");
8334 ctxt->wellFormed = 0;
8335 ctxt->disableSAX = 1;
8336 }
8337 if (ctxt->instate != XML_PARSER_EOF) {
8338 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8339 (!ctxt->disableSAX))
8340 ctxt->sax->endDocument(ctxt->userData);
8341 }
8342 ctxt->instate = XML_PARSER_EOF;
8343 }
8344 return((xmlParserErrors) ctxt->errNo);
8345}
8346
8347/************************************************************************
8348 * *
8349 * I/O front end functions to the parser *
8350 * *
8351 ************************************************************************/
8352
8353/**
8354 * xmlStopParser:
8355 * @ctxt: an XML parser context
8356 *
8357 * Blocks further parser processing
8358 */
8359void
8360xmlStopParser(xmlParserCtxtPtr ctxt) {
8361 ctxt->instate = XML_PARSER_EOF;
8362 if (ctxt->input != NULL)
8363 ctxt->input->cur = BAD_CAST"";
8364}
8365
8366/**
8367 * xmlCreatePushParserCtxt:
8368 * @sax: a SAX handler
8369 * @user_data: The user data returned on SAX callbacks
8370 * @chunk: a pointer to an array of chars
8371 * @size: number of chars in the array
8372 * @filename: an optional file name or URI
8373 *
8374 * Create a parser context for using the XML parser in push mode
8375 * To allow content encoding detection, @size should be >= 4
8376 * The value of @filename is used for fetching external entities
8377 * and error/warning reports.
8378 *
8379 * Returns the new parser context or NULL
8380 */
8381xmlParserCtxtPtr
8382xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8383 const char *chunk, int size, const char *filename) {
8384 xmlParserCtxtPtr ctxt;
8385 xmlParserInputPtr inputStream;
8386 xmlParserInputBufferPtr buf;
8387 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8388
8389 /*
8390 * plug some encoding conversion routines
8391 */
8392 if ((chunk != NULL) && (size >= 4))
8393 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8394
8395 buf = xmlAllocParserInputBuffer(enc);
8396 if (buf == NULL) return(NULL);
8397
8398 ctxt = xmlNewParserCtxt();
8399 if (ctxt == NULL) {
8400 xmlFree(buf);
8401 return(NULL);
8402 }
8403 if (sax != NULL) {
8404 if (ctxt->sax != &xmlDefaultSAXHandler)
8405 xmlFree(ctxt->sax);
8406 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8407 if (ctxt->sax == NULL) {
8408 xmlFree(buf);
8409 xmlFree(ctxt);
8410 return(NULL);
8411 }
8412 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8413 if (user_data != NULL)
8414 ctxt->userData = user_data;
8415 }
8416 if (filename == NULL) {
8417 ctxt->directory = NULL;
8418 } else {
8419 ctxt->directory = xmlParserGetDirectory(filename);
8420 }
8421
8422 inputStream = xmlNewInputStream(ctxt);
8423 if (inputStream == NULL) {
8424 xmlFreeParserCtxt(ctxt);
8425 return(NULL);
8426 }
8427
8428 if (filename == NULL)
8429 inputStream->filename = NULL;
8430 else
8431 inputStream->filename = xmlMemStrdup(filename);
8432 inputStream->buf = buf;
8433 inputStream->base = inputStream->buf->buffer->content;
8434 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008435 inputStream->end =
8436 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008437 if (enc != XML_CHAR_ENCODING_NONE) {
8438 xmlSwitchEncoding(ctxt, enc);
8439 }
8440
8441 inputPush(ctxt, inputStream);
8442
8443 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8444 (ctxt->input->buf != NULL)) {
8445 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8446#ifdef DEBUG_PUSH
8447 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8448#endif
8449 }
8450
8451 return(ctxt);
8452}
8453
8454/**
8455 * xmlCreateIOParserCtxt:
8456 * @sax: a SAX handler
8457 * @user_data: The user data returned on SAX callbacks
8458 * @ioread: an I/O read function
8459 * @ioclose: an I/O close function
8460 * @ioctx: an I/O handler
8461 * @enc: the charset encoding if known
8462 *
8463 * Create a parser context for using the XML parser with an existing
8464 * I/O stream
8465 *
8466 * Returns the new parser context or NULL
8467 */
8468xmlParserCtxtPtr
8469xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8470 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8471 void *ioctx, xmlCharEncoding enc) {
8472 xmlParserCtxtPtr ctxt;
8473 xmlParserInputPtr inputStream;
8474 xmlParserInputBufferPtr buf;
8475
8476 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8477 if (buf == NULL) return(NULL);
8478
8479 ctxt = xmlNewParserCtxt();
8480 if (ctxt == NULL) {
8481 xmlFree(buf);
8482 return(NULL);
8483 }
8484 if (sax != NULL) {
8485 if (ctxt->sax != &xmlDefaultSAXHandler)
8486 xmlFree(ctxt->sax);
8487 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8488 if (ctxt->sax == NULL) {
8489 xmlFree(buf);
8490 xmlFree(ctxt);
8491 return(NULL);
8492 }
8493 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8494 if (user_data != NULL)
8495 ctxt->userData = user_data;
8496 }
8497
8498 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8499 if (inputStream == NULL) {
8500 xmlFreeParserCtxt(ctxt);
8501 return(NULL);
8502 }
8503 inputPush(ctxt, inputStream);
8504
8505 return(ctxt);
8506}
8507
8508/************************************************************************
8509 * *
8510 * Front ends when parsing a Dtd *
8511 * *
8512 ************************************************************************/
8513
8514/**
8515 * xmlIOParseDTD:
8516 * @sax: the SAX handler block or NULL
8517 * @input: an Input Buffer
8518 * @enc: the charset encoding if known
8519 *
8520 * Load and parse a DTD
8521 *
8522 * Returns the resulting xmlDtdPtr or NULL in case of error.
8523 * @input will be freed at parsing end.
8524 */
8525
8526xmlDtdPtr
8527xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8528 xmlCharEncoding enc) {
8529 xmlDtdPtr ret = NULL;
8530 xmlParserCtxtPtr ctxt;
8531 xmlParserInputPtr pinput = NULL;
8532
8533 if (input == NULL)
8534 return(NULL);
8535
8536 ctxt = xmlNewParserCtxt();
8537 if (ctxt == NULL) {
8538 return(NULL);
8539 }
8540
8541 /*
8542 * Set-up the SAX context
8543 */
8544 if (sax != NULL) {
8545 if (ctxt->sax != NULL)
8546 xmlFree(ctxt->sax);
8547 ctxt->sax = sax;
8548 ctxt->userData = NULL;
8549 }
8550
8551 /*
8552 * generate a parser input from the I/O handler
8553 */
8554
8555 pinput = xmlNewIOInputStream(ctxt, input, enc);
8556 if (pinput == NULL) {
8557 if (sax != NULL) ctxt->sax = NULL;
8558 xmlFreeParserCtxt(ctxt);
8559 return(NULL);
8560 }
8561
8562 /*
8563 * plug some encoding conversion routines here.
8564 */
8565 xmlPushInput(ctxt, pinput);
8566
8567 pinput->filename = NULL;
8568 pinput->line = 1;
8569 pinput->col = 1;
8570 pinput->base = ctxt->input->cur;
8571 pinput->cur = ctxt->input->cur;
8572 pinput->free = NULL;
8573
8574 /*
8575 * let's parse that entity knowing it's an external subset.
8576 */
8577 ctxt->inSubset = 2;
8578 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8579 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8580 BAD_CAST "none", BAD_CAST "none");
8581 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8582
8583 if (ctxt->myDoc != NULL) {
8584 if (ctxt->wellFormed) {
8585 ret = ctxt->myDoc->extSubset;
8586 ctxt->myDoc->extSubset = NULL;
8587 } else {
8588 ret = NULL;
8589 }
8590 xmlFreeDoc(ctxt->myDoc);
8591 ctxt->myDoc = NULL;
8592 }
8593 if (sax != NULL) ctxt->sax = NULL;
8594 xmlFreeParserCtxt(ctxt);
8595
8596 return(ret);
8597}
8598
8599/**
8600 * xmlSAXParseDTD:
8601 * @sax: the SAX handler block
8602 * @ExternalID: a NAME* containing the External ID of the DTD
8603 * @SystemID: a NAME* containing the URL to the DTD
8604 *
8605 * Load and parse an external subset.
8606 *
8607 * Returns the resulting xmlDtdPtr or NULL in case of error.
8608 */
8609
8610xmlDtdPtr
8611xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8612 const xmlChar *SystemID) {
8613 xmlDtdPtr ret = NULL;
8614 xmlParserCtxtPtr ctxt;
8615 xmlParserInputPtr input = NULL;
8616 xmlCharEncoding enc;
8617
8618 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8619
8620 ctxt = xmlNewParserCtxt();
8621 if (ctxt == NULL) {
8622 return(NULL);
8623 }
8624
8625 /*
8626 * Set-up the SAX context
8627 */
8628 if (sax != NULL) {
8629 if (ctxt->sax != NULL)
8630 xmlFree(ctxt->sax);
8631 ctxt->sax = sax;
8632 ctxt->userData = NULL;
8633 }
8634
8635 /*
8636 * Ask the Entity resolver to load the damn thing
8637 */
8638
8639 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8640 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8641 if (input == NULL) {
8642 if (sax != NULL) ctxt->sax = NULL;
8643 xmlFreeParserCtxt(ctxt);
8644 return(NULL);
8645 }
8646
8647 /*
8648 * plug some encoding conversion routines here.
8649 */
8650 xmlPushInput(ctxt, input);
8651 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8652 xmlSwitchEncoding(ctxt, enc);
8653
8654 if (input->filename == NULL)
8655 input->filename = (char *) xmlStrdup(SystemID);
8656 input->line = 1;
8657 input->col = 1;
8658 input->base = ctxt->input->cur;
8659 input->cur = ctxt->input->cur;
8660 input->free = NULL;
8661
8662 /*
8663 * let's parse that entity knowing it's an external subset.
8664 */
8665 ctxt->inSubset = 2;
8666 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8667 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8668 ExternalID, SystemID);
8669 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8670
8671 if (ctxt->myDoc != NULL) {
8672 if (ctxt->wellFormed) {
8673 ret = ctxt->myDoc->extSubset;
8674 ctxt->myDoc->extSubset = NULL;
8675 } else {
8676 ret = NULL;
8677 }
8678 xmlFreeDoc(ctxt->myDoc);
8679 ctxt->myDoc = NULL;
8680 }
8681 if (sax != NULL) ctxt->sax = NULL;
8682 xmlFreeParserCtxt(ctxt);
8683
8684 return(ret);
8685}
8686
8687/**
8688 * xmlParseDTD:
8689 * @ExternalID: a NAME* containing the External ID of the DTD
8690 * @SystemID: a NAME* containing the URL to the DTD
8691 *
8692 * Load and parse an external subset.
8693 *
8694 * Returns the resulting xmlDtdPtr or NULL in case of error.
8695 */
8696
8697xmlDtdPtr
8698xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8699 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8700}
8701
8702/************************************************************************
8703 * *
8704 * Front ends when parsing an Entity *
8705 * *
8706 ************************************************************************/
8707
8708/**
8709 * xmlSAXParseBalancedChunk:
8710 * @ctx: an XML parser context (possibly NULL)
8711 * @sax: the SAX handler bloc (possibly NULL)
8712 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8713 * @input: a parser input stream
8714 * @enc: the encoding
8715 *
8716 * Parse a well-balanced chunk of an XML document
8717 * The user has to provide SAX callback block whose routines will be
8718 * called by the parser
8719 * The allowed sequence for the Well Balanced Chunk is the one defined by
8720 * the content production in the XML grammar:
8721 *
8722 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8723 *
8724 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8725 * the error code otherwise
8726 */
8727
8728int
8729xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8730 void *user_data, xmlParserInputPtr input,
8731 xmlCharEncoding enc) {
8732 xmlParserCtxtPtr ctxt;
8733 int ret;
8734
8735 if (input == NULL) return(-1);
8736
8737 if (ctx != NULL)
8738 ctxt = ctx;
8739 else {
8740 ctxt = xmlNewParserCtxt();
8741 if (ctxt == NULL)
8742 return(-1);
8743 if (sax == NULL)
8744 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8745 }
8746
8747 /*
8748 * Set-up the SAX context
8749 */
8750 if (sax != NULL) {
8751 if (ctxt->sax != NULL)
8752 xmlFree(ctxt->sax);
8753 ctxt->sax = sax;
8754 ctxt->userData = user_data;
8755 }
8756
8757 /*
8758 * plug some encoding conversion routines here.
8759 */
8760 xmlPushInput(ctxt, input);
8761 if (enc != XML_CHAR_ENCODING_NONE)
8762 xmlSwitchEncoding(ctxt, enc);
8763
8764 /*
8765 * let's parse that entity knowing it's an external subset.
8766 */
8767 xmlParseContent(ctxt);
8768 ret = ctxt->errNo;
8769
8770 if (ctx == NULL) {
8771 if (sax != NULL)
8772 ctxt->sax = NULL;
8773 else
8774 xmlFreeDoc(ctxt->myDoc);
8775 xmlFreeParserCtxt(ctxt);
8776 }
8777 return(ret);
8778}
8779
8780/**
8781 * xmlParseCtxtExternalEntity:
8782 * @ctx: the existing parsing context
8783 * @URL: the URL for the entity to load
8784 * @ID: the System ID for the entity to load
8785 * @list: the return value for the set of parsed nodes
8786 *
8787 * Parse an external general entity within an existing parsing context
8788 * An external general parsed entity is well-formed if it matches the
8789 * production labeled extParsedEnt.
8790 *
8791 * [78] extParsedEnt ::= TextDecl? content
8792 *
8793 * Returns 0 if the entity is well formed, -1 in case of args problem and
8794 * the parser error code otherwise
8795 */
8796
8797int
8798xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8799 const xmlChar *ID, xmlNodePtr *list) {
8800 xmlParserCtxtPtr ctxt;
8801 xmlDocPtr newDoc;
8802 xmlSAXHandlerPtr oldsax = NULL;
8803 int ret = 0;
8804
8805 if (ctx->depth > 40) {
8806 return(XML_ERR_ENTITY_LOOP);
8807 }
8808
8809 if (list != NULL)
8810 *list = NULL;
8811 if ((URL == NULL) && (ID == NULL))
8812 return(-1);
8813 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8814 return(-1);
8815
8816
8817 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8818 if (ctxt == NULL) return(-1);
8819 ctxt->userData = ctxt;
8820 oldsax = ctxt->sax;
8821 ctxt->sax = ctx->sax;
8822 newDoc = xmlNewDoc(BAD_CAST "1.0");
8823 if (newDoc == NULL) {
8824 xmlFreeParserCtxt(ctxt);
8825 return(-1);
8826 }
8827 if (ctx->myDoc != NULL) {
8828 newDoc->intSubset = ctx->myDoc->intSubset;
8829 newDoc->extSubset = ctx->myDoc->extSubset;
8830 }
8831 if (ctx->myDoc->URL != NULL) {
8832 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8833 }
8834 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8835 if (newDoc->children == NULL) {
8836 ctxt->sax = oldsax;
8837 xmlFreeParserCtxt(ctxt);
8838 newDoc->intSubset = NULL;
8839 newDoc->extSubset = NULL;
8840 xmlFreeDoc(newDoc);
8841 return(-1);
8842 }
8843 nodePush(ctxt, newDoc->children);
8844 if (ctx->myDoc == NULL) {
8845 ctxt->myDoc = newDoc;
8846 } else {
8847 ctxt->myDoc = ctx->myDoc;
8848 newDoc->children->doc = ctx->myDoc;
8849 }
8850
8851 /*
8852 * Parse a possible text declaration first
8853 */
8854 GROW;
8855 if ((RAW == '<') && (NXT(1) == '?') &&
8856 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8857 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8858 xmlParseTextDecl(ctxt);
8859 }
8860
8861 /*
8862 * Doing validity checking on chunk doesn't make sense
8863 */
8864 ctxt->instate = XML_PARSER_CONTENT;
8865 ctxt->validate = ctx->validate;
8866 ctxt->loadsubset = ctx->loadsubset;
8867 ctxt->depth = ctx->depth + 1;
8868 ctxt->replaceEntities = ctx->replaceEntities;
8869 if (ctxt->validate) {
8870 ctxt->vctxt.error = ctx->vctxt.error;
8871 ctxt->vctxt.warning = ctx->vctxt.warning;
8872 /* Allocate the Node stack */
8873 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8874 if (ctxt->vctxt.nodeTab == NULL) {
8875 xmlGenericError(xmlGenericErrorContext,
8876 "xmlParseCtxtExternalEntity: out of memory\n");
8877 ctxt->validate = 0;
8878 ctxt->vctxt.error = NULL;
8879 ctxt->vctxt.warning = NULL;
8880 } else {
8881 ctxt->vctxt.nodeNr = 0;
8882 ctxt->vctxt.nodeMax = 4;
8883 ctxt->vctxt.node = NULL;
8884 }
8885 } else {
8886 ctxt->vctxt.error = NULL;
8887 ctxt->vctxt.warning = NULL;
8888 }
8889
8890 xmlParseContent(ctxt);
8891
8892 if ((RAW == '<') && (NXT(1) == '/')) {
8893 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8895 ctxt->sax->error(ctxt->userData,
8896 "chunk is not well balanced\n");
8897 ctxt->wellFormed = 0;
8898 ctxt->disableSAX = 1;
8899 } else if (RAW != 0) {
8900 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8902 ctxt->sax->error(ctxt->userData,
8903 "extra content at the end of well balanced chunk\n");
8904 ctxt->wellFormed = 0;
8905 ctxt->disableSAX = 1;
8906 }
8907 if (ctxt->node != newDoc->children) {
8908 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8910 ctxt->sax->error(ctxt->userData,
8911 "chunk is not well balanced\n");
8912 ctxt->wellFormed = 0;
8913 ctxt->disableSAX = 1;
8914 }
8915
8916 if (!ctxt->wellFormed) {
8917 if (ctxt->errNo == 0)
8918 ret = 1;
8919 else
8920 ret = ctxt->errNo;
8921 } else {
8922 if (list != NULL) {
8923 xmlNodePtr cur;
8924
8925 /*
8926 * Return the newly created nodeset after unlinking it from
8927 * they pseudo parent.
8928 */
8929 cur = newDoc->children->children;
8930 *list = cur;
8931 while (cur != NULL) {
8932 cur->parent = NULL;
8933 cur = cur->next;
8934 }
8935 newDoc->children->children = NULL;
8936 }
8937 ret = 0;
8938 }
8939 ctxt->sax = oldsax;
8940 xmlFreeParserCtxt(ctxt);
8941 newDoc->intSubset = NULL;
8942 newDoc->extSubset = NULL;
8943 xmlFreeDoc(newDoc);
8944
8945 return(ret);
8946}
8947
8948/**
8949 * xmlParseExternalEntity:
8950 * @doc: the document the chunk pertains to
8951 * @sax: the SAX handler bloc (possibly NULL)
8952 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8953 * @depth: Used for loop detection, use 0
8954 * @URL: the URL for the entity to load
8955 * @ID: the System ID for the entity to load
8956 * @list: the return value for the set of parsed nodes
8957 *
8958 * Parse an external general entity
8959 * An external general parsed entity is well-formed if it matches the
8960 * production labeled extParsedEnt.
8961 *
8962 * [78] extParsedEnt ::= TextDecl? content
8963 *
8964 * Returns 0 if the entity is well formed, -1 in case of args problem and
8965 * the parser error code otherwise
8966 */
8967
8968int
8969xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8970 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8971 xmlParserCtxtPtr ctxt;
8972 xmlDocPtr newDoc;
8973 xmlSAXHandlerPtr oldsax = NULL;
8974 int ret = 0;
8975
8976 if (depth > 40) {
8977 return(XML_ERR_ENTITY_LOOP);
8978 }
8979
8980
8981
8982 if (list != NULL)
8983 *list = NULL;
8984 if ((URL == NULL) && (ID == NULL))
8985 return(-1);
8986 if (doc == NULL) /* @@ relax but check for dereferences */
8987 return(-1);
8988
8989
8990 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8991 if (ctxt == NULL) return(-1);
8992 ctxt->userData = ctxt;
8993 if (sax != NULL) {
8994 oldsax = ctxt->sax;
8995 ctxt->sax = sax;
8996 if (user_data != NULL)
8997 ctxt->userData = user_data;
8998 }
8999 newDoc = xmlNewDoc(BAD_CAST "1.0");
9000 if (newDoc == NULL) {
9001 xmlFreeParserCtxt(ctxt);
9002 return(-1);
9003 }
9004 if (doc != NULL) {
9005 newDoc->intSubset = doc->intSubset;
9006 newDoc->extSubset = doc->extSubset;
9007 }
9008 if (doc->URL != NULL) {
9009 newDoc->URL = xmlStrdup(doc->URL);
9010 }
9011 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9012 if (newDoc->children == NULL) {
9013 if (sax != NULL)
9014 ctxt->sax = oldsax;
9015 xmlFreeParserCtxt(ctxt);
9016 newDoc->intSubset = NULL;
9017 newDoc->extSubset = NULL;
9018 xmlFreeDoc(newDoc);
9019 return(-1);
9020 }
9021 nodePush(ctxt, newDoc->children);
9022 if (doc == NULL) {
9023 ctxt->myDoc = newDoc;
9024 } else {
9025 ctxt->myDoc = doc;
9026 newDoc->children->doc = doc;
9027 }
9028
9029 /*
9030 * Parse a possible text declaration first
9031 */
9032 GROW;
9033 if ((RAW == '<') && (NXT(1) == '?') &&
9034 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9035 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9036 xmlParseTextDecl(ctxt);
9037 }
9038
9039 /*
9040 * Doing validity checking on chunk doesn't make sense
9041 */
9042 ctxt->instate = XML_PARSER_CONTENT;
9043 ctxt->validate = 0;
9044 ctxt->loadsubset = 0;
9045 ctxt->depth = depth;
9046
9047 xmlParseContent(ctxt);
9048
9049 if ((RAW == '<') && (NXT(1) == '/')) {
9050 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9052 ctxt->sax->error(ctxt->userData,
9053 "chunk is not well balanced\n");
9054 ctxt->wellFormed = 0;
9055 ctxt->disableSAX = 1;
9056 } else if (RAW != 0) {
9057 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9059 ctxt->sax->error(ctxt->userData,
9060 "extra content at the end of well balanced chunk\n");
9061 ctxt->wellFormed = 0;
9062 ctxt->disableSAX = 1;
9063 }
9064 if (ctxt->node != newDoc->children) {
9065 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9067 ctxt->sax->error(ctxt->userData,
9068 "chunk is not well balanced\n");
9069 ctxt->wellFormed = 0;
9070 ctxt->disableSAX = 1;
9071 }
9072
9073 if (!ctxt->wellFormed) {
9074 if (ctxt->errNo == 0)
9075 ret = 1;
9076 else
9077 ret = ctxt->errNo;
9078 } else {
9079 if (list != NULL) {
9080 xmlNodePtr cur;
9081
9082 /*
9083 * Return the newly created nodeset after unlinking it from
9084 * they pseudo parent.
9085 */
9086 cur = newDoc->children->children;
9087 *list = cur;
9088 while (cur != NULL) {
9089 cur->parent = NULL;
9090 cur = cur->next;
9091 }
9092 newDoc->children->children = NULL;
9093 }
9094 ret = 0;
9095 }
9096 if (sax != NULL)
9097 ctxt->sax = oldsax;
9098 xmlFreeParserCtxt(ctxt);
9099 newDoc->intSubset = NULL;
9100 newDoc->extSubset = NULL;
9101 xmlFreeDoc(newDoc);
9102
9103 return(ret);
9104}
9105
9106/**
9107 * xmlParseBalancedChunk:
9108 * @doc: the document the chunk pertains to
9109 * @sax: the SAX handler bloc (possibly NULL)
9110 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9111 * @depth: Used for loop detection, use 0
9112 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9113 * @list: the return value for the set of parsed nodes
9114 *
9115 * Parse a well-balanced chunk of an XML document
9116 * called by the parser
9117 * The allowed sequence for the Well Balanced Chunk is the one defined by
9118 * the content production in the XML grammar:
9119 *
9120 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9121 *
9122 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9123 * the parser error code otherwise
9124 */
9125
9126int
9127xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9128 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9129 xmlParserCtxtPtr ctxt;
9130 xmlDocPtr newDoc;
9131 xmlSAXHandlerPtr oldsax = NULL;
9132 int size;
9133 int ret = 0;
9134
9135 if (depth > 40) {
9136 return(XML_ERR_ENTITY_LOOP);
9137 }
9138
9139
9140 if (list != NULL)
9141 *list = NULL;
9142 if (string == NULL)
9143 return(-1);
9144
9145 size = xmlStrlen(string);
9146
9147 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9148 if (ctxt == NULL) return(-1);
9149 ctxt->userData = ctxt;
9150 if (sax != NULL) {
9151 oldsax = ctxt->sax;
9152 ctxt->sax = sax;
9153 if (user_data != NULL)
9154 ctxt->userData = user_data;
9155 }
9156 newDoc = xmlNewDoc(BAD_CAST "1.0");
9157 if (newDoc == NULL) {
9158 xmlFreeParserCtxt(ctxt);
9159 return(-1);
9160 }
9161 if (doc != NULL) {
9162 newDoc->intSubset = doc->intSubset;
9163 newDoc->extSubset = doc->extSubset;
9164 }
9165 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9166 if (newDoc->children == NULL) {
9167 if (sax != NULL)
9168 ctxt->sax = oldsax;
9169 xmlFreeParserCtxt(ctxt);
9170 newDoc->intSubset = NULL;
9171 newDoc->extSubset = NULL;
9172 xmlFreeDoc(newDoc);
9173 return(-1);
9174 }
9175 nodePush(ctxt, newDoc->children);
9176 if (doc == NULL) {
9177 ctxt->myDoc = newDoc;
9178 } else {
9179 ctxt->myDoc = doc;
9180 newDoc->children->doc = doc;
9181 }
9182 ctxt->instate = XML_PARSER_CONTENT;
9183 ctxt->depth = depth;
9184
9185 /*
9186 * Doing validity checking on chunk doesn't make sense
9187 */
9188 ctxt->validate = 0;
9189 ctxt->loadsubset = 0;
9190
9191 xmlParseContent(ctxt);
9192
9193 if ((RAW == '<') && (NXT(1) == '/')) {
9194 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9196 ctxt->sax->error(ctxt->userData,
9197 "chunk is not well balanced\n");
9198 ctxt->wellFormed = 0;
9199 ctxt->disableSAX = 1;
9200 } else if (RAW != 0) {
9201 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9203 ctxt->sax->error(ctxt->userData,
9204 "extra content at the end of well balanced chunk\n");
9205 ctxt->wellFormed = 0;
9206 ctxt->disableSAX = 1;
9207 }
9208 if (ctxt->node != newDoc->children) {
9209 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9211 ctxt->sax->error(ctxt->userData,
9212 "chunk is not well balanced\n");
9213 ctxt->wellFormed = 0;
9214 ctxt->disableSAX = 1;
9215 }
9216
9217 if (!ctxt->wellFormed) {
9218 if (ctxt->errNo == 0)
9219 ret = 1;
9220 else
9221 ret = ctxt->errNo;
9222 } else {
9223 if (list != NULL) {
9224 xmlNodePtr cur;
9225
9226 /*
9227 * Return the newly created nodeset after unlinking it from
9228 * they pseudo parent.
9229 */
9230 cur = newDoc->children->children;
9231 *list = cur;
9232 while (cur != NULL) {
9233 cur->parent = NULL;
9234 cur = cur->next;
9235 }
9236 newDoc->children->children = NULL;
9237 }
9238 ret = 0;
9239 }
9240 if (sax != NULL)
9241 ctxt->sax = oldsax;
9242 xmlFreeParserCtxt(ctxt);
9243 newDoc->intSubset = NULL;
9244 newDoc->extSubset = NULL;
9245 xmlFreeDoc(newDoc);
9246
9247 return(ret);
9248}
9249
9250/**
9251 * xmlSAXParseEntity:
9252 * @sax: the SAX handler block
9253 * @filename: the filename
9254 *
9255 * parse an XML external entity out of context and build a tree.
9256 * It use the given SAX function block to handle the parsing callback.
9257 * If sax is NULL, fallback to the default DOM tree building routines.
9258 *
9259 * [78] extParsedEnt ::= TextDecl? content
9260 *
9261 * This correspond to a "Well Balanced" chunk
9262 *
9263 * Returns the resulting document tree
9264 */
9265
9266xmlDocPtr
9267xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9268 xmlDocPtr ret;
9269 xmlParserCtxtPtr ctxt;
9270 char *directory = NULL;
9271
9272 ctxt = xmlCreateFileParserCtxt(filename);
9273 if (ctxt == NULL) {
9274 return(NULL);
9275 }
9276 if (sax != NULL) {
9277 if (ctxt->sax != NULL)
9278 xmlFree(ctxt->sax);
9279 ctxt->sax = sax;
9280 ctxt->userData = NULL;
9281 }
9282
9283 if ((ctxt->directory == NULL) && (directory == NULL))
9284 directory = xmlParserGetDirectory(filename);
9285
9286 xmlParseExtParsedEnt(ctxt);
9287
9288 if (ctxt->wellFormed)
9289 ret = ctxt->myDoc;
9290 else {
9291 ret = NULL;
9292 xmlFreeDoc(ctxt->myDoc);
9293 ctxt->myDoc = NULL;
9294 }
9295 if (sax != NULL)
9296 ctxt->sax = NULL;
9297 xmlFreeParserCtxt(ctxt);
9298
9299 return(ret);
9300}
9301
9302/**
9303 * xmlParseEntity:
9304 * @filename: the filename
9305 *
9306 * parse an XML external entity out of context and build a tree.
9307 *
9308 * [78] extParsedEnt ::= TextDecl? content
9309 *
9310 * This correspond to a "Well Balanced" chunk
9311 *
9312 * Returns the resulting document tree
9313 */
9314
9315xmlDocPtr
9316xmlParseEntity(const char *filename) {
9317 return(xmlSAXParseEntity(NULL, filename));
9318}
9319
9320/**
9321 * xmlCreateEntityParserCtxt:
9322 * @URL: the entity URL
9323 * @ID: the entity PUBLIC ID
9324 * @base: a posible base for the target URI
9325 *
9326 * Create a parser context for an external entity
9327 * Automatic support for ZLIB/Compress compressed document is provided
9328 * by default if found at compile-time.
9329 *
9330 * Returns the new parser context or NULL
9331 */
9332xmlParserCtxtPtr
9333xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9334 const xmlChar *base) {
9335 xmlParserCtxtPtr ctxt;
9336 xmlParserInputPtr inputStream;
9337 char *directory = NULL;
9338 xmlChar *uri;
9339
9340 ctxt = xmlNewParserCtxt();
9341 if (ctxt == NULL) {
9342 return(NULL);
9343 }
9344
9345 uri = xmlBuildURI(URL, base);
9346
9347 if (uri == NULL) {
9348 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9349 if (inputStream == NULL) {
9350 xmlFreeParserCtxt(ctxt);
9351 return(NULL);
9352 }
9353
9354 inputPush(ctxt, inputStream);
9355
9356 if ((ctxt->directory == NULL) && (directory == NULL))
9357 directory = xmlParserGetDirectory((char *)URL);
9358 if ((ctxt->directory == NULL) && (directory != NULL))
9359 ctxt->directory = directory;
9360 } else {
9361 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9362 if (inputStream == NULL) {
9363 xmlFree(uri);
9364 xmlFreeParserCtxt(ctxt);
9365 return(NULL);
9366 }
9367
9368 inputPush(ctxt, inputStream);
9369
9370 if ((ctxt->directory == NULL) && (directory == NULL))
9371 directory = xmlParserGetDirectory((char *)uri);
9372 if ((ctxt->directory == NULL) && (directory != NULL))
9373 ctxt->directory = directory;
9374 xmlFree(uri);
9375 }
9376
9377 return(ctxt);
9378}
9379
9380/************************************************************************
9381 * *
9382 * Front ends when parsing from a file *
9383 * *
9384 ************************************************************************/
9385
9386/**
9387 * xmlCreateFileParserCtxt:
9388 * @filename: the filename
9389 *
9390 * Create a parser context for a file content.
9391 * Automatic support for ZLIB/Compress compressed document is provided
9392 * by default if found at compile-time.
9393 *
9394 * Returns the new parser context or NULL
9395 */
9396xmlParserCtxtPtr
9397xmlCreateFileParserCtxt(const char *filename)
9398{
9399 xmlParserCtxtPtr ctxt;
9400 xmlParserInputPtr inputStream;
9401 xmlParserInputBufferPtr buf;
9402 char *directory = NULL;
9403
9404 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9405 if (buf == NULL) {
9406 return(NULL);
9407 }
9408
9409 ctxt = xmlNewParserCtxt();
9410 if (ctxt == NULL) {
9411 if (xmlDefaultSAXHandler.error != NULL) {
9412 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9413 }
9414 return(NULL);
9415 }
9416
9417 inputStream = xmlNewInputStream(ctxt);
9418 if (inputStream == NULL) {
9419 xmlFreeParserCtxt(ctxt);
9420 return(NULL);
9421 }
9422
9423 inputStream->filename = xmlMemStrdup(filename);
9424 inputStream->buf = buf;
9425 inputStream->base = inputStream->buf->buffer->content;
9426 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009427 inputStream->end =
9428 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009429
9430 inputPush(ctxt, inputStream);
9431 if ((ctxt->directory == NULL) && (directory == NULL))
9432 directory = xmlParserGetDirectory(filename);
9433 if ((ctxt->directory == NULL) && (directory != NULL))
9434 ctxt->directory = directory;
9435
9436 return(ctxt);
9437}
9438
9439/**
9440 * xmlSAXParseFile:
9441 * @sax: the SAX handler block
9442 * @filename: the filename
9443 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9444 * documents
9445 *
9446 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9447 * compressed document is provided by default if found at compile-time.
9448 * It use the given SAX function block to handle the parsing callback.
9449 * If sax is NULL, fallback to the default DOM tree building routines.
9450 *
9451 * Returns the resulting document tree
9452 */
9453
9454xmlDocPtr
9455xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9456 int recovery) {
9457 xmlDocPtr ret;
9458 xmlParserCtxtPtr ctxt;
9459 char *directory = NULL;
9460
9461 ctxt = xmlCreateFileParserCtxt(filename);
9462 if (ctxt == NULL) {
9463 return(NULL);
9464 }
9465 if (sax != NULL) {
9466 if (ctxt->sax != NULL)
9467 xmlFree(ctxt->sax);
9468 ctxt->sax = sax;
9469 ctxt->userData = NULL;
9470 }
9471
9472 if ((ctxt->directory == NULL) && (directory == NULL))
9473 directory = xmlParserGetDirectory(filename);
9474 if ((ctxt->directory == NULL) && (directory != NULL))
9475 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9476
9477 xmlParseDocument(ctxt);
9478
9479 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9480 else {
9481 ret = NULL;
9482 xmlFreeDoc(ctxt->myDoc);
9483 ctxt->myDoc = NULL;
9484 }
9485 if (sax != NULL)
9486 ctxt->sax = NULL;
9487 xmlFreeParserCtxt(ctxt);
9488
9489 return(ret);
9490}
9491
9492/**
9493 * xmlRecoverDoc:
9494 * @cur: a pointer to an array of xmlChar
9495 *
9496 * parse an XML in-memory document and build a tree.
9497 * In the case the document is not Well Formed, a tree is built anyway
9498 *
9499 * Returns the resulting document tree
9500 */
9501
9502xmlDocPtr
9503xmlRecoverDoc(xmlChar *cur) {
9504 return(xmlSAXParseDoc(NULL, cur, 1));
9505}
9506
9507/**
9508 * xmlParseFile:
9509 * @filename: the filename
9510 *
9511 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9512 * compressed document is provided by default if found at compile-time.
9513 *
9514 * Returns the resulting document tree
9515 */
9516
9517xmlDocPtr
9518xmlParseFile(const char *filename) {
9519 return(xmlSAXParseFile(NULL, filename, 0));
9520}
9521
9522/**
9523 * xmlRecoverFile:
9524 * @filename: the filename
9525 *
9526 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9527 * compressed document is provided by default if found at compile-time.
9528 * In the case the document is not Well Formed, a tree is built anyway
9529 *
9530 * Returns the resulting document tree
9531 */
9532
9533xmlDocPtr
9534xmlRecoverFile(const char *filename) {
9535 return(xmlSAXParseFile(NULL, filename, 1));
9536}
9537
9538
9539/**
9540 * xmlSetupParserForBuffer:
9541 * @ctxt: an XML parser context
9542 * @buffer: a xmlChar * buffer
9543 * @filename: a file name
9544 *
9545 * Setup the parser context to parse a new buffer; Clears any prior
9546 * contents from the parser context. The buffer parameter must not be
9547 * NULL, but the filename parameter can be
9548 */
9549void
9550xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9551 const char* filename)
9552{
9553 xmlParserInputPtr input;
9554
9555 input = xmlNewInputStream(ctxt);
9556 if (input == NULL) {
9557 perror("malloc");
9558 xmlFree(ctxt);
9559 return;
9560 }
9561
9562 xmlClearParserCtxt(ctxt);
9563 if (filename != NULL)
9564 input->filename = xmlMemStrdup(filename);
9565 input->base = buffer;
9566 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009567 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009568 inputPush(ctxt, input);
9569}
9570
9571/**
9572 * xmlSAXUserParseFile:
9573 * @sax: a SAX handler
9574 * @user_data: The user data returned on SAX callbacks
9575 * @filename: a file name
9576 *
9577 * parse an XML file and call the given SAX handler routines.
9578 * Automatic support for ZLIB/Compress compressed document is provided
9579 *
9580 * Returns 0 in case of success or a error number otherwise
9581 */
9582int
9583xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9584 const char *filename) {
9585 int ret = 0;
9586 xmlParserCtxtPtr ctxt;
9587
9588 ctxt = xmlCreateFileParserCtxt(filename);
9589 if (ctxt == NULL) return -1;
9590 if (ctxt->sax != &xmlDefaultSAXHandler)
9591 xmlFree(ctxt->sax);
9592 ctxt->sax = sax;
9593 if (user_data != NULL)
9594 ctxt->userData = user_data;
9595
9596 xmlParseDocument(ctxt);
9597
9598 if (ctxt->wellFormed)
9599 ret = 0;
9600 else {
9601 if (ctxt->errNo != 0)
9602 ret = ctxt->errNo;
9603 else
9604 ret = -1;
9605 }
9606 if (sax != NULL)
9607 ctxt->sax = NULL;
9608 xmlFreeParserCtxt(ctxt);
9609
9610 return ret;
9611}
9612
9613/************************************************************************
9614 * *
9615 * Front ends when parsing from memory *
9616 * *
9617 ************************************************************************/
9618
9619/**
9620 * xmlCreateMemoryParserCtxt:
9621 * @buffer: a pointer to a char array
9622 * @size: the size of the array
9623 *
9624 * Create a parser context for an XML in-memory document.
9625 *
9626 * Returns the new parser context or NULL
9627 */
9628xmlParserCtxtPtr
9629xmlCreateMemoryParserCtxt(char *buffer, int size) {
9630 xmlParserCtxtPtr ctxt;
9631 xmlParserInputPtr input;
9632 xmlParserInputBufferPtr buf;
9633
9634 if (buffer == NULL)
9635 return(NULL);
9636 if (size <= 0)
9637 return(NULL);
9638
9639 ctxt = xmlNewParserCtxt();
9640 if (ctxt == NULL)
9641 return(NULL);
9642
9643 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9644 if (buf == NULL) return(NULL);
9645
9646 input = xmlNewInputStream(ctxt);
9647 if (input == NULL) {
9648 xmlFreeParserCtxt(ctxt);
9649 return(NULL);
9650 }
9651
9652 input->filename = NULL;
9653 input->buf = buf;
9654 input->base = input->buf->buffer->content;
9655 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009656 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009657
9658 inputPush(ctxt, input);
9659 return(ctxt);
9660}
9661
9662/**
9663 * xmlSAXParseMemory:
9664 * @sax: the SAX handler block
9665 * @buffer: an pointer to a char array
9666 * @size: the size of the array
9667 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9668 * documents
9669 *
9670 * parse an XML in-memory block and use the given SAX function block
9671 * to handle the parsing callback. If sax is NULL, fallback to the default
9672 * DOM tree building routines.
9673 *
9674 * Returns the resulting document tree
9675 */
9676xmlDocPtr
9677xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9678 xmlDocPtr ret;
9679 xmlParserCtxtPtr ctxt;
9680
9681 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9682 if (ctxt == NULL) return(NULL);
9683 if (sax != NULL) {
9684 ctxt->sax = sax;
9685 ctxt->userData = NULL;
9686 }
9687
9688 xmlParseDocument(ctxt);
9689
9690 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9691 else {
9692 ret = NULL;
9693 xmlFreeDoc(ctxt->myDoc);
9694 ctxt->myDoc = NULL;
9695 }
9696 if (sax != NULL)
9697 ctxt->sax = NULL;
9698 xmlFreeParserCtxt(ctxt);
9699
9700 return(ret);
9701}
9702
9703/**
9704 * xmlParseMemory:
9705 * @buffer: an pointer to a char array
9706 * @size: the size of the array
9707 *
9708 * parse an XML in-memory block and build a tree.
9709 *
9710 * Returns the resulting document tree
9711 */
9712
9713xmlDocPtr xmlParseMemory(char *buffer, int size) {
9714 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9715}
9716
9717/**
9718 * xmlRecoverMemory:
9719 * @buffer: an pointer to a char array
9720 * @size: the size of the array
9721 *
9722 * parse an XML in-memory block and build a tree.
9723 * In the case the document is not Well Formed, a tree is built anyway
9724 *
9725 * Returns the resulting document tree
9726 */
9727
9728xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9729 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9730}
9731
9732/**
9733 * xmlSAXUserParseMemory:
9734 * @sax: a SAX handler
9735 * @user_data: The user data returned on SAX callbacks
9736 * @buffer: an in-memory XML document input
9737 * @size: the length of the XML document in bytes
9738 *
9739 * A better SAX parsing routine.
9740 * parse an XML in-memory buffer and call the given SAX handler routines.
9741 *
9742 * Returns 0 in case of success or a error number otherwise
9743 */
9744int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9745 char *buffer, int size) {
9746 int ret = 0;
9747 xmlParserCtxtPtr ctxt;
9748 xmlSAXHandlerPtr oldsax = NULL;
9749
9750 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9751 if (ctxt == NULL) return -1;
9752 if (sax != NULL) {
9753 oldsax = ctxt->sax;
9754 ctxt->sax = sax;
9755 }
9756 ctxt->userData = user_data;
9757
9758 xmlParseDocument(ctxt);
9759
9760 if (ctxt->wellFormed)
9761 ret = 0;
9762 else {
9763 if (ctxt->errNo != 0)
9764 ret = ctxt->errNo;
9765 else
9766 ret = -1;
9767 }
9768 if (sax != NULL) {
9769 ctxt->sax = oldsax;
9770 }
9771 xmlFreeParserCtxt(ctxt);
9772
9773 return ret;
9774}
9775
9776/**
9777 * xmlCreateDocParserCtxt:
9778 * @cur: a pointer to an array of xmlChar
9779 *
9780 * Creates a parser context for an XML in-memory document.
9781 *
9782 * Returns the new parser context or NULL
9783 */
9784xmlParserCtxtPtr
9785xmlCreateDocParserCtxt(xmlChar *cur) {
9786 int len;
9787
9788 if (cur == NULL)
9789 return(NULL);
9790 len = xmlStrlen(cur);
9791 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9792}
9793
9794/**
9795 * xmlSAXParseDoc:
9796 * @sax: the SAX handler block
9797 * @cur: a pointer to an array of xmlChar
9798 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9799 * documents
9800 *
9801 * parse an XML in-memory document and build a tree.
9802 * It use the given SAX function block to handle the parsing callback.
9803 * If sax is NULL, fallback to the default DOM tree building routines.
9804 *
9805 * Returns the resulting document tree
9806 */
9807
9808xmlDocPtr
9809xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9810 xmlDocPtr ret;
9811 xmlParserCtxtPtr ctxt;
9812
9813 if (cur == NULL) return(NULL);
9814
9815
9816 ctxt = xmlCreateDocParserCtxt(cur);
9817 if (ctxt == NULL) return(NULL);
9818 if (sax != NULL) {
9819 ctxt->sax = sax;
9820 ctxt->userData = NULL;
9821 }
9822
9823 xmlParseDocument(ctxt);
9824 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9825 else {
9826 ret = NULL;
9827 xmlFreeDoc(ctxt->myDoc);
9828 ctxt->myDoc = NULL;
9829 }
9830 if (sax != NULL)
9831 ctxt->sax = NULL;
9832 xmlFreeParserCtxt(ctxt);
9833
9834 return(ret);
9835}
9836
9837/**
9838 * xmlParseDoc:
9839 * @cur: a pointer to an array of xmlChar
9840 *
9841 * parse an XML in-memory document and build a tree.
9842 *
9843 * Returns the resulting document tree
9844 */
9845
9846xmlDocPtr
9847xmlParseDoc(xmlChar *cur) {
9848 return(xmlSAXParseDoc(NULL, cur, 0));
9849}
9850
9851
9852/************************************************************************
9853 * *
9854 * Miscellaneous *
9855 * *
9856 ************************************************************************/
9857
9858#ifdef LIBXML_XPATH_ENABLED
9859#include <libxml/xpath.h>
9860#endif
9861
/* Set once xmlInitParser() has run, cleared again by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * Runs the library set-up routines on the first call; while the
 * initialized flag stays set, further calls return without doing
 * anything.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the set-up as done only after every step above has run. */
        xmlParserInitialized = 1;
    }
}
9890
9891/**
9892 * xmlCleanupParser:
9893 *
9894 * Cleanup function for the XML parser. It tries to reclaim all
9895 * parsing related global memory allocated for the parser processing.
9896 * It doesn't deallocate any document related memory. Calling this
9897 * function should not prevent reusing the parser.
9898 */
9899
9900void
9901xmlCleanupParser(void) {
9902 xmlParserInitialized = 0;
9903 xmlCleanupCharEncodingHandlers();
9904 xmlCleanupPredefinedEntities();
9905}
9906
9907/**
9908 * xmlPedanticParserDefault:
9909 * @val: int 0 or 1
9910 *
9911 * Set and return the previous value for enabling pedantic warnings.
9912 *
9913 * Returns the last value for 0 for no substitution, 1 for substitution.
9914 */
9915
9916int
9917xmlPedanticParserDefault(int val) {
9918 int old = xmlPedanticParserDefaultValue;
9919
9920 xmlPedanticParserDefaultValue = val;
9921 return(old);
9922}
9923
9924/**
9925 * xmlSubstituteEntitiesDefault:
9926 * @val: int 0 or 1
9927 *
9928 * Set and return the previous value for default entity support.
9929 * Initially the parser always keep entity references instead of substituting
9930 * entity values in the output. This function has to be used to change the
9931 * default parser behaviour
9932 * SAX::subtituteEntities() has to be used for changing that on a file by
9933 * file basis.
9934 *
9935 * Returns the last value for 0 for no substitution, 1 for substitution.
9936 */
9937
9938int
9939xmlSubstituteEntitiesDefault(int val) {
9940 int old = xmlSubstituteEntitiesDefaultValue;
9941
9942 xmlSubstituteEntitiesDefaultValue = val;
9943 return(old);
9944}
9945
9946/**
9947 * xmlKeepBlanksDefault:
9948 * @val: int 0 or 1
9949 *
9950 * Set and return the previous value for default blanks text nodes support.
9951 * The 1.x version of the parser used an heuristic to try to detect
9952 * ignorable white spaces. As a result the SAX callback was generating
9953 * ignorableWhitespace() callbacks instead of characters() one, and when
9954 * using the DOM output text nodes containing those blanks were not generated.
9955 * The 2.x and later version will switch to the XML standard way and
9956 * ignorableWhitespace() are only generated when running the parser in
9957 * validating mode and when the current element doesn't allow CDATA or
9958 * mixed content.
9959 * This function is provided as a way to force the standard behaviour
9960 * on 1.X libs and to switch back to the old mode for compatibility when
9961 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9962 * by using xmlIsBlankNode() commodity function to detect the "empty"
9963 * nodes generated.
9964 * This value also affect autogeneration of indentation when saving code
9965 * if blanks sections are kept, indentation is not generated.
9966 *
9967 * Returns the last value for 0 for no substitution, 1 for substitution.
9968 */
9969
9970int
9971xmlKeepBlanksDefault(int val) {
9972 int old = xmlKeepBlanksDefaultValue;
9973
9974 xmlKeepBlanksDefaultValue = val;
9975 xmlIndentTreeOutput = !val;
9976 return(old);
9977}
9978