blob: 707bf8a3a285e7b20e2f2c1fe4f612addf197d74 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Initial size, in xmlChars, of the output buffer allocated by
 * xmlStringDecodeEntities().
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
/*
 * Headroom, in xmlChars: xmlStringDecodeEntities() grows its buffer as
 * soon as fewer than this many slots remain after the write index.
 */
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, pushing/popping inputs and decoding entity references
 * emit traces through xmlGenericError().
 */
int xmlParserDebugEntities = 0;
#ifdef VMS
/*
 * Truncated spellings for VMS (see the 14 Nov 2000 note in the file
 * header); the #defines map the full names onto the short ones.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL terminated array of PI target names)
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
192/*
193 * Macros for accessing the content. Those should be used only by the parser,
194 * and not exported.
195 *
196 * Dirty macros, i.e. one often need to make assumption on the context to
197 * use them
198 *
199 * CUR_PTR return the current pointer to the xmlChar to be parsed.
200 * To be used with extreme caution since operations consuming
201 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
203 * This should be used internally by the parser
204 * only to compare to ASCII values otherwise it would break when
205 * running with UTF-8 encoding.
206 * RAW same as CUR but in the input buffer, bypass any token
207 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
209 * to compare on ASCII based substring.
210 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
211 * strings within the parser.
212 *
213 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
214 *
215 * NEXT Skip to the next character, this does the proper decoding
216 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
217 * NEXTL(l) Skip l xmlChars in the input buffer
218 * CUR_CHAR(l) returns the current unicode character (int), set l
219 * to the number of xmlChars used for the encoding [0-5].
220 * CUR_SCHAR same but operate on a string instead of the context
221 * COPY_BUF copy the current unicode char to the target buffer, increment
222 * the index
223 * GROW, SHRINK handling of input buffers
224 */
225
/*
 * All the macros below expect a variable named `ctxt` (the parser
 * context) to be in scope at the point of use.
 */

/* Byte under the input cursor, or -1 while ctxt->token is set. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* ctxt->token when it is set, otherwise the byte under the input cursor. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* Byte located (val) positions after the input cursor. */
#define NXT(val) ctxt->input->cur[(val)]
/* The input cursor itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the cursor (and ctxt->nbChars) by (val), hand a '%' found at
 * the new position to xmlParserHandlePEReference(), and pop the input
 * when it sits on a 0 byte and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * Shrink the input once the cursor is more than INPUT_CHUNK past the
 * buffer base.  NOTE: expands to a bare `if` statement, not do/while(0).
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

/*
 * Grow the input when fewer than INPUT_CHUNK bytes remain before its
 * end.  NOTE: expands to a bare `if` statement, not do/while(0).
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Step the cursor and ctxt->nbChars by exactly one and grow the input
 * when a 0 byte is reached.  Unlike SKIP it never calls
 * xmlParserHandlePEReference() nor xmlPopInput().
 * NOTE: expands to a bare brace block, not do/while(0).
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Update line/col from the byte under the cursor, clear ctxt->token,
 * advance the cursor by l and hand a '%' found at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Store v at b[i] and advance i: a plain one-slot store when l is 1,
 * otherwise i is advanced by whatever xmlCopyChar() returns.
 * NOTE: expands to a bare if/else; keep it inside braces when it is used
 * under another `if`.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v)
278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
387 int val = 0;
388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
496int
497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the capacity of `buffer`, whose size is
 * tracked in the companion variable `buffer_size` (the argument name
 * suffixed with _size).
 *
 * When the reallocation fails the old buffer is released and the
 * ENCLOSING FUNCTION returns NULL: the new pointer is stored only after
 * xmlRealloc() succeeded, so the previous allocation is not lost (and
 * leaked) by overwriting the sole pointer to it.
 *
 * NOTE: expands to a bare brace block, not do/while(0).
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_grown = (xmlChar *)					\
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
    buffer##_size *= 2;							\
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
Daniel Veillard2f362242001-03-02 17:36:21 +00001396 if (ctxt->keepBlanks)
1397 return(0);
1398
Owen Taylor3473f882001-02-23 17:55:21 +00001399 /*
1400 * Check for xml:space value.
1401 */
1402 if (*(ctxt->space) == 1)
1403 return(0);
1404
1405 /*
1406 * Check that the string is made of blanks
1407 */
1408 for (i = 0;i < len;i++)
1409 if (!(IS_BLANK(str[i]))) return(0);
1410
1411 /*
1412 * Look if the element is mixed content in the Dtd if available
1413 */
1414 if (ctxt->myDoc != NULL) {
1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1416 if (ret == 0) return(1);
1417 if (ret == 1) return(0);
1418 }
1419
1420 /*
1421 * Otherwise, heuristic :-\
1422 */
Owen Taylor3473f882001-02-23 17:55:21 +00001423 if (RAW != '<') return(0);
1424 if (ctxt->node == NULL) return(0);
1425 if ((ctxt->node->children == NULL) &&
1426 (RAW == '<') && (NXT(1) == '/')) return(0);
1427
1428 lastChild = xmlGetLastChild(ctxt->node);
1429 if (lastChild == NULL) {
1430 if (ctxt->node->content != NULL) return(0);
1431 } else if (xmlNodeIsText(lastChild))
1432 return(0);
1433 else if ((ctxt->node->children != NULL) &&
1434 (xmlNodeIsText(ctxt->node->children)))
1435 return(0);
1436 return(1);
1437}
1438
/*
 * Forward declarations for recursive behaviour.
 */
1442void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1443void xmlParseReference(xmlParserCtxtPtr ctxt);
1444
1445/************************************************************************
1446 * *
1447 * Extra stuff for namespace support *
1448 * Relates to http://www.w3.org/TR/WD-xml-names *
1449 * *
1450 ************************************************************************/
1451
1452/**
1453 * xmlSplitQName:
1454 * @ctxt: an XML parser context
1455 * @name: an XML parser context
1456 * @prefix: a xmlChar **
1457 *
1458 * parse an UTF8 encoded XML qualified name string
1459 *
1460 * [NS 5] QName ::= (Prefix ':')? LocalPart
1461 *
1462 * [NS 6] Prefix ::= NCName
1463 *
1464 * [NS 7] LocalPart ::= NCName
1465 *
1466 * Returns the local part, and prefix is updated
1467 * to get the Prefix if any.
1468 */
1469
1470xmlChar *
1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1472 xmlChar buf[XML_MAX_NAMELEN + 5];
1473 xmlChar *buffer = NULL;
1474 int len = 0;
1475 int max = XML_MAX_NAMELEN;
1476 xmlChar *ret = NULL;
1477 const xmlChar *cur = name;
1478 int c;
1479
1480 *prefix = NULL;
1481
1482#ifndef XML_XML_NAMESPACE
1483 /* xml: prefix is not really a namespace */
1484 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1485 (cur[2] == 'l') && (cur[3] == ':'))
1486 return(xmlStrdup(name));
1487#endif
1488
1489 /* nasty but valid */
1490 if (cur[0] == ':')
1491 return(xmlStrdup(name));
1492
1493 c = *cur++;
1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1495 buf[len++] = c;
1496 c = *cur++;
1497 }
1498 if (len >= max) {
1499 /*
1500 * Okay someone managed to make a huge name, so he's ready to pay
1501 * for the processing speed.
1502 */
1503 max = len * 2;
1504
1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1506 if (buffer == NULL) {
1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt->userData,
1509 "xmlSplitQName: out of memory\n");
1510 return(NULL);
1511 }
1512 memcpy(buffer, buf, len);
1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1514 if (len + 10 > max) {
1515 max *= 2;
1516 buffer = (xmlChar *) xmlRealloc(buffer,
1517 max * sizeof(xmlChar));
1518 if (buffer == NULL) {
1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1520 ctxt->sax->error(ctxt->userData,
1521 "xmlSplitQName: out of memory\n");
1522 return(NULL);
1523 }
1524 }
1525 buffer[len++] = c;
1526 c = *cur++;
1527 }
1528 buffer[len] = 0;
1529 }
1530
1531 if (buffer == NULL)
1532 ret = xmlStrndup(buf, len);
1533 else {
1534 ret = buffer;
1535 buffer = NULL;
1536 max = XML_MAX_NAMELEN;
1537 }
1538
1539
1540 if (c == ':') {
1541 c = *cur++;
1542 if (c == 0) return(ret);
1543 *prefix = ret;
1544 len = 0;
1545
1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1547 buf[len++] = c;
1548 c = *cur++;
1549 }
1550 if (len >= max) {
1551 /*
1552 * Okay someone managed to make a huge name, so he's ready to pay
1553 * for the processing speed.
1554 */
1555 max = len * 2;
1556
1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1558 if (buffer == NULL) {
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData,
1561 "xmlSplitQName: out of memory\n");
1562 return(NULL);
1563 }
1564 memcpy(buffer, buf, len);
1565 while (c != 0) { /* tested bigname2.xml */
1566 if (len + 10 > max) {
1567 max *= 2;
1568 buffer = (xmlChar *) xmlRealloc(buffer,
1569 max * sizeof(xmlChar));
1570 if (buffer == NULL) {
1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1572 ctxt->sax->error(ctxt->userData,
1573 "xmlSplitQName: out of memory\n");
1574 return(NULL);
1575 }
1576 }
1577 buffer[len++] = c;
1578 c = *cur++;
1579 }
1580 buffer[len] = 0;
1581 }
1582
1583 if (buffer == NULL)
1584 ret = xmlStrndup(buf, len);
1585 else {
1586 ret = buffer;
1587 }
1588 }
1589
1590 return(ret);
1591}
1592
1593/************************************************************************
1594 * *
1595 * The parser itself *
1596 * Relates to http://www.w3.org/TR/REC-xml *
1597 * *
1598 ************************************************************************/
1599
Daniel Veillard21a0f912001-02-25 19:54:14 +00001600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001601/**
1602 * xmlParseName:
1603 * @ctxt: an XML parser context
1604 *
1605 * parse an XML name.
1606 *
1607 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1608 * CombiningChar | Extender
1609 *
1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1611 *
1612 * [6] Names ::= Name (S Name)*
1613 *
1614 * Returns the Name parsed or NULL
1615 */
1616
1617xmlChar *
1618xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001619 const xmlChar *in;
1620 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 int count = 0;
1622
1623 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001624
1625 /*
1626 * Accelerator for simple ASCII names
1627 */
1628 in = ctxt->input->cur;
1629 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1630 ((*in >= 0x41) && (*in <= 0x5A)) ||
1631 (*in == '_') || (*in == ':')) {
1632 in++;
1633 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1634 ((*in >= 0x41) && (*in <= 0x5A)) ||
1635 ((*in >= 0x30) && (*in <= 0x39)) ||
1636 (*in == '_') || (*in == ':'))
1637 in++;
1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1639 count = in - ctxt->input->cur;
1640 ret = xmlStrndup(ctxt->input->cur, count);
1641 ctxt->input->cur = in;
1642 return(ret);
1643 }
1644 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001645 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001646}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001647
Daniel Veillard21a0f912001-02-25 19:54:14 +00001648xmlChar *
1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1650 xmlChar buf[XML_MAX_NAMELEN + 5];
1651 int len = 0, l;
1652 int c;
1653 int count = 0;
1654
1655 /*
1656 * Handler for more complex cases
1657 */
1658 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001659 c = CUR_CHAR(l);
1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1661 (!IS_LETTER(c) && (c != '_') &&
1662 (c != ':'))) {
1663 return(NULL);
1664 }
1665
1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1668 (c == '.') || (c == '-') ||
1669 (c == '_') || (c == ':') ||
1670 (IS_COMBINING(c)) ||
1671 (IS_EXTENDER(c)))) {
1672 if (count++ > 100) {
1673 count = 0;
1674 GROW;
1675 }
1676 COPY_BUF(l,buf,len,c);
1677 NEXTL(l);
1678 c = CUR_CHAR(l);
1679 if (len >= XML_MAX_NAMELEN) {
1680 /*
1681 * Okay someone managed to make a huge name, so he's ready to pay
1682 * for the processing speed.
1683 */
1684 xmlChar *buffer;
1685 int max = len * 2;
1686
1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688 if (buffer == NULL) {
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001691 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
1694 memcpy(buffer, buf, len);
1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1696 (c == '.') || (c == '-') ||
1697 (c == '_') || (c == ':') ||
1698 (IS_COMBINING(c)) ||
1699 (IS_EXTENDER(c))) {
1700 if (count++ > 100) {
1701 count = 0;
1702 GROW;
1703 }
1704 if (len + 10 > max) {
1705 max *= 2;
1706 buffer = (xmlChar *) xmlRealloc(buffer,
1707 max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001711 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001712 return(NULL);
1713 }
1714 }
1715 COPY_BUF(l,buffer,len,c);
1716 NEXTL(l);
1717 c = CUR_CHAR(l);
1718 }
1719 buffer[len] = 0;
1720 return(buffer);
1721 }
1722 }
1723 return(xmlStrndup(buf, len));
1724}
1725
1726/**
1727 * xmlParseStringName:
1728 * @ctxt: an XML parser context
1729 * @str: a pointer to the string pointer (IN/OUT)
1730 *
1731 * parse an XML name.
1732 *
1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1734 * CombiningChar | Extender
1735 *
1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1737 *
1738 * [6] Names ::= Name (S Name)*
1739 *
1740 * Returns the Name parsed or NULL. The str pointer
1741 * is updated to the current location in the string.
1742 */
1743
1744xmlChar *
1745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 const xmlChar *cur = *str;
1748 int len = 0, l;
1749 int c;
1750
1751 c = CUR_SCHAR(cur, l);
1752 if (!IS_LETTER(c) && (c != '_') &&
1753 (c != ':')) {
1754 return(NULL);
1755 }
1756
1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c))) {
1762 COPY_BUF(l,buf,len,c);
1763 cur += l;
1764 c = CUR_SCHAR(cur, l);
1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
1777 "xmlParseStringName: out of memory\n");
1778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (len + 10 > max) {
1787 max *= 2;
1788 buffer = (xmlChar *) xmlRealloc(buffer,
1789 max * sizeof(xmlChar));
1790 if (buffer == NULL) {
1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1792 ctxt->sax->error(ctxt->userData,
1793 "xmlParseStringName: out of memory\n");
1794 return(NULL);
1795 }
1796 }
1797 COPY_BUF(l,buffer,len,c);
1798 cur += l;
1799 c = CUR_SCHAR(cur, l);
1800 }
1801 buffer[len] = 0;
1802 *str = cur;
1803 return(buffer);
1804 }
1805 }
1806 *str = cur;
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseNmtoken:
1812 * @ctxt: an XML parser context
1813 *
1814 * parse an XML Nmtoken.
1815 *
1816 * [7] Nmtoken ::= (NameChar)+
1817 *
1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1819 *
1820 * Returns the Nmtoken parsed or NULL
1821 */
1822
1823xmlChar *
1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1825 xmlChar buf[XML_MAX_NAMELEN + 5];
1826 int len = 0, l;
1827 int c;
1828 int count = 0;
1829
1830 GROW;
1831 c = CUR_CHAR(l);
1832
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (count++ > 100) {
1839 count = 0;
1840 GROW;
1841 }
1842 COPY_BUF(l,buf,len,c);
1843 NEXTL(l);
1844 c = CUR_CHAR(l);
1845 if (len >= XML_MAX_NAMELEN) {
1846 /*
1847 * Okay someone managed to make a huge token, so he's ready to pay
1848 * for the processing speed.
1849 */
1850 xmlChar *buffer;
1851 int max = len * 2;
1852
1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1854 if (buffer == NULL) {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlParseNmtoken: out of memory\n");
1858 return(NULL);
1859 }
1860 memcpy(buffer, buf, len);
1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1862 (c == '.') || (c == '-') ||
1863 (c == '_') || (c == ':') ||
1864 (IS_COMBINING(c)) ||
1865 (IS_EXTENDER(c))) {
1866 if (count++ > 100) {
1867 count = 0;
1868 GROW;
1869 }
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001877 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 NEXTL(l);
1883 c = CUR_CHAR(l);
1884 }
1885 buffer[len] = 0;
1886 return(buffer);
1887 }
1888 }
1889 if (len == 0)
1890 return(NULL);
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseEntityValue:
1896 * @ctxt: an XML parser context
1897 * @orig: if non-NULL store a copy of the original entity value
1898 *
1899 * parse a value for ENTITY declarations
1900 *
1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1902 * "'" ([^%&'] | PEReference | Reference)* "'"
1903 *
1904 * Returns the EntityValue parsed with reference substitued or NULL
1905 */
1906
1907xmlChar *
1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1909 xmlChar *buf = NULL;
1910 int len = 0;
1911 int size = XML_PARSER_BUFFER_SIZE;
1912 int c, l;
1913 xmlChar stop;
1914 xmlChar *ret = NULL;
1915 const xmlChar *cur = NULL;
1916 xmlParserInputPtr input;
1917
1918 if (RAW == '"') stop = '"';
1919 else if (RAW == '\'') stop = '\'';
1920 else {
1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1924 ctxt->wellFormed = 0;
1925 ctxt->disableSAX = 1;
1926 return(NULL);
1927 }
1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1929 if (buf == NULL) {
1930 xmlGenericError(xmlGenericErrorContext,
1931 "malloc of %d byte failed\n", size);
1932 return(NULL);
1933 }
1934
1935 /*
1936 * The content of the entity definition is copied in a buffer.
1937 */
1938
1939 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1940 input = ctxt->input;
1941 GROW;
1942 NEXT;
1943 c = CUR_CHAR(l);
1944 /*
1945 * NOTE: 4.4.5 Included in Literal
1946 * When a parameter entity reference appears in a literal entity
1947 * value, ... a single or double quote character in the replacement
1948 * text is always treated as a normal data character and will not
1949 * terminate the literal.
1950 * In practice it means we stop the loop only when back at parsing
1951 * the initial entity and the quote is found
1952 */
1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1954 (ctxt->input != input))) {
1955 if (len + 5 >= size) {
1956 size *= 2;
1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1958 if (buf == NULL) {
1959 xmlGenericError(xmlGenericErrorContext,
1960 "realloc of %d byte failed\n", size);
1961 return(NULL);
1962 }
1963 }
1964 COPY_BUF(l,buf,len,c);
1965 NEXTL(l);
1966 /*
1967 * Pop-up of finished entities.
1968 */
1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1970 xmlPopInput(ctxt);
1971
1972 GROW;
1973 c = CUR_CHAR(l);
1974 if (c == 0) {
1975 GROW;
1976 c = CUR_CHAR(l);
1977 }
1978 }
1979 buf[len] = 0;
1980
1981 /*
1982 * Raise problem w.r.t. '&' and '%' being used in non-entities
1983 * reference constructs. Note Charref will be handled in
1984 * xmlStringDecodeEntities()
1985 */
1986 cur = buf;
1987 while (*cur != 0) { /* non input consuming */
1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1989 xmlChar *name;
1990 xmlChar tmp = *cur;
1991
1992 cur++;
1993 name = xmlParseStringName(ctxt, &cur);
1994 if ((name == NULL) || (*cur != ';')) {
1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "EntityValue: '%c' forbidden except for entities references\n",
1999 tmp);
2000 ctxt->wellFormed = 0;
2001 ctxt->disableSAX = 1;
2002 }
2003 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "EntityValue: PEReferences forbidden in internal subset\n",
2008 tmp);
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 }
2012 if (name != NULL)
2013 xmlFree(name);
2014 }
2015 cur++;
2016 }
2017
2018 /*
2019 * Then PEReference entities are substituted.
2020 */
2021 if (c != stop) {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 xmlFree(buf);
2028 } else {
2029 NEXT;
2030 /*
2031 * NOTE: 4.4.7 Bypassed
2032 * When a general entity reference appears in the EntityValue in
2033 * an entity declaration, it is bypassed and left as is.
2034 * so XML_SUBSTITUTE_REF is not set here.
2035 */
2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2037 0, 0, 0);
2038 if (orig != NULL)
2039 *orig = buf;
2040 else
2041 xmlFree(buf);
2042 }
2043
2044 return(ret);
2045}
2046
/**
 * xmlParseAttValue:
 * @ctxt:  an XML parser context
 *
 * parse a value for an attribute
 * Note: the parser won't do substitution of entities here, this
 * will be handled later in xmlStringGetNodeList
 *
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows: 
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity 
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity 
 * - other characters are processed by appending them to the normalized value 
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.  
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * If the value contains an unescaped '<' or is not closed by the
 * opening quote, an error is raised and the context is flagged as not
 * well formed, but the text collected so far is still returned.
 *
 * NOTE(review): the buffer is grown through the growBuffer() macro,
 * defined outside of this function; it presumably relies on the local
 * names buf and buf_size and may return on failure - confirm before
 * renaming or restructuring anything here.
 *
 * Returns the AttValue parsed or NULL (no opening quote, or initial
 *         allocation failure). The value has to be freed by the caller.
 */

xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
    xmlChar limit = 0;          /* the quote char which opened the value */
    xmlChar *buf = NULL;        /* the normalized value being built */
    int len = 0;                /* number of xmlChar used in buf */
    int buf_size = 0;           /* allocated size of buf */
    int c, l;
    xmlChar *current = NULL;    /* cursor on text being appended to buf */
    xmlEntityPtr ent;


    SHRINK;
    /* the value must start with a quote, remembered as the end marker */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
    if (buf == NULL) {
        perror("xmlParseAttValue: malloc failed");
        return(NULL);
    }

    /*
     * Ok loop until we reach one of the ending char or a size limit.
     * A pending ctxt->token keeps the loop going even on the closing
     * quote or on '<'.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
           (c != '<')) || (ctxt->token != 0)) {
        if (c == 0) break;
        if (ctxt->token == '&') {
            /*
             * The reparsing will be done in xmlStringGetNodeList()
             * called by the attribute() function in SAX.c
             */
            static xmlChar buffer[6] = "&#38;";

            if (len > buf_size - 10) {
                growBuffer(buf);
            }
            current = &buffer[0];
            while (*current != 0) { /* non input consuming */
                buf[len++] = *current++;
            }
            ctxt->token = 0;
        } else if (c == '&') {
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);
                if (val == '&') {
                    /*
                     * The reparsing will be done in xmlStringGetNodeList()
                     * called by the attribute() function in SAX.c
                     */
                    static xmlChar buffer[6] = "&#38;";

                    if (len > buf_size - 10) {
                        growBuffer(buf);
                    }
                    current = &buffer[0];
                    while (*current != 0) { /* non input consuming */
                        buf[len++] = *current++;
                    }
                } else {
                    /*
                     * NOTE(review): no room check right before this
                     * write; it seems to rely on the margin of 10 kept
                     * by the other branches - confirm.
                     */
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                if ((ent != NULL) && 
                    (ctxt->replaceEntities != 0)) {
                    xmlChar *rep;

                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        /* substitute the decoded content of the entity */
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF, 0, 0, 0);
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                buf[len++] = *current++;
                                if (len > buf_size - 10) {
                                    growBuffer(buf);
                                }
                            }
                            xmlFree(rep);
                        }
                    } else {
                        /*
                         * predefined entity: only the first xmlChar of
                         * its content is appended, without room check.
                         */
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL)) {
                        xmlChar *rep;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF, 0, 0, 0);
                        if (rep != NULL)
                            xmlFree(rep);
                    }

                    /*
                     * Just output the reference
                     *
                     * NOTE(review): growBuffer() is invoked once here
                     * whatever the length i of the name; confirm that a
                     * single growth step always leaves room for
                     * '&' name ';'.
                     */
                    buf[len++] = '&';
                    if (len > buf_size - i - 10) {
                        growBuffer(buf);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            /* plain character: white spaces are normalized to #x20 */
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                COPY_BUF(l,buf,len,0x20);
                if (len > buf_size - 10) {
                    growBuffer(buf);
                }
            } else {
                COPY_BUF(l,buf,len,c);
                if (len > buf_size - 10) {
                    growBuffer(buf);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    buf[len++] = 0;
    /* check what stopped the loop; buf is returned in all three cases */
    if (RAW == '<') {
        ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
               "Unescaped '<' not allowed in attributes values\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    } else if (RAW != limit) {
        ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    } else
        NEXT;
    return(buf);
}
2248
2249/**
2250 * xmlParseSystemLiteral:
2251 * @ctxt: an XML parser context
2252 *
2253 * parse an XML Literal
2254 *
2255 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2256 *
2257 * Returns the SystemLiteral parsed or NULL
2258 */
2259
2260xmlChar *
2261xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2262 xmlChar *buf = NULL;
2263 int len = 0;
2264 int size = XML_PARSER_BUFFER_SIZE;
2265 int cur, l;
2266 xmlChar stop;
2267 int state = ctxt->instate;
2268 int count = 0;
2269
2270 SHRINK;
2271 if (RAW == '"') {
2272 NEXT;
2273 stop = '"';
2274 } else if (RAW == '\'') {
2275 NEXT;
2276 stop = '\'';
2277 } else {
2278 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2280 ctxt->sax->error(ctxt->userData,
2281 "SystemLiteral \" or ' expected\n");
2282 ctxt->wellFormed = 0;
2283 ctxt->disableSAX = 1;
2284 return(NULL);
2285 }
2286
2287 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2288 if (buf == NULL) {
2289 xmlGenericError(xmlGenericErrorContext,
2290 "malloc of %d byte failed\n", size);
2291 return(NULL);
2292 }
2293 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2294 cur = CUR_CHAR(l);
2295 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2296 if (len + 5 >= size) {
2297 size *= 2;
2298 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2299 if (buf == NULL) {
2300 xmlGenericError(xmlGenericErrorContext,
2301 "realloc of %d byte failed\n", size);
2302 ctxt->instate = (xmlParserInputState) state;
2303 return(NULL);
2304 }
2305 }
2306 count++;
2307 if (count > 50) {
2308 GROW;
2309 count = 0;
2310 }
2311 COPY_BUF(l,buf,len,cur);
2312 NEXTL(l);
2313 cur = CUR_CHAR(l);
2314 if (cur == 0) {
2315 GROW;
2316 SHRINK;
2317 cur = CUR_CHAR(l);
2318 }
2319 }
2320 buf[len] = 0;
2321 ctxt->instate = (xmlParserInputState) state;
2322 if (!IS_CHAR(cur)) {
2323 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2325 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2326 ctxt->wellFormed = 0;
2327 ctxt->disableSAX = 1;
2328 } else {
2329 NEXT;
2330 }
2331 return(buf);
2332}
2333
2334/**
2335 * xmlParsePubidLiteral:
2336 * @ctxt: an XML parser context
2337 *
2338 * parse an XML public literal
2339 *
2340 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2341 *
2342 * Returns the PubidLiteral parsed or NULL.
2343 */
2344
2345xmlChar *
2346xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2347 xmlChar *buf = NULL;
2348 int len = 0;
2349 int size = XML_PARSER_BUFFER_SIZE;
2350 xmlChar cur;
2351 xmlChar stop;
2352 int count = 0;
2353
2354 SHRINK;
2355 if (RAW == '"') {
2356 NEXT;
2357 stop = '"';
2358 } else if (RAW == '\'') {
2359 NEXT;
2360 stop = '\'';
2361 } else {
2362 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2364 ctxt->sax->error(ctxt->userData,
2365 "SystemLiteral \" or ' expected\n");
2366 ctxt->wellFormed = 0;
2367 ctxt->disableSAX = 1;
2368 return(NULL);
2369 }
2370 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2371 if (buf == NULL) {
2372 xmlGenericError(xmlGenericErrorContext,
2373 "malloc of %d byte failed\n", size);
2374 return(NULL);
2375 }
2376 cur = CUR;
2377 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2378 if (len + 1 >= size) {
2379 size *= 2;
2380 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2381 if (buf == NULL) {
2382 xmlGenericError(xmlGenericErrorContext,
2383 "realloc of %d byte failed\n", size);
2384 return(NULL);
2385 }
2386 }
2387 buf[len++] = cur;
2388 count++;
2389 if (count > 50) {
2390 GROW;
2391 count = 0;
2392 }
2393 NEXT;
2394 cur = CUR;
2395 if (cur == 0) {
2396 GROW;
2397 SHRINK;
2398 cur = CUR;
2399 }
2400 }
2401 buf[len] = 0;
2402 if (cur != stop) {
2403 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2405 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2406 ctxt->wellFormed = 0;
2407 ctxt->disableSAX = 1;
2408 } else {
2409 NEXT;
2410 }
2411 return(buf);
2412}
2413
Daniel Veillard48b2f892001-02-25 16:11:03 +00002414void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002415/**
2416 * xmlParseCharData:
2417 * @ctxt: an XML parser context
2418 * @cdata: int indicating whether we are within a CDATA section
2419 *
2420 * parse a CharData section.
2421 * if we are within a CDATA section ']]>' marks an end of section.
2422 *
2423 * The right angle bracket (>) may be represented using the string "&gt;",
2424 * and must, for compatibility, be escaped using "&gt;" or a character
2425 * reference when it appears in the string "]]>" in content, when that
2426 * string is not marking the end of a CDATA section.
2427 *
2428 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2429 */
2430
2431void
2432xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002433 const xmlChar *in;
2434 int nbchar = 0;
2435
2436 SHRINK;
2437 GROW;
2438 /*
2439 * Accelerated common case where input don't need to be
2440 * modified before passing it to the handler.
2441 */
2442 if ((ctxt->token == 0) && (!cdata)) {
2443 in = ctxt->input->cur;
2444 do {
2445 while (((*in >= 0x20) && (*in != '<') &&
2446 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2447 in++;
2448 if (*in == 0xA) {
2449 ctxt->input->line++;
2450 continue; /* while */
2451 }
2452 nbchar = in - ctxt->input->cur;
2453 if (IS_BLANK(*ctxt->input->cur) &&
2454 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2455 if (ctxt->sax->ignorableWhitespace != NULL)
2456 ctxt->sax->ignorableWhitespace(ctxt->userData,
2457 ctxt->input->cur, nbchar);
2458 } else {
2459 if (ctxt->sax->characters != NULL)
2460 ctxt->sax->characters(ctxt->userData,
2461 ctxt->input->cur, nbchar);
2462 }
2463 ctxt->input->cur = in;
2464 if (*in == 0xD) {
2465 in++;
2466 if (*in == 0xA) {
2467 ctxt->input->cur = in;
2468 in++;
2469 ctxt->input->line++;
2470 continue; /* while */
2471 }
2472 in--;
2473 }
2474 if ((*in == '<') || (*in == '&')) {
2475 return;
2476 }
2477 SHRINK;
2478 GROW;
2479 in = ctxt->input->cur;
2480 } while ((*in >= 0x20) && (*in <= 0x7F));
2481 nbchar = 0;
2482 }
2483 xmlParseCharDataComplex(ctxt, cdata);
2484}
2485
2486void
2487xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002488 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2489 int nbchar = 0;
2490 int cur, l;
2491 int count = 0;
2492
2493 SHRINK;
2494 GROW;
2495 cur = CUR_CHAR(l);
2496 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2497 ((cur != '&') || (ctxt->token == '&')) &&
2498 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2499 if ((cur == ']') && (NXT(1) == ']') &&
2500 (NXT(2) == '>')) {
2501 if (cdata) break;
2502 else {
2503 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2505 ctxt->sax->error(ctxt->userData,
2506 "Sequence ']]>' not allowed in content\n");
2507 /* Should this be relaxed ??? I see a "must here */
2508 ctxt->wellFormed = 0;
2509 ctxt->disableSAX = 1;
2510 }
2511 }
2512 COPY_BUF(l,buf,nbchar,cur);
2513 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2514 /*
2515 * Ok the segment is to be consumed as chars.
2516 */
2517 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2518 if (areBlanks(ctxt, buf, nbchar)) {
2519 if (ctxt->sax->ignorableWhitespace != NULL)
2520 ctxt->sax->ignorableWhitespace(ctxt->userData,
2521 buf, nbchar);
2522 } else {
2523 if (ctxt->sax->characters != NULL)
2524 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2525 }
2526 }
2527 nbchar = 0;
2528 }
2529 count++;
2530 if (count > 50) {
2531 GROW;
2532 count = 0;
2533 }
2534 NEXTL(l);
2535 cur = CUR_CHAR(l);
2536 }
2537 if (nbchar != 0) {
2538 /*
2539 * Ok the segment is to be consumed as chars.
2540 */
2541 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2542 if (areBlanks(ctxt, buf, nbchar)) {
2543 if (ctxt->sax->ignorableWhitespace != NULL)
2544 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2545 } else {
2546 if (ctxt->sax->characters != NULL)
2547 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2548 }
2549 }
2550 }
2551}
2552
2553/**
2554 * xmlParseExternalID:
2555 * @ctxt: an XML parser context
2556 * @publicID: a xmlChar** receiving PubidLiteral
2557 * @strict: indicate whether we should restrict parsing to only
2558 * production [75], see NOTE below
2559 *
2560 * Parse an External ID or a Public ID
2561 *
2562 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2563 * 'PUBLIC' S PubidLiteral S SystemLiteral
2564 *
2565 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2566 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2567 *
2568 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2569 *
2570 * Returns the function returns SystemLiteral and in the second
2571 * case publicID receives PubidLiteral, is strict is off
2572 * it is possible to return NULL and have publicID set.
2573 */
2574
2575xmlChar *
2576xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2577 xmlChar *URI = NULL;
2578
2579 SHRINK;
2580 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2581 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2582 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2583 SKIP(6);
2584 if (!IS_BLANK(CUR)) {
2585 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2587 ctxt->sax->error(ctxt->userData,
2588 "Space required after 'SYSTEM'\n");
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 }
2592 SKIP_BLANKS;
2593 URI = xmlParseSystemLiteral(ctxt);
2594 if (URI == NULL) {
2595 ctxt->errNo = XML_ERR_URI_REQUIRED;
2596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2597 ctxt->sax->error(ctxt->userData,
2598 "xmlParseExternalID: SYSTEM, no URI\n");
2599 ctxt->wellFormed = 0;
2600 ctxt->disableSAX = 1;
2601 }
2602 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2603 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2604 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2605 SKIP(6);
2606 if (!IS_BLANK(CUR)) {
2607 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2609 ctxt->sax->error(ctxt->userData,
2610 "Space required after 'PUBLIC'\n");
2611 ctxt->wellFormed = 0;
2612 ctxt->disableSAX = 1;
2613 }
2614 SKIP_BLANKS;
2615 *publicID = xmlParsePubidLiteral(ctxt);
2616 if (*publicID == NULL) {
2617 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2619 ctxt->sax->error(ctxt->userData,
2620 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2621 ctxt->wellFormed = 0;
2622 ctxt->disableSAX = 1;
2623 }
2624 if (strict) {
2625 /*
2626 * We don't handle [83] so "S SystemLiteral" is required.
2627 */
2628 if (!IS_BLANK(CUR)) {
2629 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2631 ctxt->sax->error(ctxt->userData,
2632 "Space required after the Public Identifier\n");
2633 ctxt->wellFormed = 0;
2634 ctxt->disableSAX = 1;
2635 }
2636 } else {
2637 /*
2638 * We handle [83] so we return immediately, if
2639 * "S SystemLiteral" is not detected. From a purely parsing
2640 * point of view that's a nice mess.
2641 */
2642 const xmlChar *ptr;
2643 GROW;
2644
2645 ptr = CUR_PTR;
2646 if (!IS_BLANK(*ptr)) return(NULL);
2647
2648 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2649 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2650 }
2651 SKIP_BLANKS;
2652 URI = xmlParseSystemLiteral(ctxt);
2653 if (URI == NULL) {
2654 ctxt->errNo = XML_ERR_URI_REQUIRED;
2655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2656 ctxt->sax->error(ctxt->userData,
2657 "xmlParseExternalID: PUBLIC, no URI\n");
2658 ctxt->wellFormed = 0;
2659 ctxt->disableSAX = 1;
2660 }
2661 }
2662 return(URI);
2663}
2664
2665/**
2666 * xmlParseComment:
2667 * @ctxt: an XML parser context
2668 *
2669 * Skip an XML (SGML) comment <!-- .... -->
2670 * The spec says that "For compatibility, the string "--" (double-hyphen)
2671 * must not occur within comments. "
2672 *
2673 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2674 */
2675void
2676xmlParseComment(xmlParserCtxtPtr ctxt) {
2677 xmlChar *buf = NULL;
2678 int len;
2679 int size = XML_PARSER_BUFFER_SIZE;
2680 int q, ql;
2681 int r, rl;
2682 int cur, l;
2683 xmlParserInputState state;
2684 xmlParserInputPtr input = ctxt->input;
2685 int count = 0;
2686
2687 /*
2688 * Check that there is a comment right here.
2689 */
2690 if ((RAW != '<') || (NXT(1) != '!') ||
2691 (NXT(2) != '-') || (NXT(3) != '-')) return;
2692
2693 state = ctxt->instate;
2694 ctxt->instate = XML_PARSER_COMMENT;
2695 SHRINK;
2696 SKIP(4);
2697 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2698 if (buf == NULL) {
2699 xmlGenericError(xmlGenericErrorContext,
2700 "malloc of %d byte failed\n", size);
2701 ctxt->instate = state;
2702 return;
2703 }
2704 q = CUR_CHAR(ql);
2705 NEXTL(ql);
2706 r = CUR_CHAR(rl);
2707 NEXTL(rl);
2708 cur = CUR_CHAR(l);
2709 len = 0;
2710 while (IS_CHAR(cur) && /* checked */
2711 ((cur != '>') ||
2712 (r != '-') || (q != '-'))) {
2713 if ((r == '-') && (q == '-') && (len > 1)) {
2714 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2716 ctxt->sax->error(ctxt->userData,
2717 "Comment must not contain '--' (double-hyphen)`\n");
2718 ctxt->wellFormed = 0;
2719 ctxt->disableSAX = 1;
2720 }
2721 if (len + 5 >= size) {
2722 size *= 2;
2723 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2724 if (buf == NULL) {
2725 xmlGenericError(xmlGenericErrorContext,
2726 "realloc of %d byte failed\n", size);
2727 ctxt->instate = state;
2728 return;
2729 }
2730 }
2731 COPY_BUF(ql,buf,len,q);
2732 q = r;
2733 ql = rl;
2734 r = cur;
2735 rl = l;
2736
2737 count++;
2738 if (count > 50) {
2739 GROW;
2740 count = 0;
2741 }
2742 NEXTL(l);
2743 cur = CUR_CHAR(l);
2744 if (cur == 0) {
2745 SHRINK;
2746 GROW;
2747 cur = CUR_CHAR(l);
2748 }
2749 }
2750 buf[len] = 0;
2751 if (!IS_CHAR(cur)) {
2752 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "Comment not terminated \n<!--%.50s\n", buf);
2756 ctxt->wellFormed = 0;
2757 ctxt->disableSAX = 1;
2758 xmlFree(buf);
2759 } else {
2760 if (input != ctxt->input) {
2761 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2763 ctxt->sax->error(ctxt->userData,
2764"Comment doesn't start and stop in the same entity\n");
2765 ctxt->wellFormed = 0;
2766 ctxt->disableSAX = 1;
2767 }
2768 NEXT;
2769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2770 (!ctxt->disableSAX))
2771 ctxt->sax->comment(ctxt->userData, buf);
2772 xmlFree(buf);
2773 }
2774 ctxt->instate = state;
2775}
2776
2777/**
2778 * xmlParsePITarget:
2779 * @ctxt: an XML parser context
2780 *
2781 * parse the name of a PI
2782 *
2783 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2784 *
2785 * Returns the PITarget name or NULL
2786 */
2787
2788xmlChar *
2789xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2790 xmlChar *name;
2791
2792 name = xmlParseName(ctxt);
2793 if ((name != NULL) &&
2794 ((name[0] == 'x') || (name[0] == 'X')) &&
2795 ((name[1] == 'm') || (name[1] == 'M')) &&
2796 ((name[2] == 'l') || (name[2] == 'L'))) {
2797 int i;
2798 if ((name[0] == 'x') && (name[1] == 'm') &&
2799 (name[2] == 'l') && (name[3] == 0)) {
2800 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2802 ctxt->sax->error(ctxt->userData,
2803 "XML declaration allowed only at the start of the document\n");
2804 ctxt->wellFormed = 0;
2805 ctxt->disableSAX = 1;
2806 return(name);
2807 } else if (name[3] == 0) {
2808 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2810 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2811 ctxt->wellFormed = 0;
2812 ctxt->disableSAX = 1;
2813 return(name);
2814 }
2815 for (i = 0;;i++) {
2816 if (xmlW3CPIs[i] == NULL) break;
2817 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2818 return(name);
2819 }
2820 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2821 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2822 ctxt->sax->warning(ctxt->userData,
2823 "xmlParsePItarget: invalid name prefix 'xml'\n");
2824 }
2825 }
2826 return(name);
2827}
2828
2829/**
2830 * xmlParsePI:
2831 * @ctxt: an XML parser context
2832 *
2833 * parse an XML Processing Instruction.
2834 *
2835 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2836 *
2837 * The processing is transfered to SAX once parsed.
2838 */
2839
2840void
2841xmlParsePI(xmlParserCtxtPtr ctxt) {
2842 xmlChar *buf = NULL;
2843 int len = 0;
2844 int size = XML_PARSER_BUFFER_SIZE;
2845 int cur, l;
2846 xmlChar *target;
2847 xmlParserInputState state;
2848 int count = 0;
2849
2850 if ((RAW == '<') && (NXT(1) == '?')) {
2851 xmlParserInputPtr input = ctxt->input;
2852 state = ctxt->instate;
2853 ctxt->instate = XML_PARSER_PI;
2854 /*
2855 * this is a Processing Instruction.
2856 */
2857 SKIP(2);
2858 SHRINK;
2859
2860 /*
2861 * Parse the target name and check for special support like
2862 * namespace.
2863 */
2864 target = xmlParsePITarget(ctxt);
2865 if (target != NULL) {
2866 if ((RAW == '?') && (NXT(1) == '>')) {
2867 if (input != ctxt->input) {
2868 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2870 ctxt->sax->error(ctxt->userData,
2871 "PI declaration doesn't start and stop in the same entity\n");
2872 ctxt->wellFormed = 0;
2873 ctxt->disableSAX = 1;
2874 }
2875 SKIP(2);
2876
2877 /*
2878 * SAX: PI detected.
2879 */
2880 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2881 (ctxt->sax->processingInstruction != NULL))
2882 ctxt->sax->processingInstruction(ctxt->userData,
2883 target, NULL);
2884 ctxt->instate = state;
2885 xmlFree(target);
2886 return;
2887 }
2888 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2889 if (buf == NULL) {
2890 xmlGenericError(xmlGenericErrorContext,
2891 "malloc of %d byte failed\n", size);
2892 ctxt->instate = state;
2893 return;
2894 }
2895 cur = CUR;
2896 if (!IS_BLANK(cur)) {
2897 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2899 ctxt->sax->error(ctxt->userData,
2900 "xmlParsePI: PI %s space expected\n", target);
2901 ctxt->wellFormed = 0;
2902 ctxt->disableSAX = 1;
2903 }
2904 SKIP_BLANKS;
2905 cur = CUR_CHAR(l);
2906 while (IS_CHAR(cur) && /* checked */
2907 ((cur != '?') || (NXT(1) != '>'))) {
2908 if (len + 5 >= size) {
2909 size *= 2;
2910 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2911 if (buf == NULL) {
2912 xmlGenericError(xmlGenericErrorContext,
2913 "realloc of %d byte failed\n", size);
2914 ctxt->instate = state;
2915 return;
2916 }
2917 }
2918 count++;
2919 if (count > 50) {
2920 GROW;
2921 count = 0;
2922 }
2923 COPY_BUF(l,buf,len,cur);
2924 NEXTL(l);
2925 cur = CUR_CHAR(l);
2926 if (cur == 0) {
2927 SHRINK;
2928 GROW;
2929 cur = CUR_CHAR(l);
2930 }
2931 }
2932 buf[len] = 0;
2933 if (cur != '?') {
2934 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2936 ctxt->sax->error(ctxt->userData,
2937 "xmlParsePI: PI %s never end ...\n", target);
2938 ctxt->wellFormed = 0;
2939 ctxt->disableSAX = 1;
2940 } else {
2941 if (input != ctxt->input) {
2942 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2944 ctxt->sax->error(ctxt->userData,
2945 "PI declaration doesn't start and stop in the same entity\n");
2946 ctxt->wellFormed = 0;
2947 ctxt->disableSAX = 1;
2948 }
2949 SKIP(2);
2950
2951 /*
2952 * SAX: PI detected.
2953 */
2954 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2955 (ctxt->sax->processingInstruction != NULL))
2956 ctxt->sax->processingInstruction(ctxt->userData,
2957 target, buf);
2958 }
2959 xmlFree(buf);
2960 xmlFree(target);
2961 } else {
2962 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2964 ctxt->sax->error(ctxt->userData,
2965 "xmlParsePI : no target name\n");
2966 ctxt->wellFormed = 0;
2967 ctxt->disableSAX = 1;
2968 }
2969 ctxt->instate = state;
2970 }
2971}
2972
2973/**
2974 * xmlParseNotationDecl:
2975 * @ctxt: an XML parser context
2976 *
2977 * parse a notation declaration
2978 *
2979 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2980 *
2981 * Hence there is actually 3 choices:
2982 * 'PUBLIC' S PubidLiteral
2983 * 'PUBLIC' S PubidLiteral S SystemLiteral
2984 * and 'SYSTEM' S SystemLiteral
2985 *
2986 * See the NOTE on xmlParseExternalID().
2987 */
2988
2989void
2990xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
2991 xmlChar *name;
2992 xmlChar *Pubid;
2993 xmlChar *Systemid;
2994
2995 if ((RAW == '<') && (NXT(1) == '!') &&
2996 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2997 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2998 (NXT(6) == 'T') && (NXT(7) == 'I') &&
2999 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3000 xmlParserInputPtr input = ctxt->input;
3001 SHRINK;
3002 SKIP(10);
3003 if (!IS_BLANK(CUR)) {
3004 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3006 ctxt->sax->error(ctxt->userData,
3007 "Space required after '<!NOTATION'\n");
3008 ctxt->wellFormed = 0;
3009 ctxt->disableSAX = 1;
3010 return;
3011 }
3012 SKIP_BLANKS;
3013
Daniel Veillard29631a82001-03-05 09:49:20 +00003014 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003015 if (name == NULL) {
3016 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3018 ctxt->sax->error(ctxt->userData,
3019 "NOTATION: Name expected here\n");
3020 ctxt->wellFormed = 0;
3021 ctxt->disableSAX = 1;
3022 return;
3023 }
3024 if (!IS_BLANK(CUR)) {
3025 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3027 ctxt->sax->error(ctxt->userData,
3028 "Space required after the NOTATION name'\n");
3029 ctxt->wellFormed = 0;
3030 ctxt->disableSAX = 1;
3031 return;
3032 }
3033 SKIP_BLANKS;
3034
3035 /*
3036 * Parse the IDs.
3037 */
3038 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3039 SKIP_BLANKS;
3040
3041 if (RAW == '>') {
3042 if (input != ctxt->input) {
3043 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3045 ctxt->sax->error(ctxt->userData,
3046"Notation declaration doesn't start and stop in the same entity\n");
3047 ctxt->wellFormed = 0;
3048 ctxt->disableSAX = 1;
3049 }
3050 NEXT;
3051 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3052 (ctxt->sax->notationDecl != NULL))
3053 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3054 } else {
3055 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3057 ctxt->sax->error(ctxt->userData,
3058 "'>' required to close NOTATION declaration\n");
3059 ctxt->wellFormed = 0;
3060 ctxt->disableSAX = 1;
3061 }
3062 xmlFree(name);
3063 if (Systemid != NULL) xmlFree(Systemid);
3064 if (Pubid != NULL) xmlFree(Pubid);
3065 }
3066}
3067
3068/**
3069 * xmlParseEntityDecl:
3070 * @ctxt: an XML parser context
3071 *
3072 * parse <!ENTITY declarations
3073 *
3074 * [70] EntityDecl ::= GEDecl | PEDecl
3075 *
3076 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3077 *
3078 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3079 *
3080 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3081 *
3082 * [74] PEDef ::= EntityValue | ExternalID
3083 *
3084 * [76] NDataDecl ::= S 'NDATA' S Name
3085 *
3086 * [ VC: Notation Declared ]
3087 * The Name must match the declared name of a notation.
3088 */
3089
3090void
3091xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3092 xmlChar *name = NULL;
3093 xmlChar *value = NULL;
3094 xmlChar *URI = NULL, *literal = NULL;
3095 xmlChar *ndata = NULL;
3096 int isParameter = 0;
3097 xmlChar *orig = NULL;
3098
3099 GROW;
3100 if ((RAW == '<') && (NXT(1) == '!') &&
3101 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3102 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3103 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3104 xmlParserInputPtr input = ctxt->input;
3105 ctxt->instate = XML_PARSER_ENTITY_DECL;
3106 SHRINK;
3107 SKIP(8);
3108 if (!IS_BLANK(CUR)) {
3109 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3111 ctxt->sax->error(ctxt->userData,
3112 "Space required after '<!ENTITY'\n");
3113 ctxt->wellFormed = 0;
3114 ctxt->disableSAX = 1;
3115 }
3116 SKIP_BLANKS;
3117
3118 if (RAW == '%') {
3119 NEXT;
3120 if (!IS_BLANK(CUR)) {
3121 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3123 ctxt->sax->error(ctxt->userData,
3124 "Space required after '%'\n");
3125 ctxt->wellFormed = 0;
3126 ctxt->disableSAX = 1;
3127 }
3128 SKIP_BLANKS;
3129 isParameter = 1;
3130 }
3131
Daniel Veillard29631a82001-03-05 09:49:20 +00003132 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (name == NULL) {
3134 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3136 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3137 ctxt->wellFormed = 0;
3138 ctxt->disableSAX = 1;
3139 return;
3140 }
3141 if (!IS_BLANK(CUR)) {
3142 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3144 ctxt->sax->error(ctxt->userData,
3145 "Space required after the entity name\n");
3146 ctxt->wellFormed = 0;
3147 ctxt->disableSAX = 1;
3148 }
3149 SKIP_BLANKS;
3150
3151 /*
3152 * handle the various case of definitions...
3153 */
3154 if (isParameter) {
3155 if ((RAW == '"') || (RAW == '\'')) {
3156 value = xmlParseEntityValue(ctxt, &orig);
3157 if (value) {
3158 if ((ctxt->sax != NULL) &&
3159 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3160 ctxt->sax->entityDecl(ctxt->userData, name,
3161 XML_INTERNAL_PARAMETER_ENTITY,
3162 NULL, NULL, value);
3163 }
3164 } else {
3165 URI = xmlParseExternalID(ctxt, &literal, 1);
3166 if ((URI == NULL) && (literal == NULL)) {
3167 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3169 ctxt->sax->error(ctxt->userData,
3170 "Entity value required\n");
3171 ctxt->wellFormed = 0;
3172 ctxt->disableSAX = 1;
3173 }
3174 if (URI) {
3175 xmlURIPtr uri;
3176
3177 uri = xmlParseURI((const char *) URI);
3178 if (uri == NULL) {
3179 ctxt->errNo = XML_ERR_INVALID_URI;
3180 if ((ctxt->sax != NULL) &&
3181 (!ctxt->disableSAX) &&
3182 (ctxt->sax->error != NULL))
3183 ctxt->sax->error(ctxt->userData,
3184 "Invalid URI: %s\n", URI);
3185 ctxt->wellFormed = 0;
3186 } else {
3187 if (uri->fragment != NULL) {
3188 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3189 if ((ctxt->sax != NULL) &&
3190 (!ctxt->disableSAX) &&
3191 (ctxt->sax->error != NULL))
3192 ctxt->sax->error(ctxt->userData,
3193 "Fragment not allowed: %s\n", URI);
3194 ctxt->wellFormed = 0;
3195 } else {
3196 if ((ctxt->sax != NULL) &&
3197 (!ctxt->disableSAX) &&
3198 (ctxt->sax->entityDecl != NULL))
3199 ctxt->sax->entityDecl(ctxt->userData, name,
3200 XML_EXTERNAL_PARAMETER_ENTITY,
3201 literal, URI, NULL);
3202 }
3203 xmlFreeURI(uri);
3204 }
3205 }
3206 }
3207 } else {
3208 if ((RAW == '"') || (RAW == '\'')) {
3209 value = xmlParseEntityValue(ctxt, &orig);
3210 if ((ctxt->sax != NULL) &&
3211 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3212 ctxt->sax->entityDecl(ctxt->userData, name,
3213 XML_INTERNAL_GENERAL_ENTITY,
3214 NULL, NULL, value);
3215 } else {
3216 URI = xmlParseExternalID(ctxt, &literal, 1);
3217 if ((URI == NULL) && (literal == NULL)) {
3218 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3220 ctxt->sax->error(ctxt->userData,
3221 "Entity value required\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 }
3225 if (URI) {
3226 xmlURIPtr uri;
3227
3228 uri = xmlParseURI((const char *)URI);
3229 if (uri == NULL) {
3230 ctxt->errNo = XML_ERR_INVALID_URI;
3231 if ((ctxt->sax != NULL) &&
3232 (!ctxt->disableSAX) &&
3233 (ctxt->sax->error != NULL))
3234 ctxt->sax->error(ctxt->userData,
3235 "Invalid URI: %s\n", URI);
3236 ctxt->wellFormed = 0;
3237 } else {
3238 if (uri->fragment != NULL) {
3239 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3240 if ((ctxt->sax != NULL) &&
3241 (!ctxt->disableSAX) &&
3242 (ctxt->sax->error != NULL))
3243 ctxt->sax->error(ctxt->userData,
3244 "Fragment not allowed: %s\n", URI);
3245 ctxt->wellFormed = 0;
3246 }
3247 xmlFreeURI(uri);
3248 }
3249 }
3250 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3251 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData,
3254 "Space required before 'NDATA'\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 }
3258 SKIP_BLANKS;
3259 if ((RAW == 'N') && (NXT(1) == 'D') &&
3260 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3261 (NXT(4) == 'A')) {
3262 SKIP(5);
3263 if (!IS_BLANK(CUR)) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required after 'NDATA'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 }
3271 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003272 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003273 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3274 (ctxt->sax->unparsedEntityDecl != NULL))
3275 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3276 literal, URI, ndata);
3277 } else {
3278 if ((ctxt->sax != NULL) &&
3279 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3280 ctxt->sax->entityDecl(ctxt->userData, name,
3281 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3282 literal, URI, NULL);
3283 }
3284 }
3285 }
3286 SKIP_BLANKS;
3287 if (RAW != '>') {
3288 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3290 ctxt->sax->error(ctxt->userData,
3291 "xmlParseEntityDecl: entity %s not terminated\n", name);
3292 ctxt->wellFormed = 0;
3293 ctxt->disableSAX = 1;
3294 } else {
3295 if (input != ctxt->input) {
3296 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299"Entity declaration doesn't start and stop in the same entity\n");
3300 ctxt->wellFormed = 0;
3301 ctxt->disableSAX = 1;
3302 }
3303 NEXT;
3304 }
3305 if (orig != NULL) {
3306 /*
3307 * Ugly mechanism to save the raw entity value.
3308 */
3309 xmlEntityPtr cur = NULL;
3310
3311 if (isParameter) {
3312 if ((ctxt->sax != NULL) &&
3313 (ctxt->sax->getParameterEntity != NULL))
3314 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3315 } else {
3316 if ((ctxt->sax != NULL) &&
3317 (ctxt->sax->getEntity != NULL))
3318 cur = ctxt->sax->getEntity(ctxt->userData, name);
3319 }
3320 if (cur != NULL) {
3321 if (cur->orig != NULL)
3322 xmlFree(orig);
3323 else
3324 cur->orig = orig;
3325 } else
3326 xmlFree(orig);
3327 }
3328 if (name != NULL) xmlFree(name);
3329 if (value != NULL) xmlFree(value);
3330 if (URI != NULL) xmlFree(URI);
3331 if (literal != NULL) xmlFree(literal);
3332 if (ndata != NULL) xmlFree(ndata);
3333 }
3334}
3335
3336/**
3337 * xmlParseDefaultDecl:
3338 * @ctxt: an XML parser context
3339 * @value: Receive a possible fixed default value for the attribute
3340 *
3341 * Parse an attribute default declaration
3342 *
3343 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3344 *
3345 * [ VC: Required Attribute ]
3346 * if the default declaration is the keyword #REQUIRED, then the
3347 * attribute must be specified for all elements of the type in the
3348 * attribute-list declaration.
3349 *
3350 * [ VC: Attribute Default Legal ]
3351 * The declared default value must meet the lexical constraints of
3352 * the declared attribute type c.f. xmlValidateAttributeDecl()
3353 *
3354 * [ VC: Fixed Attribute Default ]
3355 * if an attribute has a default value declared with the #FIXED
3356 * keyword, instances of that attribute must match the default value.
3357 *
3358 * [ WFC: No < in Attribute Values ]
3359 * handled in xmlParseAttValue()
3360 *
3361 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3362 * or XML_ATTRIBUTE_FIXED.
3363 */
3364
3365int
3366xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3367 int val;
3368 xmlChar *ret;
3369
3370 *value = NULL;
3371 if ((RAW == '#') && (NXT(1) == 'R') &&
3372 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3373 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3374 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3375 (NXT(8) == 'D')) {
3376 SKIP(9);
3377 return(XML_ATTRIBUTE_REQUIRED);
3378 }
3379 if ((RAW == '#') && (NXT(1) == 'I') &&
3380 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3381 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3382 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3383 SKIP(8);
3384 return(XML_ATTRIBUTE_IMPLIED);
3385 }
3386 val = XML_ATTRIBUTE_NONE;
3387 if ((RAW == '#') && (NXT(1) == 'F') &&
3388 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3389 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3390 SKIP(6);
3391 val = XML_ATTRIBUTE_FIXED;
3392 if (!IS_BLANK(CUR)) {
3393 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3395 ctxt->sax->error(ctxt->userData,
3396 "Space required after '#FIXED'\n");
3397 ctxt->wellFormed = 0;
3398 ctxt->disableSAX = 1;
3399 }
3400 SKIP_BLANKS;
3401 }
3402 ret = xmlParseAttValue(ctxt);
3403 ctxt->instate = XML_PARSER_DTD;
3404 if (ret == NULL) {
3405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3406 ctxt->sax->error(ctxt->userData,
3407 "Attribute default value declaration error\n");
3408 ctxt->wellFormed = 0;
3409 ctxt->disableSAX = 1;
3410 } else
3411 *value = ret;
3412 return(val);
3413}
3414
3415/**
3416 * xmlParseNotationType:
3417 * @ctxt: an XML parser context
3418 *
3419 * parse an Notation attribute type.
3420 *
3421 * Note: the leading 'NOTATION' S part has already being parsed...
3422 *
3423 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3424 *
3425 * [ VC: Notation Attributes ]
3426 * Values of this type must match one of the notation names included
3427 * in the declaration; all notation names in the declaration must be declared.
3428 *
3429 * Returns: the notation attribute tree built while parsing
3430 */
3431
3432xmlEnumerationPtr
3433xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3434 xmlChar *name;
3435 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3436
3437 if (RAW != '(') {
3438 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3440 ctxt->sax->error(ctxt->userData,
3441 "'(' required to start 'NOTATION'\n");
3442 ctxt->wellFormed = 0;
3443 ctxt->disableSAX = 1;
3444 return(NULL);
3445 }
3446 SHRINK;
3447 do {
3448 NEXT;
3449 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003450 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003451 if (name == NULL) {
3452 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3454 ctxt->sax->error(ctxt->userData,
3455 "Name expected in NOTATION declaration\n");
3456 ctxt->wellFormed = 0;
3457 ctxt->disableSAX = 1;
3458 return(ret);
3459 }
3460 cur = xmlCreateEnumeration(name);
3461 xmlFree(name);
3462 if (cur == NULL) return(ret);
3463 if (last == NULL) ret = last = cur;
3464 else {
3465 last->next = cur;
3466 last = cur;
3467 }
3468 SKIP_BLANKS;
3469 } while (RAW == '|');
3470 if (RAW != ')') {
3471 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3473 ctxt->sax->error(ctxt->userData,
3474 "')' required to finish NOTATION declaration\n");
3475 ctxt->wellFormed = 0;
3476 ctxt->disableSAX = 1;
3477 if ((last != NULL) && (last != ret))
3478 xmlFreeEnumeration(last);
3479 return(ret);
3480 }
3481 NEXT;
3482 return(ret);
3483}
3484
3485/**
3486 * xmlParseEnumerationType:
3487 * @ctxt: an XML parser context
3488 *
3489 * parse an Enumeration attribute type.
3490 *
3491 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3492 *
3493 * [ VC: Enumeration ]
3494 * Values of this type must match one of the Nmtoken tokens in
3495 * the declaration
3496 *
3497 * Returns: the enumeration attribute tree built while parsing
3498 */
3499
3500xmlEnumerationPtr
3501xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3502 xmlChar *name;
3503 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3504
3505 if (RAW != '(') {
3506 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "'(' required to start ATTLIST enumeration\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 return(NULL);
3513 }
3514 SHRINK;
3515 do {
3516 NEXT;
3517 SKIP_BLANKS;
3518 name = xmlParseNmtoken(ctxt);
3519 if (name == NULL) {
3520 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3522 ctxt->sax->error(ctxt->userData,
3523 "NmToken expected in ATTLIST enumeration\n");
3524 ctxt->wellFormed = 0;
3525 ctxt->disableSAX = 1;
3526 return(ret);
3527 }
3528 cur = xmlCreateEnumeration(name);
3529 xmlFree(name);
3530 if (cur == NULL) return(ret);
3531 if (last == NULL) ret = last = cur;
3532 else {
3533 last->next = cur;
3534 last = cur;
3535 }
3536 SKIP_BLANKS;
3537 } while (RAW == '|');
3538 if (RAW != ')') {
3539 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3541 ctxt->sax->error(ctxt->userData,
3542 "')' required to finish ATTLIST enumeration\n");
3543 ctxt->wellFormed = 0;
3544 ctxt->disableSAX = 1;
3545 return(ret);
3546 }
3547 NEXT;
3548 return(ret);
3549}
3550
3551/**
3552 * xmlParseEnumeratedType:
3553 * @ctxt: an XML parser context
3554 * @tree: the enumeration tree built while parsing
3555 *
3556 * parse an Enumerated attribute type.
3557 *
3558 * [57] EnumeratedType ::= NotationType | Enumeration
3559 *
3560 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3561 *
3562 *
3563 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3564 */
3565
3566int
3567xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3568 if ((RAW == 'N') && (NXT(1) == 'O') &&
3569 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3570 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3571 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3572 SKIP(8);
3573 if (!IS_BLANK(CUR)) {
3574 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3576 ctxt->sax->error(ctxt->userData,
3577 "Space required after 'NOTATION'\n");
3578 ctxt->wellFormed = 0;
3579 ctxt->disableSAX = 1;
3580 return(0);
3581 }
3582 SKIP_BLANKS;
3583 *tree = xmlParseNotationType(ctxt);
3584 if (*tree == NULL) return(0);
3585 return(XML_ATTRIBUTE_NOTATION);
3586 }
3587 *tree = xmlParseEnumerationType(ctxt);
3588 if (*tree == NULL) return(0);
3589 return(XML_ATTRIBUTE_ENUMERATION);
3590}
3591
3592/**
3593 * xmlParseAttributeType:
3594 * @ctxt: an XML parser context
3595 * @tree: the enumeration tree built while parsing
3596 *
3597 * parse the Attribute list def for an element
3598 *
3599 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3600 *
3601 * [55] StringType ::= 'CDATA'
3602 *
3603 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3604 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3605 *
3606 * Validity constraints for attribute values syntax are checked in
3607 * xmlValidateAttributeValue()
3608 *
3609 * [ VC: ID ]
3610 * Values of type ID must match the Name production. A name must not
3611 * appear more than once in an XML document as a value of this type;
3612 * i.e., ID values must uniquely identify the elements which bear them.
3613 *
3614 * [ VC: One ID per Element Type ]
3615 * No element type may have more than one ID attribute specified.
3616 *
3617 * [ VC: ID Attribute Default ]
3618 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3619 *
3620 * [ VC: IDREF ]
3621 * Values of type IDREF must match the Name production, and values
3622 * of type IDREFS must match Names; each IDREF Name must match the value
3623 * of an ID attribute on some element in the XML document; i.e. IDREF
3624 * values must match the value of some ID attribute.
3625 *
3626 * [ VC: Entity Name ]
3627 * Values of type ENTITY must match the Name production, values
3628 * of type ENTITIES must match Names; each Entity Name must match the
3629 * name of an unparsed entity declared in the DTD.
3630 *
3631 * [ VC: Name Token ]
3632 * Values of type NMTOKEN must match the Nmtoken production; values
3633 * of type NMTOKENS must match Nmtokens.
3634 *
3635 * Returns the attribute type
3636 */
3637int
3638xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3639 SHRINK;
3640 if ((RAW == 'C') && (NXT(1) == 'D') &&
3641 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3642 (NXT(4) == 'A')) {
3643 SKIP(5);
3644 return(XML_ATTRIBUTE_CDATA);
3645 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3646 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3647 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3648 SKIP(6);
3649 return(XML_ATTRIBUTE_IDREFS);
3650 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3651 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3652 (NXT(4) == 'F')) {
3653 SKIP(5);
3654 return(XML_ATTRIBUTE_IDREF);
3655 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3656 SKIP(2);
3657 return(XML_ATTRIBUTE_ID);
3658 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3659 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3660 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3661 SKIP(6);
3662 return(XML_ATTRIBUTE_ENTITY);
3663 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3664 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3665 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3666 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3667 SKIP(8);
3668 return(XML_ATTRIBUTE_ENTITIES);
3669 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3670 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3671 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3672 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3673 SKIP(8);
3674 return(XML_ATTRIBUTE_NMTOKENS);
3675 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3676 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3677 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3678 (NXT(6) == 'N')) {
3679 SKIP(7);
3680 return(XML_ATTRIBUTE_NMTOKEN);
3681 }
3682 return(xmlParseEnumeratedType(ctxt, tree));
3683}
3684
3685/**
3686 * xmlParseAttributeListDecl:
3687 * @ctxt: an XML parser context
3688 *
3689 * : parse the Attribute list def for an element
3690 *
3691 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3692 *
3693 * [53] AttDef ::= S Name S AttType S DefaultDecl
3694 *
3695 */
3696void
3697xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3698 xmlChar *elemName;
3699 xmlChar *attrName;
3700 xmlEnumerationPtr tree;
3701
3702 if ((RAW == '<') && (NXT(1) == '!') &&
3703 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3704 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3705 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3706 (NXT(8) == 'T')) {
3707 xmlParserInputPtr input = ctxt->input;
3708
3709 SKIP(9);
3710 if (!IS_BLANK(CUR)) {
3711 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3713 ctxt->sax->error(ctxt->userData,
3714 "Space required after '<!ATTLIST'\n");
3715 ctxt->wellFormed = 0;
3716 ctxt->disableSAX = 1;
3717 }
3718 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003719 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (elemName == NULL) {
3721 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3723 ctxt->sax->error(ctxt->userData,
3724 "ATTLIST: no name for Element\n");
3725 ctxt->wellFormed = 0;
3726 ctxt->disableSAX = 1;
3727 return;
3728 }
3729 SKIP_BLANKS;
3730 GROW;
3731 while (RAW != '>') {
3732 const xmlChar *check = CUR_PTR;
3733 int type;
3734 int def;
3735 xmlChar *defaultValue = NULL;
3736
3737 GROW;
3738 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003739 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003740 if (attrName == NULL) {
3741 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3743 ctxt->sax->error(ctxt->userData,
3744 "ATTLIST: no name for Attribute\n");
3745 ctxt->wellFormed = 0;
3746 ctxt->disableSAX = 1;
3747 break;
3748 }
3749 GROW;
3750 if (!IS_BLANK(CUR)) {
3751 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3753 ctxt->sax->error(ctxt->userData,
3754 "Space required after the attribute name\n");
3755 ctxt->wellFormed = 0;
3756 ctxt->disableSAX = 1;
3757 if (attrName != NULL)
3758 xmlFree(attrName);
3759 if (defaultValue != NULL)
3760 xmlFree(defaultValue);
3761 break;
3762 }
3763 SKIP_BLANKS;
3764
3765 type = xmlParseAttributeType(ctxt, &tree);
3766 if (type <= 0) {
3767 if (attrName != NULL)
3768 xmlFree(attrName);
3769 if (defaultValue != NULL)
3770 xmlFree(defaultValue);
3771 break;
3772 }
3773
3774 GROW;
3775 if (!IS_BLANK(CUR)) {
3776 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3778 ctxt->sax->error(ctxt->userData,
3779 "Space required after the attribute type\n");
3780 ctxt->wellFormed = 0;
3781 ctxt->disableSAX = 1;
3782 if (attrName != NULL)
3783 xmlFree(attrName);
3784 if (defaultValue != NULL)
3785 xmlFree(defaultValue);
3786 if (tree != NULL)
3787 xmlFreeEnumeration(tree);
3788 break;
3789 }
3790 SKIP_BLANKS;
3791
3792 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3793 if (def <= 0) {
3794 if (attrName != NULL)
3795 xmlFree(attrName);
3796 if (defaultValue != NULL)
3797 xmlFree(defaultValue);
3798 if (tree != NULL)
3799 xmlFreeEnumeration(tree);
3800 break;
3801 }
3802
3803 GROW;
3804 if (RAW != '>') {
3805 if (!IS_BLANK(CUR)) {
3806 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3808 ctxt->sax->error(ctxt->userData,
3809 "Space required after the attribute default value\n");
3810 ctxt->wellFormed = 0;
3811 ctxt->disableSAX = 1;
3812 if (attrName != NULL)
3813 xmlFree(attrName);
3814 if (defaultValue != NULL)
3815 xmlFree(defaultValue);
3816 if (tree != NULL)
3817 xmlFreeEnumeration(tree);
3818 break;
3819 }
3820 SKIP_BLANKS;
3821 }
3822 if (check == CUR_PTR) {
3823 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3825 ctxt->sax->error(ctxt->userData,
3826 "xmlParseAttributeListDecl: detected internal error\n");
3827 if (attrName != NULL)
3828 xmlFree(attrName);
3829 if (defaultValue != NULL)
3830 xmlFree(defaultValue);
3831 if (tree != NULL)
3832 xmlFreeEnumeration(tree);
3833 break;
3834 }
3835 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3836 (ctxt->sax->attributeDecl != NULL))
3837 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3838 type, def, defaultValue, tree);
3839 if (attrName != NULL)
3840 xmlFree(attrName);
3841 if (defaultValue != NULL)
3842 xmlFree(defaultValue);
3843 GROW;
3844 }
3845 if (RAW == '>') {
3846 if (input != ctxt->input) {
3847 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3849 ctxt->sax->error(ctxt->userData,
3850"Attribute list declaration doesn't start and stop in the same entity\n");
3851 ctxt->wellFormed = 0;
3852 ctxt->disableSAX = 1;
3853 }
3854 NEXT;
3855 }
3856
3857 xmlFree(elemName);
3858 }
3859}
3860
3861/**
3862 * xmlParseElementMixedContentDecl:
3863 * @ctxt: an XML parser context
3864 *
3865 * parse the declaration for a Mixed Element content
3866 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3867 *
3868 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3869 * '(' S? '#PCDATA' S? ')'
3870 *
3871 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3872 *
3873 * [ VC: No Duplicate Types ]
3874 * The same name must not appear more than once in a single
3875 * mixed-content declaration.
3876 *
3877 * returns: the list of the xmlElementContentPtr describing the element choices
3878 */
3879xmlElementContentPtr
3880xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3881 xmlElementContentPtr ret = NULL, cur = NULL, n;
3882 xmlChar *elem = NULL;
3883
3884 GROW;
3885 if ((RAW == '#') && (NXT(1) == 'P') &&
3886 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3887 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3888 (NXT(6) == 'A')) {
3889 SKIP(7);
3890 SKIP_BLANKS;
3891 SHRINK;
3892 if (RAW == ')') {
3893 ctxt->entity = ctxt->input;
3894 NEXT;
3895 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3896 if (RAW == '*') {
3897 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3898 NEXT;
3899 }
3900 return(ret);
3901 }
3902 if ((RAW == '(') || (RAW == '|')) {
3903 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3904 if (ret == NULL) return(NULL);
3905 }
3906 while (RAW == '|') {
3907 NEXT;
3908 if (elem == NULL) {
3909 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3910 if (ret == NULL) return(NULL);
3911 ret->c1 = cur;
3912 cur = ret;
3913 } else {
3914 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3915 if (n == NULL) return(NULL);
3916 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3917 cur->c2 = n;
3918 cur = n;
3919 xmlFree(elem);
3920 }
3921 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003922 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003923 if (elem == NULL) {
3924 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3926 ctxt->sax->error(ctxt->userData,
3927 "xmlParseElementMixedContentDecl : Name expected\n");
3928 ctxt->wellFormed = 0;
3929 ctxt->disableSAX = 1;
3930 xmlFreeElementContent(cur);
3931 return(NULL);
3932 }
3933 SKIP_BLANKS;
3934 GROW;
3935 }
3936 if ((RAW == ')') && (NXT(1) == '*')) {
3937 if (elem != NULL) {
3938 cur->c2 = xmlNewElementContent(elem,
3939 XML_ELEMENT_CONTENT_ELEMENT);
3940 xmlFree(elem);
3941 }
3942 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3943 ctxt->entity = ctxt->input;
3944 SKIP(2);
3945 } else {
3946 if (elem != NULL) xmlFree(elem);
3947 xmlFreeElementContent(ret);
3948 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3950 ctxt->sax->error(ctxt->userData,
3951 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3952 ctxt->wellFormed = 0;
3953 ctxt->disableSAX = 1;
3954 return(NULL);
3955 }
3956
3957 } else {
3958 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3962 ctxt->wellFormed = 0;
3963 ctxt->disableSAX = 1;
3964 }
3965 return(ret);
3966}
3967
3968/**
3969 * xmlParseElementChildrenContentDecl:
3970 * @ctxt: an XML parser context
3971 *
3972 * parse the declaration for a Mixed Element content
3973 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3974 *
3975 *
3976 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3977 *
3978 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3979 *
3980 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3981 *
3982 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3983 *
3984 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3985 * TODO Parameter-entity replacement text must be properly nested
3986 * with parenthetized groups. That is to say, if either of the
3987 * opening or closing parentheses in a choice, seq, or Mixed
3988 * construct is contained in the replacement text for a parameter
3989 * entity, both must be contained in the same replacement text. For
3990 * interoperability, if a parameter-entity reference appears in a
3991 * choice, seq, or Mixed construct, its replacement text should not
3992 * be empty, and neither the first nor last non-blank character of
3993 * the replacement text should be a connector (| or ,).
3994 *
3995 * returns: the tree of xmlElementContentPtr describing the element
3996 * hierarchy.
3997 */
3998xmlElementContentPtr
3999#ifdef VMS
4000xmlParseElementChildrenContentD
4001#else
4002xmlParseElementChildrenContentDecl
4003#endif
4004(xmlParserCtxtPtr ctxt) {
4005 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4006 xmlChar *elem;
4007 xmlChar type = 0;
4008
4009 SKIP_BLANKS;
4010 GROW;
4011 if (RAW == '(') {
4012 /* Recurse on first child */
4013 NEXT;
4014 SKIP_BLANKS;
4015 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4016 SKIP_BLANKS;
4017 GROW;
4018 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004019 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004020 if (elem == NULL) {
4021 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4023 ctxt->sax->error(ctxt->userData,
4024 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4025 ctxt->wellFormed = 0;
4026 ctxt->disableSAX = 1;
4027 return(NULL);
4028 }
4029 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4030 GROW;
4031 if (RAW == '?') {
4032 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4033 NEXT;
4034 } else if (RAW == '*') {
4035 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4036 NEXT;
4037 } else if (RAW == '+') {
4038 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4039 NEXT;
4040 } else {
4041 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4042 }
4043 xmlFree(elem);
4044 GROW;
4045 }
4046 SKIP_BLANKS;
4047 SHRINK;
4048 while (RAW != ')') {
4049 /*
4050 * Each loop we parse one separator and one element.
4051 */
4052 if (RAW == ',') {
4053 if (type == 0) type = CUR;
4054
4055 /*
4056 * Detect "Name | Name , Name" error
4057 */
4058 else if (type != CUR) {
4059 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4061 ctxt->sax->error(ctxt->userData,
4062 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4063 type);
4064 ctxt->wellFormed = 0;
4065 ctxt->disableSAX = 1;
4066 if ((op != NULL) && (op != ret))
4067 xmlFreeElementContent(op);
4068 if ((last != NULL) && (last != ret) &&
4069 (last != ret->c1) && (last != ret->c2))
4070 xmlFreeElementContent(last);
4071 if (ret != NULL)
4072 xmlFreeElementContent(ret);
4073 return(NULL);
4074 }
4075 NEXT;
4076
4077 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4078 if (op == NULL) {
4079 xmlFreeElementContent(ret);
4080 return(NULL);
4081 }
4082 if (last == NULL) {
4083 op->c1 = ret;
4084 ret = cur = op;
4085 } else {
4086 cur->c2 = op;
4087 op->c1 = last;
4088 cur =op;
4089 last = NULL;
4090 }
4091 } else if (RAW == '|') {
4092 if (type == 0) type = CUR;
4093
4094 /*
4095 * Detect "Name , Name | Name" error
4096 */
4097 else if (type != CUR) {
4098 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4100 ctxt->sax->error(ctxt->userData,
4101 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4102 type);
4103 ctxt->wellFormed = 0;
4104 ctxt->disableSAX = 1;
4105 if ((op != NULL) && (op != ret) && (op != last))
4106 xmlFreeElementContent(op);
4107 if ((last != NULL) && (last != ret) &&
4108 (last != ret->c1) && (last != ret->c2))
4109 xmlFreeElementContent(last);
4110 if (ret != NULL)
4111 xmlFreeElementContent(ret);
4112 return(NULL);
4113 }
4114 NEXT;
4115
4116 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4117 if (op == NULL) {
4118 if ((op != NULL) && (op != ret))
4119 xmlFreeElementContent(op);
4120 if ((last != NULL) && (last != ret) &&
4121 (last != ret->c1) && (last != ret->c2))
4122 xmlFreeElementContent(last);
4123 if (ret != NULL)
4124 xmlFreeElementContent(ret);
4125 return(NULL);
4126 }
4127 if (last == NULL) {
4128 op->c1 = ret;
4129 ret = cur = op;
4130 } else {
4131 cur->c2 = op;
4132 op->c1 = last;
4133 cur =op;
4134 last = NULL;
4135 }
4136 } else {
4137 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4139 ctxt->sax->error(ctxt->userData,
4140 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4141 ctxt->wellFormed = 0;
4142 ctxt->disableSAX = 1;
4143 if ((op != NULL) && (op != ret))
4144 xmlFreeElementContent(op);
4145 if ((last != NULL) && (last != ret) &&
4146 (last != ret->c1) && (last != ret->c2))
4147 xmlFreeElementContent(last);
4148 if (ret != NULL)
4149 xmlFreeElementContent(ret);
4150 return(NULL);
4151 }
4152 GROW;
4153 SKIP_BLANKS;
4154 GROW;
4155 if (RAW == '(') {
4156 /* Recurse on second child */
4157 NEXT;
4158 SKIP_BLANKS;
4159 last = xmlParseElementChildrenContentDecl(ctxt);
4160 SKIP_BLANKS;
4161 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004162 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004163 if (elem == NULL) {
4164 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4166 ctxt->sax->error(ctxt->userData,
4167 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4168 ctxt->wellFormed = 0;
4169 ctxt->disableSAX = 1;
4170 if ((op != NULL) && (op != ret))
4171 xmlFreeElementContent(op);
4172 if ((last != NULL) && (last != ret) &&
4173 (last != ret->c1) && (last != ret->c2))
4174 xmlFreeElementContent(last);
4175 if (ret != NULL)
4176 xmlFreeElementContent(ret);
4177 return(NULL);
4178 }
4179 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4180 xmlFree(elem);
4181 if (RAW == '?') {
4182 last->ocur = XML_ELEMENT_CONTENT_OPT;
4183 NEXT;
4184 } else if (RAW == '*') {
4185 last->ocur = XML_ELEMENT_CONTENT_MULT;
4186 NEXT;
4187 } else if (RAW == '+') {
4188 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4189 NEXT;
4190 } else {
4191 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4192 }
4193 }
4194 SKIP_BLANKS;
4195 GROW;
4196 }
4197 if ((cur != NULL) && (last != NULL)) {
4198 cur->c2 = last;
4199 }
4200 ctxt->entity = ctxt->input;
4201 NEXT;
4202 if (RAW == '?') {
4203 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4204 NEXT;
4205 } else if (RAW == '*') {
4206 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4207 NEXT;
4208 } else if (RAW == '+') {
4209 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4210 NEXT;
4211 }
4212 return(ret);
4213}
4214
4215/**
4216 * xmlParseElementContentDecl:
4217 * @ctxt: an XML parser context
4218 * @name: the name of the element being defined.
4219 * @result: the Element Content pointer will be stored here if any
4220 *
4221 * parse the declaration for an Element content either Mixed or Children,
4222 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4223 *
4224 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4225 *
4226 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4227 */
4228
4229int
4230xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4231 xmlElementContentPtr *result) {
4232
4233 xmlElementContentPtr tree = NULL;
4234 xmlParserInputPtr input = ctxt->input;
4235 int res;
4236
4237 *result = NULL;
4238
4239 if (RAW != '(') {
4240 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4242 ctxt->sax->error(ctxt->userData,
4243 "xmlParseElementContentDecl : '(' expected\n");
4244 ctxt->wellFormed = 0;
4245 ctxt->disableSAX = 1;
4246 return(-1);
4247 }
4248 NEXT;
4249 GROW;
4250 SKIP_BLANKS;
4251 if ((RAW == '#') && (NXT(1) == 'P') &&
4252 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4253 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4254 (NXT(6) == 'A')) {
4255 tree = xmlParseElementMixedContentDecl(ctxt);
4256 res = XML_ELEMENT_TYPE_MIXED;
4257 } else {
4258 tree = xmlParseElementChildrenContentDecl(ctxt);
4259 res = XML_ELEMENT_TYPE_ELEMENT;
4260 }
4261 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4262 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4264 ctxt->sax->error(ctxt->userData,
4265"Element content declaration doesn't start and stop in the same entity\n");
4266 ctxt->wellFormed = 0;
4267 ctxt->disableSAX = 1;
4268 }
4269 SKIP_BLANKS;
4270 *result = tree;
4271 return(res);
4272}
4273
4274/**
4275 * xmlParseElementDecl:
4276 * @ctxt: an XML parser context
4277 *
4278 * parse an Element declaration.
4279 *
4280 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4281 *
4282 * [ VC: Unique Element Type Declaration ]
4283 * No element type may be declared more than once
4284 *
4285 * Returns the type of the element, or -1 in case of error
4286 */
4287int
4288xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4289 xmlChar *name;
4290 int ret = -1;
4291 xmlElementContentPtr content = NULL;
4292
4293 GROW;
4294 if ((RAW == '<') && (NXT(1) == '!') &&
4295 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4296 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4297 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4298 (NXT(8) == 'T')) {
4299 xmlParserInputPtr input = ctxt->input;
4300
4301 SKIP(9);
4302 if (!IS_BLANK(CUR)) {
4303 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4305 ctxt->sax->error(ctxt->userData,
4306 "Space required after 'ELEMENT'\n");
4307 ctxt->wellFormed = 0;
4308 ctxt->disableSAX = 1;
4309 }
4310 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004311 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004312 if (name == NULL) {
4313 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4315 ctxt->sax->error(ctxt->userData,
4316 "xmlParseElementDecl: no name for Element\n");
4317 ctxt->wellFormed = 0;
4318 ctxt->disableSAX = 1;
4319 return(-1);
4320 }
4321 while ((RAW == 0) && (ctxt->inputNr > 1))
4322 xmlPopInput(ctxt);
4323 if (!IS_BLANK(CUR)) {
4324 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4326 ctxt->sax->error(ctxt->userData,
4327 "Space required after the element name\n");
4328 ctxt->wellFormed = 0;
4329 ctxt->disableSAX = 1;
4330 }
4331 SKIP_BLANKS;
4332 if ((RAW == 'E') && (NXT(1) == 'M') &&
4333 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4334 (NXT(4) == 'Y')) {
4335 SKIP(5);
4336 /*
4337 * Element must always be empty.
4338 */
4339 ret = XML_ELEMENT_TYPE_EMPTY;
4340 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4341 (NXT(2) == 'Y')) {
4342 SKIP(3);
4343 /*
4344 * Element is a generic container.
4345 */
4346 ret = XML_ELEMENT_TYPE_ANY;
4347 } else if (RAW == '(') {
4348 ret = xmlParseElementContentDecl(ctxt, name, &content);
4349 } else {
4350 /*
4351 * [ WFC: PEs in Internal Subset ] error handling.
4352 */
4353 if ((RAW == '%') && (ctxt->external == 0) &&
4354 (ctxt->inputNr == 1)) {
4355 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357 ctxt->sax->error(ctxt->userData,
4358 "PEReference: forbidden within markup decl in internal subset\n");
4359 } else {
4360 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4362 ctxt->sax->error(ctxt->userData,
4363 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4364 }
4365 ctxt->wellFormed = 0;
4366 ctxt->disableSAX = 1;
4367 if (name != NULL) xmlFree(name);
4368 return(-1);
4369 }
4370
4371 SKIP_BLANKS;
4372 /*
4373 * Pop-up of finished entities.
4374 */
4375 while ((RAW == 0) && (ctxt->inputNr > 1))
4376 xmlPopInput(ctxt);
4377 SKIP_BLANKS;
4378
4379 if (RAW != '>') {
4380 ctxt->errNo = XML_ERR_GT_REQUIRED;
4381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4382 ctxt->sax->error(ctxt->userData,
4383 "xmlParseElementDecl: expected '>' at the end\n");
4384 ctxt->wellFormed = 0;
4385 ctxt->disableSAX = 1;
4386 } else {
4387 if (input != ctxt->input) {
4388 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4390 ctxt->sax->error(ctxt->userData,
4391"Element declaration doesn't start and stop in the same entity\n");
4392 ctxt->wellFormed = 0;
4393 ctxt->disableSAX = 1;
4394 }
4395
4396 NEXT;
4397 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4398 (ctxt->sax->elementDecl != NULL))
4399 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4400 content);
4401 }
4402 if (content != NULL) {
4403 xmlFreeElementContent(content);
4404 }
4405 if (name != NULL) {
4406 xmlFree(name);
4407 }
4408 }
4409 return(ret);
4410}
4411
4412/**
4413 * xmlParseMarkupDecl:
4414 * @ctxt: an XML parser context
4415 *
4416 * parse Markup declarations
4417 *
4418 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4419 * NotationDecl | PI | Comment
4420 *
4421 * [ VC: Proper Declaration/PE Nesting ]
4422 * Parameter-entity replacement text must be properly nested with
4423 * markup declarations. That is to say, if either the first character
4424 * or the last character of a markup declaration (markupdecl above) is
4425 * contained in the replacement text for a parameter-entity reference,
4426 * both must be contained in the same replacement text.
4427 *
4428 * [ WFC: PEs in Internal Subset ]
4429 * In the internal DTD subset, parameter-entity references can occur
4430 * only where markup declarations can occur, not within markup declarations.
4431 * (This does not apply to references that occur in external parameter
4432 * entities or to the external subset.)
4433 */
4434void
4435xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4436 GROW;
4437 xmlParseElementDecl(ctxt);
4438 xmlParseAttributeListDecl(ctxt);
4439 xmlParseEntityDecl(ctxt);
4440 xmlParseNotationDecl(ctxt);
4441 xmlParsePI(ctxt);
4442 xmlParseComment(ctxt);
4443 /*
4444 * This is only for internal subset. On external entities,
4445 * the replacement is done before parsing stage
4446 */
4447 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4448 xmlParsePEReference(ctxt);
4449 ctxt->instate = XML_PARSER_DTD;
4450}
4451
4452/**
4453 * xmlParseTextDecl:
4454 * @ctxt: an XML parser context
4455 *
4456 * parse an XML declaration header for external entities
4457 *
4458 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4459 *
4460 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4461 */
4462
4463void
4464xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4465 xmlChar *version;
4466
4467 /*
4468 * We know that '<?xml' is here.
4469 */
4470 if ((RAW == '<') && (NXT(1) == '?') &&
4471 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4472 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4473 SKIP(5);
4474 } else {
4475 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478 "Text declaration '<?xml' required\n");
4479 ctxt->wellFormed = 0;
4480 ctxt->disableSAX = 1;
4481
4482 return;
4483 }
4484
4485 if (!IS_BLANK(CUR)) {
4486 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4488 ctxt->sax->error(ctxt->userData,
4489 "Space needed after '<?xml'\n");
4490 ctxt->wellFormed = 0;
4491 ctxt->disableSAX = 1;
4492 }
4493 SKIP_BLANKS;
4494
4495 /*
4496 * We may have the VersionInfo here.
4497 */
4498 version = xmlParseVersionInfo(ctxt);
4499 if (version == NULL)
4500 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4501 ctxt->input->version = version;
4502
4503 /*
4504 * We must have the encoding declaration
4505 */
4506 if (!IS_BLANK(CUR)) {
4507 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4509 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4510 ctxt->wellFormed = 0;
4511 ctxt->disableSAX = 1;
4512 }
4513 xmlParseEncodingDecl(ctxt);
4514 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4515 /*
4516 * The XML REC instructs us to stop parsing right here
4517 */
4518 return;
4519 }
4520
4521 SKIP_BLANKS;
4522 if ((RAW == '?') && (NXT(1) == '>')) {
4523 SKIP(2);
4524 } else if (RAW == '>') {
4525 /* Deprecated old WD ... */
4526 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4528 ctxt->sax->error(ctxt->userData,
4529 "XML declaration must end-up with '?>'\n");
4530 ctxt->wellFormed = 0;
4531 ctxt->disableSAX = 1;
4532 NEXT;
4533 } else {
4534 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4536 ctxt->sax->error(ctxt->userData,
4537 "parsing XML declaration: '?>' expected\n");
4538 ctxt->wellFormed = 0;
4539 ctxt->disableSAX = 1;
4540 MOVETO_ENDTAG(CUR_PTR);
4541 NEXT;
4542 }
4543}
4544
4545/*
4546 * xmlParseConditionalSections
4547 * @ctxt: an XML parser context
4548 *
4549 * [61] conditionalSect ::= includeSect | ignoreSect
4550 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4551 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4552 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4553 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4554 */
4555
4556void
4557xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4558 SKIP(3);
4559 SKIP_BLANKS;
4560 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4561 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4562 (NXT(6) == 'E')) {
4563 SKIP(7);
4564 SKIP_BLANKS;
4565 if (RAW != '[') {
4566 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4568 ctxt->sax->error(ctxt->userData,
4569 "XML conditional section '[' expected\n");
4570 ctxt->wellFormed = 0;
4571 ctxt->disableSAX = 1;
4572 } else {
4573 NEXT;
4574 }
4575 if (xmlParserDebugEntities) {
4576 if ((ctxt->input != NULL) && (ctxt->input->filename))
4577 xmlGenericError(xmlGenericErrorContext,
4578 "%s(%d): ", ctxt->input->filename,
4579 ctxt->input->line);
4580 xmlGenericError(xmlGenericErrorContext,
4581 "Entering INCLUDE Conditional Section\n");
4582 }
4583
4584 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4585 (NXT(2) != '>'))) {
4586 const xmlChar *check = CUR_PTR;
4587 int cons = ctxt->input->consumed;
4588 int tok = ctxt->token;
4589
4590 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4591 xmlParseConditionalSections(ctxt);
4592 } else if (IS_BLANK(CUR)) {
4593 NEXT;
4594 } else if (RAW == '%') {
4595 xmlParsePEReference(ctxt);
4596 } else
4597 xmlParseMarkupDecl(ctxt);
4598
4599 /*
4600 * Pop-up of finished entities.
4601 */
4602 while ((RAW == 0) && (ctxt->inputNr > 1))
4603 xmlPopInput(ctxt);
4604
4605 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4606 (tok == ctxt->token)) {
4607 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4609 ctxt->sax->error(ctxt->userData,
4610 "Content error in the external subset\n");
4611 ctxt->wellFormed = 0;
4612 ctxt->disableSAX = 1;
4613 break;
4614 }
4615 }
4616 if (xmlParserDebugEntities) {
4617 if ((ctxt->input != NULL) && (ctxt->input->filename))
4618 xmlGenericError(xmlGenericErrorContext,
4619 "%s(%d): ", ctxt->input->filename,
4620 ctxt->input->line);
4621 xmlGenericError(xmlGenericErrorContext,
4622 "Leaving INCLUDE Conditional Section\n");
4623 }
4624
4625 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4626 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4627 int state;
4628 int instate;
4629 int depth = 0;
4630
4631 SKIP(6);
4632 SKIP_BLANKS;
4633 if (RAW != '[') {
4634 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4636 ctxt->sax->error(ctxt->userData,
4637 "XML conditional section '[' expected\n");
4638 ctxt->wellFormed = 0;
4639 ctxt->disableSAX = 1;
4640 } else {
4641 NEXT;
4642 }
4643 if (xmlParserDebugEntities) {
4644 if ((ctxt->input != NULL) && (ctxt->input->filename))
4645 xmlGenericError(xmlGenericErrorContext,
4646 "%s(%d): ", ctxt->input->filename,
4647 ctxt->input->line);
4648 xmlGenericError(xmlGenericErrorContext,
4649 "Entering IGNORE Conditional Section\n");
4650 }
4651
4652 /*
4653 * Parse up to the end of the conditionnal section
4654 * But disable SAX event generating DTD building in the meantime
4655 */
4656 state = ctxt->disableSAX;
4657 instate = ctxt->instate;
4658 ctxt->disableSAX = 1;
4659 ctxt->instate = XML_PARSER_IGNORE;
4660
4661 while (depth >= 0) {
4662 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4663 depth++;
4664 SKIP(3);
4665 continue;
4666 }
4667 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4668 if (--depth >= 0) SKIP(3);
4669 continue;
4670 }
4671 NEXT;
4672 continue;
4673 }
4674
4675 ctxt->disableSAX = state;
4676 ctxt->instate = instate;
4677
4678 if (xmlParserDebugEntities) {
4679 if ((ctxt->input != NULL) && (ctxt->input->filename))
4680 xmlGenericError(xmlGenericErrorContext,
4681 "%s(%d): ", ctxt->input->filename,
4682 ctxt->input->line);
4683 xmlGenericError(xmlGenericErrorContext,
4684 "Leaving IGNORE Conditional Section\n");
4685 }
4686
4687 } else {
4688 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4690 ctxt->sax->error(ctxt->userData,
4691 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4692 ctxt->wellFormed = 0;
4693 ctxt->disableSAX = 1;
4694 }
4695
4696 if (RAW == 0)
4697 SHRINK;
4698
4699 if (RAW == 0) {
4700 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4702 ctxt->sax->error(ctxt->userData,
4703 "XML conditional section not closed\n");
4704 ctxt->wellFormed = 0;
4705 ctxt->disableSAX = 1;
4706 } else {
4707 SKIP(3);
4708 }
4709}
4710
4711/**
4712 * xmlParseExternalSubset:
4713 * @ctxt: an XML parser context
4714 * @ExternalID: the external identifier
4715 * @SystemID: the system identifier (or URL)
4716 *
4717 * parse Markup declarations from an external subset
4718 *
4719 * [30] extSubset ::= textDecl? extSubsetDecl
4720 *
4721 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4722 */
4723void
4724xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4725 const xmlChar *SystemID) {
4726 GROW;
4727 if ((RAW == '<') && (NXT(1) == '?') &&
4728 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4729 (NXT(4) == 'l')) {
4730 xmlParseTextDecl(ctxt);
4731 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4732 /*
4733 * The XML REC instructs us to stop parsing right here
4734 */
4735 ctxt->instate = XML_PARSER_EOF;
4736 return;
4737 }
4738 }
4739 if (ctxt->myDoc == NULL) {
4740 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4741 }
4742 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4743 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4744
4745 ctxt->instate = XML_PARSER_DTD;
4746 ctxt->external = 1;
4747 while (((RAW == '<') && (NXT(1) == '?')) ||
4748 ((RAW == '<') && (NXT(1) == '!')) ||
4749 IS_BLANK(CUR)) {
4750 const xmlChar *check = CUR_PTR;
4751 int cons = ctxt->input->consumed;
4752 int tok = ctxt->token;
4753
4754 GROW;
4755 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4756 xmlParseConditionalSections(ctxt);
4757 } else if (IS_BLANK(CUR)) {
4758 NEXT;
4759 } else if (RAW == '%') {
4760 xmlParsePEReference(ctxt);
4761 } else
4762 xmlParseMarkupDecl(ctxt);
4763
4764 /*
4765 * Pop-up of finished entities.
4766 */
4767 while ((RAW == 0) && (ctxt->inputNr > 1))
4768 xmlPopInput(ctxt);
4769
4770 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4771 (tok == ctxt->token)) {
4772 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4774 ctxt->sax->error(ctxt->userData,
4775 "Content error in the external subset\n");
4776 ctxt->wellFormed = 0;
4777 ctxt->disableSAX = 1;
4778 break;
4779 }
4780 }
4781
4782 if (RAW != 0) {
4783 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4785 ctxt->sax->error(ctxt->userData,
4786 "Extra content at the end of the document\n");
4787 ctxt->wellFormed = 0;
4788 ctxt->disableSAX = 1;
4789 }
4790
4791}
4792
4793/**
4794 * xmlParseReference:
4795 * @ctxt: an XML parser context
4796 *
4797 * parse and handle entity references in content, depending on the SAX
4798 * interface, this may end-up in a call to character() if this is a
4799 * CharRef, a predefined entity, if there is no reference() callback.
4800 * or if the parser was asked to switch to that mode.
4801 *
4802 * [67] Reference ::= EntityRef | CharRef
4803 */
4804void
4805xmlParseReference(xmlParserCtxtPtr ctxt) {
4806 xmlEntityPtr ent;
4807 xmlChar *val;
4808 if (RAW != '&') return;
4809
4810 if (NXT(1) == '#') {
4811 int i = 0;
4812 xmlChar out[10];
4813 int hex = NXT(2);
4814 int val = xmlParseCharRef(ctxt);
4815
4816 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4817 /*
4818 * So we are using non-UTF-8 buffers
4819 * Check that the char fit on 8bits, if not
4820 * generate a CharRef.
4821 */
4822 if (val <= 0xFF) {
4823 out[0] = val;
4824 out[1] = 0;
4825 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4826 (!ctxt->disableSAX))
4827 ctxt->sax->characters(ctxt->userData, out, 1);
4828 } else {
4829 if ((hex == 'x') || (hex == 'X'))
4830 sprintf((char *)out, "#x%X", val);
4831 else
4832 sprintf((char *)out, "#%d", val);
4833 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4834 (!ctxt->disableSAX))
4835 ctxt->sax->reference(ctxt->userData, out);
4836 }
4837 } else {
4838 /*
4839 * Just encode the value in UTF-8
4840 */
4841 COPY_BUF(0 ,out, i, val);
4842 out[i] = 0;
4843 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4844 (!ctxt->disableSAX))
4845 ctxt->sax->characters(ctxt->userData, out, i);
4846 }
4847 } else {
4848 ent = xmlParseEntityRef(ctxt);
4849 if (ent == NULL) return;
4850 if ((ent->name != NULL) &&
4851 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4852 xmlNodePtr list = NULL;
4853 int ret;
4854
4855
4856 /*
4857 * The first reference to the entity trigger a parsing phase
4858 * where the ent->children is filled with the result from
4859 * the parsing.
4860 */
4861 if (ent->children == NULL) {
4862 xmlChar *value;
4863 value = ent->content;
4864
4865 /*
4866 * Check that this entity is well formed
4867 */
4868 if ((value != NULL) &&
4869 (value[1] == 0) && (value[0] == '<') &&
4870 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4871 /*
4872 * DONE: get definite answer on this !!!
4873 * Lots of entity decls are used to declare a single
4874 * char
4875 * <!ENTITY lt "<">
4876 * Which seems to be valid since
4877 * 2.4: The ampersand character (&) and the left angle
4878 * bracket (<) may appear in their literal form only
4879 * when used ... They are also legal within the literal
4880 * entity value of an internal entity declaration;i
4881 * see "4.3.2 Well-Formed Parsed Entities".
4882 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4883 * Looking at the OASIS test suite and James Clark
4884 * tests, this is broken. However the XML REC uses
4885 * it. Is the XML REC not well-formed ????
4886 * This is a hack to avoid this problem
4887 *
4888 * ANSWER: since lt gt amp .. are already defined,
4889 * this is a redefinition and hence the fact that the
4890 * contentis not well balanced is not a Wf error, this
4891 * is lousy but acceptable.
4892 */
4893 list = xmlNewDocText(ctxt->myDoc, value);
4894 if (list != NULL) {
4895 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4896 (ent->children == NULL)) {
4897 ent->children = list;
4898 ent->last = list;
4899 list->parent = (xmlNodePtr) ent;
4900 } else {
4901 xmlFreeNodeList(list);
4902 }
4903 } else if (list != NULL) {
4904 xmlFreeNodeList(list);
4905 }
4906 } else {
4907 /*
4908 * 4.3.2: An internal general parsed entity is well-formed
4909 * if its replacement text matches the production labeled
4910 * content.
4911 */
4912 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4913 ctxt->depth++;
4914 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4915 ctxt->sax, NULL, ctxt->depth,
4916 value, &list);
4917 ctxt->depth--;
4918 } else if (ent->etype ==
4919 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4920 ctxt->depth++;
4921 ret = xmlParseExternalEntity(ctxt->myDoc,
4922 ctxt->sax, NULL, ctxt->depth,
4923 ent->URI, ent->ExternalID, &list);
4924 ctxt->depth--;
4925 } else {
4926 ret = -1;
4927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4928 ctxt->sax->error(ctxt->userData,
4929 "Internal: invalid entity type\n");
4930 }
4931 if (ret == XML_ERR_ENTITY_LOOP) {
4932 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4934 ctxt->sax->error(ctxt->userData,
4935 "Detected entity reference loop\n");
4936 ctxt->wellFormed = 0;
4937 ctxt->disableSAX = 1;
4938 } else if ((ret == 0) && (list != NULL)) {
4939 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4940 (ent->children == NULL)) {
4941 ent->children = list;
4942 while (list != NULL) {
4943 list->parent = (xmlNodePtr) ent;
4944 if (list->next == NULL)
4945 ent->last = list;
4946 list = list->next;
4947 }
4948 } else {
4949 xmlFreeNodeList(list);
4950 }
4951 } else if (ret > 0) {
4952 ctxt->errNo = ret;
4953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4954 ctxt->sax->error(ctxt->userData,
4955 "Entity value required\n");
4956 ctxt->wellFormed = 0;
4957 ctxt->disableSAX = 1;
4958 } else if (list != NULL) {
4959 xmlFreeNodeList(list);
4960 }
4961 }
4962 }
4963 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4964 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4965 /*
4966 * Create a node.
4967 */
4968 ctxt->sax->reference(ctxt->userData, ent->name);
4969 return;
4970 } else if (ctxt->replaceEntities) {
4971 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4972 /*
4973 * Seems we are generating the DOM content, do
4974 * a simple tree copy
4975 */
4976 xmlNodePtr new;
4977 new = xmlCopyNodeList(ent->children);
4978
4979 xmlAddChildList(ctxt->node, new);
4980 /*
4981 * This is to avoid a nasty side effect, see
4982 * characters() in SAX.c
4983 */
4984 ctxt->nodemem = 0;
4985 ctxt->nodelen = 0;
4986 return;
4987 } else {
4988 /*
4989 * Probably running in SAX mode
4990 */
4991 xmlParserInputPtr input;
4992
4993 input = xmlNewEntityInputStream(ctxt, ent);
4994 xmlPushInput(ctxt, input);
4995 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4996 (RAW == '<') && (NXT(1) == '?') &&
4997 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4998 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4999 xmlParseTextDecl(ctxt);
5000 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5001 /*
5002 * The XML REC instructs us to stop parsing right here
5003 */
5004 ctxt->instate = XML_PARSER_EOF;
5005 return;
5006 }
5007 if (input->standalone == 1) {
5008 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5010 ctxt->sax->error(ctxt->userData,
5011 "external parsed entities cannot be standalone\n");
5012 ctxt->wellFormed = 0;
5013 ctxt->disableSAX = 1;
5014 }
5015 }
5016 return;
5017 }
5018 }
5019 } else {
5020 val = ent->content;
5021 if (val == NULL) return;
5022 /*
5023 * inline the entity.
5024 */
5025 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5026 (!ctxt->disableSAX))
5027 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5028 }
5029 }
5030}
5031
5032/**
5033 * xmlParseEntityRef:
5034 * @ctxt: an XML parser context
5035 *
5036 * parse ENTITY references declarations
5037 *
5038 * [68] EntityRef ::= '&' Name ';'
5039 *
5040 * [ WFC: Entity Declared ]
5041 * In a document without any DTD, a document with only an internal DTD
5042 * subset which contains no parameter entity references, or a document
5043 * with "standalone='yes'", the Name given in the entity reference
5044 * must match that in an entity declaration, except that well-formed
5045 * documents need not declare any of the following entities: amp, lt,
5046 * gt, apos, quot. The declaration of a parameter entity must precede
5047 * any reference to it. Similarly, the declaration of a general entity
5048 * must precede any reference to it which appears in a default value in an
5049 * attribute-list declaration. Note that if entities are declared in the
5050 * external subset or in external parameter entities, a non-validating
5051 * processor is not obligated to read and process their declarations;
5052 * for such documents, the rule that an entity must be declared is a
5053 * well-formedness constraint only if standalone='yes'.
5054 *
5055 * [ WFC: Parsed Entity ]
5056 * An entity reference must not contain the name of an unparsed entity
5057 *
5058 * Returns the xmlEntityPtr if found, or NULL otherwise.
5059 */
5060xmlEntityPtr
5061xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5062 xmlChar *name;
5063 xmlEntityPtr ent = NULL;
5064
5065 GROW;
5066
5067 if (RAW == '&') {
5068 NEXT;
5069 name = xmlParseName(ctxt);
5070 if (name == NULL) {
5071 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5073 ctxt->sax->error(ctxt->userData,
5074 "xmlParseEntityRef: no name\n");
5075 ctxt->wellFormed = 0;
5076 ctxt->disableSAX = 1;
5077 } else {
5078 if (RAW == ';') {
5079 NEXT;
5080 /*
5081 * Ask first SAX for entity resolution, otherwise try the
5082 * predefined set.
5083 */
5084 if (ctxt->sax != NULL) {
5085 if (ctxt->sax->getEntity != NULL)
5086 ent = ctxt->sax->getEntity(ctxt->userData, name);
5087 if (ent == NULL)
5088 ent = xmlGetPredefinedEntity(name);
5089 }
5090 /*
5091 * [ WFC: Entity Declared ]
5092 * In a document without any DTD, a document with only an
5093 * internal DTD subset which contains no parameter entity
5094 * references, or a document with "standalone='yes'", the
5095 * Name given in the entity reference must match that in an
5096 * entity declaration, except that well-formed documents
5097 * need not declare any of the following entities: amp, lt,
5098 * gt, apos, quot.
5099 * The declaration of a parameter entity must precede any
5100 * reference to it.
5101 * Similarly, the declaration of a general entity must
5102 * precede any reference to it which appears in a default
5103 * value in an attribute-list declaration. Note that if
5104 * entities are declared in the external subset or in
5105 * external parameter entities, a non-validating processor
5106 * is not obligated to read and process their declarations;
5107 * for such documents, the rule that an entity must be
5108 * declared is a well-formedness constraint only if
5109 * standalone='yes'.
5110 */
5111 if (ent == NULL) {
5112 if ((ctxt->standalone == 1) ||
5113 ((ctxt->hasExternalSubset == 0) &&
5114 (ctxt->hasPErefs == 0))) {
5115 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5117 ctxt->sax->error(ctxt->userData,
5118 "Entity '%s' not defined\n", name);
5119 ctxt->wellFormed = 0;
5120 ctxt->disableSAX = 1;
5121 } else {
5122 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5123 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5124 ctxt->sax->warning(ctxt->userData,
5125 "Entity '%s' not defined\n", name);
5126 }
5127 }
5128
5129 /*
5130 * [ WFC: Parsed Entity ]
5131 * An entity reference must not contain the name of an
5132 * unparsed entity
5133 */
5134 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5135 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5137 ctxt->sax->error(ctxt->userData,
5138 "Entity reference to unparsed entity %s\n", name);
5139 ctxt->wellFormed = 0;
5140 ctxt->disableSAX = 1;
5141 }
5142
5143 /*
5144 * [ WFC: No External Entity References ]
5145 * Attribute values cannot contain direct or indirect
5146 * entity references to external entities.
5147 */
5148 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5149 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5150 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5152 ctxt->sax->error(ctxt->userData,
5153 "Attribute references external entity '%s'\n", name);
5154 ctxt->wellFormed = 0;
5155 ctxt->disableSAX = 1;
5156 }
5157 /*
5158 * [ WFC: No < in Attribute Values ]
5159 * The replacement text of any entity referred to directly or
5160 * indirectly in an attribute value (other than "&lt;") must
5161 * not contain a <.
5162 */
5163 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5164 (ent != NULL) &&
5165 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5166 (ent->content != NULL) &&
5167 (xmlStrchr(ent->content, '<'))) {
5168 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5170 ctxt->sax->error(ctxt->userData,
5171 "'<' in entity '%s' is not allowed in attributes values\n", name);
5172 ctxt->wellFormed = 0;
5173 ctxt->disableSAX = 1;
5174 }
5175
5176 /*
5177 * Internal check, no parameter entities here ...
5178 */
5179 else {
5180 switch (ent->etype) {
5181 case XML_INTERNAL_PARAMETER_ENTITY:
5182 case XML_EXTERNAL_PARAMETER_ENTITY:
5183 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5185 ctxt->sax->error(ctxt->userData,
5186 "Attempt to reference the parameter entity '%s'\n", name);
5187 ctxt->wellFormed = 0;
5188 ctxt->disableSAX = 1;
5189 break;
5190 default:
5191 break;
5192 }
5193 }
5194
5195 /*
5196 * [ WFC: No Recursion ]
5197 * A parsed entity must not contain a recursive reference
5198 * to itself, either directly or indirectly.
5199 * Done somewhere else
5200 */
5201
5202 } else {
5203 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5205 ctxt->sax->error(ctxt->userData,
5206 "xmlParseEntityRef: expecting ';'\n");
5207 ctxt->wellFormed = 0;
5208 ctxt->disableSAX = 1;
5209 }
5210 xmlFree(name);
5211 }
5212 }
5213 return(ent);
5214}
5215
5216/**
5217 * xmlParseStringEntityRef:
5218 * @ctxt: an XML parser context
5219 * @str: a pointer to an index in the string
5220 *
5221 * parse ENTITY references declarations, but this version parses it from
5222 * a string value.
5223 *
5224 * [68] EntityRef ::= '&' Name ';'
5225 *
5226 * [ WFC: Entity Declared ]
5227 * In a document without any DTD, a document with only an internal DTD
5228 * subset which contains no parameter entity references, or a document
5229 * with "standalone='yes'", the Name given in the entity reference
5230 * must match that in an entity declaration, except that well-formed
5231 * documents need not declare any of the following entities: amp, lt,
5232 * gt, apos, quot. The declaration of a parameter entity must precede
5233 * any reference to it. Similarly, the declaration of a general entity
5234 * must precede any reference to it which appears in a default value in an
5235 * attribute-list declaration. Note that if entities are declared in the
5236 * external subset or in external parameter entities, a non-validating
5237 * processor is not obligated to read and process their declarations;
5238 * for such documents, the rule that an entity must be declared is a
5239 * well-formedness constraint only if standalone='yes'.
5240 *
5241 * [ WFC: Parsed Entity ]
5242 * An entity reference must not contain the name of an unparsed entity
5243 *
5244 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5245 * is updated to the current location in the string.
5246 */
5247xmlEntityPtr
5248xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5249 xmlChar *name;
5250 const xmlChar *ptr;
5251 xmlChar cur;
5252 xmlEntityPtr ent = NULL;
5253
5254 if ((str == NULL) || (*str == NULL))
5255 return(NULL);
5256 ptr = *str;
5257 cur = *ptr;
5258 if (cur == '&') {
5259 ptr++;
5260 cur = *ptr;
5261 name = xmlParseStringName(ctxt, &ptr);
5262 if (name == NULL) {
5263 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5265 ctxt->sax->error(ctxt->userData,
5266 "xmlParseEntityRef: no name\n");
5267 ctxt->wellFormed = 0;
5268 ctxt->disableSAX = 1;
5269 } else {
5270 if (*ptr == ';') {
5271 ptr++;
5272 /*
5273 * Ask first SAX for entity resolution, otherwise try the
5274 * predefined set.
5275 */
5276 if (ctxt->sax != NULL) {
5277 if (ctxt->sax->getEntity != NULL)
5278 ent = ctxt->sax->getEntity(ctxt->userData, name);
5279 if (ent == NULL)
5280 ent = xmlGetPredefinedEntity(name);
5281 }
5282 /*
5283 * [ WFC: Entity Declared ]
5284 * In a document without any DTD, a document with only an
5285 * internal DTD subset which contains no parameter entity
5286 * references, or a document with "standalone='yes'", the
5287 * Name given in the entity reference must match that in an
5288 * entity declaration, except that well-formed documents
5289 * need not declare any of the following entities: amp, lt,
5290 * gt, apos, quot.
5291 * The declaration of a parameter entity must precede any
5292 * reference to it.
5293 * Similarly, the declaration of a general entity must
5294 * precede any reference to it which appears in a default
5295 * value in an attribute-list declaration. Note that if
5296 * entities are declared in the external subset or in
5297 * external parameter entities, a non-validating processor
5298 * is not obligated to read and process their declarations;
5299 * for such documents, the rule that an entity must be
5300 * declared is a well-formedness constraint only if
5301 * standalone='yes'.
5302 */
5303 if (ent == NULL) {
5304 if ((ctxt->standalone == 1) ||
5305 ((ctxt->hasExternalSubset == 0) &&
5306 (ctxt->hasPErefs == 0))) {
5307 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5309 ctxt->sax->error(ctxt->userData,
5310 "Entity '%s' not defined\n", name);
5311 ctxt->wellFormed = 0;
5312 ctxt->disableSAX = 1;
5313 } else {
5314 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5315 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5316 ctxt->sax->warning(ctxt->userData,
5317 "Entity '%s' not defined\n", name);
5318 }
5319 }
5320
5321 /*
5322 * [ WFC: Parsed Entity ]
5323 * An entity reference must not contain the name of an
5324 * unparsed entity
5325 */
5326 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5327 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5329 ctxt->sax->error(ctxt->userData,
5330 "Entity reference to unparsed entity %s\n", name);
5331 ctxt->wellFormed = 0;
5332 ctxt->disableSAX = 1;
5333 }
5334
5335 /*
5336 * [ WFC: No External Entity References ]
5337 * Attribute values cannot contain direct or indirect
5338 * entity references to external entities.
5339 */
5340 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5341 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5342 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5344 ctxt->sax->error(ctxt->userData,
5345 "Attribute references external entity '%s'\n", name);
5346 ctxt->wellFormed = 0;
5347 ctxt->disableSAX = 1;
5348 }
5349 /*
5350 * [ WFC: No < in Attribute Values ]
5351 * The replacement text of any entity referred to directly or
5352 * indirectly in an attribute value (other than "&lt;") must
5353 * not contain a <.
5354 */
5355 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5356 (ent != NULL) &&
5357 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5358 (ent->content != NULL) &&
5359 (xmlStrchr(ent->content, '<'))) {
5360 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5362 ctxt->sax->error(ctxt->userData,
5363 "'<' in entity '%s' is not allowed in attributes values\n", name);
5364 ctxt->wellFormed = 0;
5365 ctxt->disableSAX = 1;
5366 }
5367
5368 /*
5369 * Internal check, no parameter entities here ...
5370 */
5371 else {
5372 switch (ent->etype) {
5373 case XML_INTERNAL_PARAMETER_ENTITY:
5374 case XML_EXTERNAL_PARAMETER_ENTITY:
5375 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5377 ctxt->sax->error(ctxt->userData,
5378 "Attempt to reference the parameter entity '%s'\n", name);
5379 ctxt->wellFormed = 0;
5380 ctxt->disableSAX = 1;
5381 break;
5382 default:
5383 break;
5384 }
5385 }
5386
5387 /*
5388 * [ WFC: No Recursion ]
5389 * A parsed entity must not contain a recursive reference
5390 * to itself, either directly or indirectly.
5391 * Done somewhwere else
5392 */
5393
5394 } else {
5395 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5397 ctxt->sax->error(ctxt->userData,
5398 "xmlParseEntityRef: expecting ';'\n");
5399 ctxt->wellFormed = 0;
5400 ctxt->disableSAX = 1;
5401 }
5402 xmlFree(name);
5403 }
5404 }
5405 *str = ptr;
5406 return(ent);
5407}
5408
5409/**
5410 * xmlParsePEReference:
5411 * @ctxt: an XML parser context
5412 *
5413 * parse PEReference declarations
5414 * The entity content is handled directly by pushing it's content as
5415 * a new input stream.
5416 *
5417 * [69] PEReference ::= '%' Name ';'
5418 *
5419 * [ WFC: No Recursion ]
5420 * A parsed entity must not contain a recursive
5421 * reference to itself, either directly or indirectly.
5422 *
5423 * [ WFC: Entity Declared ]
5424 * In a document without any DTD, a document with only an internal DTD
5425 * subset which contains no parameter entity references, or a document
5426 * with "standalone='yes'", ... ... The declaration of a parameter
5427 * entity must precede any reference to it...
5428 *
5429 * [ VC: Entity Declared ]
5430 * In a document with an external subset or external parameter entities
5431 * with "standalone='no'", ... ... The declaration of a parameter entity
5432 * must precede any reference to it...
5433 *
5434 * [ WFC: In DTD ]
5435 * Parameter-entity references may only appear in the DTD.
5436 * NOTE: misleading but this is handled.
5437 */
5438void
5439xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5440 xmlChar *name;
5441 xmlEntityPtr entity = NULL;
5442 xmlParserInputPtr input;
5443
5444 if (RAW == '%') {
5445 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005446 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005447 if (name == NULL) {
5448 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5450 ctxt->sax->error(ctxt->userData,
5451 "xmlParsePEReference: no name\n");
5452 ctxt->wellFormed = 0;
5453 ctxt->disableSAX = 1;
5454 } else {
5455 if (RAW == ';') {
5456 NEXT;
5457 if ((ctxt->sax != NULL) &&
5458 (ctxt->sax->getParameterEntity != NULL))
5459 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5460 name);
5461 if (entity == NULL) {
5462 /*
5463 * [ WFC: Entity Declared ]
5464 * In a document without any DTD, a document with only an
5465 * internal DTD subset which contains no parameter entity
5466 * references, or a document with "standalone='yes'", ...
5467 * ... The declaration of a parameter entity must precede
5468 * any reference to it...
5469 */
5470 if ((ctxt->standalone == 1) ||
5471 ((ctxt->hasExternalSubset == 0) &&
5472 (ctxt->hasPErefs == 0))) {
5473 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5474 if ((!ctxt->disableSAX) &&
5475 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5476 ctxt->sax->error(ctxt->userData,
5477 "PEReference: %%%s; not found\n", name);
5478 ctxt->wellFormed = 0;
5479 ctxt->disableSAX = 1;
5480 } else {
5481 /*
5482 * [ VC: Entity Declared ]
5483 * In a document with an external subset or external
5484 * parameter entities with "standalone='no'", ...
5485 * ... The declaration of a parameter entity must precede
5486 * any reference to it...
5487 */
5488 if ((!ctxt->disableSAX) &&
5489 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5490 ctxt->sax->warning(ctxt->userData,
5491 "PEReference: %%%s; not found\n", name);
5492 ctxt->valid = 0;
5493 }
5494 } else {
5495 /*
5496 * Internal checking in case the entity quest barfed
5497 */
5498 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5499 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5500 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5501 ctxt->sax->warning(ctxt->userData,
5502 "Internal: %%%s; is not a parameter entity\n", name);
5503 } else {
5504 /*
5505 * TODO !!!
5506 * handle the extra spaces added before and after
5507 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5508 */
5509 input = xmlNewEntityInputStream(ctxt, entity);
5510 xmlPushInput(ctxt, input);
5511 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5512 (RAW == '<') && (NXT(1) == '?') &&
5513 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5514 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5515 xmlParseTextDecl(ctxt);
5516 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5517 /*
5518 * The XML REC instructs us to stop parsing
5519 * right here
5520 */
5521 ctxt->instate = XML_PARSER_EOF;
5522 xmlFree(name);
5523 return;
5524 }
5525 }
5526 if (ctxt->token == 0)
5527 ctxt->token = ' ';
5528 }
5529 }
5530 ctxt->hasPErefs = 1;
5531 } else {
5532 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5534 ctxt->sax->error(ctxt->userData,
5535 "xmlParsePEReference: expecting ';'\n");
5536 ctxt->wellFormed = 0;
5537 ctxt->disableSAX = 1;
5538 }
5539 xmlFree(name);
5540 }
5541 }
5542}
5543
5544/**
5545 * xmlParseStringPEReference:
5546 * @ctxt: an XML parser context
5547 * @str: a pointer to an index in the string
5548 *
5549 * parse PEReference declarations
5550 *
5551 * [69] PEReference ::= '%' Name ';'
5552 *
5553 * [ WFC: No Recursion ]
5554 * A parsed entity must not contain a recursive
5555 * reference to itself, either directly or indirectly.
5556 *
5557 * [ WFC: Entity Declared ]
5558 * In a document without any DTD, a document with only an internal DTD
5559 * subset which contains no parameter entity references, or a document
5560 * with "standalone='yes'", ... ... The declaration of a parameter
5561 * entity must precede any reference to it...
5562 *
5563 * [ VC: Entity Declared ]
5564 * In a document with an external subset or external parameter entities
5565 * with "standalone='no'", ... ... The declaration of a parameter entity
5566 * must precede any reference to it...
5567 *
5568 * [ WFC: In DTD ]
5569 * Parameter-entity references may only appear in the DTD.
5570 * NOTE: misleading but this is handled.
5571 *
5572 * Returns the string of the entity content.
5573 * str is updated to the current value of the index
5574 */
5575xmlEntityPtr
5576xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5577 const xmlChar *ptr;
5578 xmlChar cur;
5579 xmlChar *name;
5580 xmlEntityPtr entity = NULL;
5581
5582 if ((str == NULL) || (*str == NULL)) return(NULL);
5583 ptr = *str;
5584 cur = *ptr;
5585 if (cur == '%') {
5586 ptr++;
5587 cur = *ptr;
5588 name = xmlParseStringName(ctxt, &ptr);
5589 if (name == NULL) {
5590 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5592 ctxt->sax->error(ctxt->userData,
5593 "xmlParseStringPEReference: no name\n");
5594 ctxt->wellFormed = 0;
5595 ctxt->disableSAX = 1;
5596 } else {
5597 cur = *ptr;
5598 if (cur == ';') {
5599 ptr++;
5600 cur = *ptr;
5601 if ((ctxt->sax != NULL) &&
5602 (ctxt->sax->getParameterEntity != NULL))
5603 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5604 name);
5605 if (entity == NULL) {
5606 /*
5607 * [ WFC: Entity Declared ]
5608 * In a document without any DTD, a document with only an
5609 * internal DTD subset which contains no parameter entity
5610 * references, or a document with "standalone='yes'", ...
5611 * ... The declaration of a parameter entity must precede
5612 * any reference to it...
5613 */
5614 if ((ctxt->standalone == 1) ||
5615 ((ctxt->hasExternalSubset == 0) &&
5616 (ctxt->hasPErefs == 0))) {
5617 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5619 ctxt->sax->error(ctxt->userData,
5620 "PEReference: %%%s; not found\n", name);
5621 ctxt->wellFormed = 0;
5622 ctxt->disableSAX = 1;
5623 } else {
5624 /*
5625 * [ VC: Entity Declared ]
5626 * In a document with an external subset or external
5627 * parameter entities with "standalone='no'", ...
5628 * ... The declaration of a parameter entity must
5629 * precede any reference to it...
5630 */
5631 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5632 ctxt->sax->warning(ctxt->userData,
5633 "PEReference: %%%s; not found\n", name);
5634 ctxt->valid = 0;
5635 }
5636 } else {
5637 /*
5638 * Internal checking in case the entity quest barfed
5639 */
5640 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5641 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5642 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5643 ctxt->sax->warning(ctxt->userData,
5644 "Internal: %%%s; is not a parameter entity\n", name);
5645 }
5646 }
5647 ctxt->hasPErefs = 1;
5648 } else {
5649 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5651 ctxt->sax->error(ctxt->userData,
5652 "xmlParseStringPEReference: expecting ';'\n");
5653 ctxt->wellFormed = 0;
5654 ctxt->disableSAX = 1;
5655 }
5656 xmlFree(name);
5657 }
5658 }
5659 *str = ptr;
5660 return(entity);
5661}
5662
5663/**
5664 * xmlParseDocTypeDecl:
5665 * @ctxt: an XML parser context
5666 *
5667 * parse a DOCTYPE declaration
5668 *
5669 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5670 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5671 *
5672 * [ VC: Root Element Type ]
5673 * The Name in the document type declaration must match the element
5674 * type of the root element.
5675 */
5676
5677void
5678xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5679 xmlChar *name = NULL;
5680 xmlChar *ExternalID = NULL;
5681 xmlChar *URI = NULL;
5682
5683 /*
5684 * We know that '<!DOCTYPE' has been detected.
5685 */
5686 SKIP(9);
5687
5688 SKIP_BLANKS;
5689
5690 /*
5691 * Parse the DOCTYPE name.
5692 */
5693 name = xmlParseName(ctxt);
5694 if (name == NULL) {
5695 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5697 ctxt->sax->error(ctxt->userData,
5698 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5699 ctxt->wellFormed = 0;
5700 ctxt->disableSAX = 1;
5701 }
5702 ctxt->intSubName = name;
5703
5704 SKIP_BLANKS;
5705
5706 /*
5707 * Check for SystemID and ExternalID
5708 */
5709 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5710
5711 if ((URI != NULL) || (ExternalID != NULL)) {
5712 ctxt->hasExternalSubset = 1;
5713 }
5714 ctxt->extSubURI = URI;
5715 ctxt->extSubSystem = ExternalID;
5716
5717 SKIP_BLANKS;
5718
5719 /*
5720 * Create and update the internal subset.
5721 */
5722 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5723 (!ctxt->disableSAX))
5724 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5725
5726 /*
5727 * Is there any internal subset declarations ?
5728 * they are handled separately in xmlParseInternalSubset()
5729 */
5730 if (RAW == '[')
5731 return;
5732
5733 /*
5734 * We should be at the end of the DOCTYPE declaration.
5735 */
5736 if (RAW != '>') {
5737 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5740 ctxt->wellFormed = 0;
5741 ctxt->disableSAX = 1;
5742 }
5743 NEXT;
5744}
5745
5746/**
5747 * xmlParseInternalsubset:
5748 * @ctxt: an XML parser context
5749 *
5750 * parse the internal subset declaration
5751 *
5752 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5753 */
5754
5755void
5756xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5757 /*
5758 * Is there any DTD definition ?
5759 */
5760 if (RAW == '[') {
5761 ctxt->instate = XML_PARSER_DTD;
5762 NEXT;
5763 /*
5764 * Parse the succession of Markup declarations and
5765 * PEReferences.
5766 * Subsequence (markupdecl | PEReference | S)*
5767 */
5768 while (RAW != ']') {
5769 const xmlChar *check = CUR_PTR;
5770 int cons = ctxt->input->consumed;
5771
5772 SKIP_BLANKS;
5773 xmlParseMarkupDecl(ctxt);
5774 xmlParsePEReference(ctxt);
5775
5776 /*
5777 * Pop-up of finished entities.
5778 */
5779 while ((RAW == 0) && (ctxt->inputNr > 1))
5780 xmlPopInput(ctxt);
5781
5782 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5783 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5785 ctxt->sax->error(ctxt->userData,
5786 "xmlParseInternalSubset: error detected in Markup declaration\n");
5787 ctxt->wellFormed = 0;
5788 ctxt->disableSAX = 1;
5789 break;
5790 }
5791 }
5792 if (RAW == ']') {
5793 NEXT;
5794 SKIP_BLANKS;
5795 }
5796 }
5797
5798 /*
5799 * We should be at the end of the DOCTYPE declaration.
5800 */
5801 if (RAW != '>') {
5802 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5803 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5804 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5805 ctxt->wellFormed = 0;
5806 ctxt->disableSAX = 1;
5807 }
5808 NEXT;
5809}
5810
5811/**
5812 * xmlParseAttribute:
5813 * @ctxt: an XML parser context
5814 * @value: a xmlChar ** used to store the value of the attribute
5815 *
5816 * parse an attribute
5817 *
5818 * [41] Attribute ::= Name Eq AttValue
5819 *
5820 * [ WFC: No External Entity References ]
5821 * Attribute values cannot contain direct or indirect entity references
5822 * to external entities.
5823 *
5824 * [ WFC: No < in Attribute Values ]
5825 * The replacement text of any entity referred to directly or indirectly in
5826 * an attribute value (other than "&lt;") must not contain a <.
5827 *
5828 * [ VC: Attribute Value Type ]
5829 * The attribute must have been declared; the value must be of the type
5830 * declared for it.
5831 *
5832 * [25] Eq ::= S? '=' S?
5833 *
5834 * With namespace:
5835 *
5836 * [NS 11] Attribute ::= QName Eq AttValue
5837 *
5838 * Also the case QName == xmlns:??? is handled independently as a namespace
5839 * definition.
5840 *
5841 * Returns the attribute name, and the value in *value.
5842 */
5843
5844xmlChar *
5845xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5846 xmlChar *name, *val;
5847
5848 *value = NULL;
5849 name = xmlParseName(ctxt);
5850 if (name == NULL) {
5851 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5853 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5854 ctxt->wellFormed = 0;
5855 ctxt->disableSAX = 1;
5856 return(NULL);
5857 }
5858
5859 /*
5860 * read the value
5861 */
5862 SKIP_BLANKS;
5863 if (RAW == '=') {
5864 NEXT;
5865 SKIP_BLANKS;
5866 val = xmlParseAttValue(ctxt);
5867 ctxt->instate = XML_PARSER_CONTENT;
5868 } else {
5869 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5871 ctxt->sax->error(ctxt->userData,
5872 "Specification mandate value for attribute %s\n", name);
5873 ctxt->wellFormed = 0;
5874 ctxt->disableSAX = 1;
5875 xmlFree(name);
5876 return(NULL);
5877 }
5878
5879 /*
5880 * Check that xml:lang conforms to the specification
5881 * No more registered as an error, just generate a warning now
5882 * since this was deprecated in XML second edition
5883 */
5884 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5885 if (!xmlCheckLanguageID(val)) {
5886 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5887 ctxt->sax->warning(ctxt->userData,
5888 "Malformed value for xml:lang : %s\n", val);
5889 }
5890 }
5891
5892 /*
5893 * Check that xml:space conforms to the specification
5894 */
5895 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5896 if (xmlStrEqual(val, BAD_CAST "default"))
5897 *(ctxt->space) = 0;
5898 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5899 *(ctxt->space) = 1;
5900 else {
5901 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5903 ctxt->sax->error(ctxt->userData,
5904"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5905 val);
5906 ctxt->wellFormed = 0;
5907 ctxt->disableSAX = 1;
5908 }
5909 }
5910
5911 *value = val;
5912 return(name);
5913}
5914
5915/**
5916 * xmlParseStartTag:
5917 * @ctxt: an XML parser context
5918 *
5919 * parse a start of tag either for rule element or
5920 * EmptyElement. In both case we don't parse the tag closing chars.
5921 *
5922 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5923 *
5924 * [ WFC: Unique Att Spec ]
5925 * No attribute name may appear more than once in the same start-tag or
5926 * empty-element tag.
5927 *
5928 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5929 *
5930 * [ WFC: Unique Att Spec ]
5931 * No attribute name may appear more than once in the same start-tag or
5932 * empty-element tag.
5933 *
5934 * With namespace:
5935 *
5936 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5937 *
5938 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5939 *
5940 * Returns the element name parsed
5941 */
5942
5943xmlChar *
5944xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5945 xmlChar *name;
5946 xmlChar *attname;
5947 xmlChar *attvalue;
5948 const xmlChar **atts = NULL;
5949 int nbatts = 0;
5950 int maxatts = 0;
5951 int i;
5952
5953 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005954 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005955
5956 name = xmlParseName(ctxt);
5957 if (name == NULL) {
5958 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5960 ctxt->sax->error(ctxt->userData,
5961 "xmlParseStartTag: invalid element name\n");
5962 ctxt->wellFormed = 0;
5963 ctxt->disableSAX = 1;
5964 return(NULL);
5965 }
5966
5967 /*
5968 * Now parse the attributes, it ends up with the ending
5969 *
5970 * (S Attribute)* S?
5971 */
5972 SKIP_BLANKS;
5973 GROW;
5974
Daniel Veillard21a0f912001-02-25 19:54:14 +00005975 while ((RAW != '>') &&
5976 ((RAW != '/') || (NXT(1) != '>')) &&
5977 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005978 const xmlChar *q = CUR_PTR;
5979 int cons = ctxt->input->consumed;
5980
5981 attname = xmlParseAttribute(ctxt, &attvalue);
5982 if ((attname != NULL) && (attvalue != NULL)) {
5983 /*
5984 * [ WFC: Unique Att Spec ]
5985 * No attribute name may appear more than once in the same
5986 * start-tag or empty-element tag.
5987 */
5988 for (i = 0; i < nbatts;i += 2) {
5989 if (xmlStrEqual(atts[i], attname)) {
5990 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5992 ctxt->sax->error(ctxt->userData,
5993 "Attribute %s redefined\n",
5994 attname);
5995 ctxt->wellFormed = 0;
5996 ctxt->disableSAX = 1;
5997 xmlFree(attname);
5998 xmlFree(attvalue);
5999 goto failed;
6000 }
6001 }
6002
6003 /*
6004 * Add the pair to atts
6005 */
6006 if (atts == NULL) {
6007 maxatts = 10;
6008 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6009 if (atts == NULL) {
6010 xmlGenericError(xmlGenericErrorContext,
6011 "malloc of %ld byte failed\n",
6012 maxatts * (long)sizeof(xmlChar *));
6013 return(NULL);
6014 }
6015 } else if (nbatts + 4 > maxatts) {
6016 maxatts *= 2;
6017 atts = (const xmlChar **) xmlRealloc((void *) atts,
6018 maxatts * sizeof(xmlChar *));
6019 if (atts == NULL) {
6020 xmlGenericError(xmlGenericErrorContext,
6021 "realloc of %ld byte failed\n",
6022 maxatts * (long)sizeof(xmlChar *));
6023 return(NULL);
6024 }
6025 }
6026 atts[nbatts++] = attname;
6027 atts[nbatts++] = attvalue;
6028 atts[nbatts] = NULL;
6029 atts[nbatts + 1] = NULL;
6030 } else {
6031 if (attname != NULL)
6032 xmlFree(attname);
6033 if (attvalue != NULL)
6034 xmlFree(attvalue);
6035 }
6036
6037failed:
6038
6039 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6040 break;
6041 if (!IS_BLANK(RAW)) {
6042 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6044 ctxt->sax->error(ctxt->userData,
6045 "attributes construct error\n");
6046 ctxt->wellFormed = 0;
6047 ctxt->disableSAX = 1;
6048 }
6049 SKIP_BLANKS;
6050 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6051 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6053 ctxt->sax->error(ctxt->userData,
6054 "xmlParseStartTag: problem parsing attributes\n");
6055 ctxt->wellFormed = 0;
6056 ctxt->disableSAX = 1;
6057 break;
6058 }
6059 GROW;
6060 }
6061
6062 /*
6063 * SAX: Start of Element !
6064 */
6065 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6066 (!ctxt->disableSAX))
6067 ctxt->sax->startElement(ctxt->userData, name, atts);
6068
6069 if (atts != NULL) {
6070 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6071 xmlFree((void *) atts);
6072 }
6073 return(name);
6074}
6075
6076/**
6077 * xmlParseEndTag:
6078 * @ctxt: an XML parser context
6079 *
6080 * parse an end of tag
6081 *
6082 * [42] ETag ::= '</' Name S? '>'
6083 *
6084 * With namespace
6085 *
6086 * [NS 9] ETag ::= '</' QName S? '>'
6087 */
6088
6089void
6090xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6091 xmlChar *name;
6092 xmlChar *oldname;
6093
6094 GROW;
6095 if ((RAW != '<') || (NXT(1) != '/')) {
6096 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6098 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6099 ctxt->wellFormed = 0;
6100 ctxt->disableSAX = 1;
6101 return;
6102 }
6103 SKIP(2);
6104
6105 name = xmlParseName(ctxt);
6106
6107 /*
6108 * We should definitely be at the ending "S? '>'" part
6109 */
6110 GROW;
6111 SKIP_BLANKS;
6112 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6113 ctxt->errNo = XML_ERR_GT_REQUIRED;
6114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6115 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6116 ctxt->wellFormed = 0;
6117 ctxt->disableSAX = 1;
6118 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006119 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006120
6121 /*
6122 * [ WFC: Element Type Match ]
6123 * The Name in an element's end-tag must match the element type in the
6124 * start-tag.
6125 *
6126 */
6127 if ((name == NULL) || (ctxt->name == NULL) ||
6128 (!xmlStrEqual(name, ctxt->name))) {
6129 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6131 if ((name != NULL) && (ctxt->name != NULL)) {
6132 ctxt->sax->error(ctxt->userData,
6133 "Opening and ending tag mismatch: %s and %s\n",
6134 ctxt->name, name);
6135 } else if (ctxt->name != NULL) {
6136 ctxt->sax->error(ctxt->userData,
6137 "Ending tag eror for: %s\n", ctxt->name);
6138 } else {
6139 ctxt->sax->error(ctxt->userData,
6140 "Ending tag error: internal error ???\n");
6141 }
6142
6143 }
6144 ctxt->wellFormed = 0;
6145 ctxt->disableSAX = 1;
6146 }
6147
6148 /*
6149 * SAX: End of Tag
6150 */
6151 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6152 (!ctxt->disableSAX))
6153 ctxt->sax->endElement(ctxt->userData, name);
6154
6155 if (name != NULL)
6156 xmlFree(name);
6157 oldname = namePop(ctxt);
6158 spacePop(ctxt);
6159 if (oldname != NULL) {
6160#ifdef DEBUG_STACK
6161 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6162#endif
6163 xmlFree(oldname);
6164 }
6165 return;
6166}
6167
6168/**
6169 * xmlParseCDSect:
6170 * @ctxt: an XML parser context
6171 *
6172 * Parse escaped pure raw content.
6173 *
6174 * [18] CDSect ::= CDStart CData CDEnd
6175 *
6176 * [19] CDStart ::= '<![CDATA['
6177 *
6178 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6179 *
6180 * [21] CDEnd ::= ']]>'
6181 */
6182void
6183xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6184 xmlChar *buf = NULL;
6185 int len = 0;
6186 int size = XML_PARSER_BUFFER_SIZE;
6187 int r, rl;
6188 int s, sl;
6189 int cur, l;
6190 int count = 0;
6191
6192 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6193 (NXT(2) == '[') && (NXT(3) == 'C') &&
6194 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6195 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6196 (NXT(8) == '[')) {
6197 SKIP(9);
6198 } else
6199 return;
6200
6201 ctxt->instate = XML_PARSER_CDATA_SECTION;
6202 r = CUR_CHAR(rl);
6203 if (!IS_CHAR(r)) {
6204 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6206 ctxt->sax->error(ctxt->userData,
6207 "CData section not finished\n");
6208 ctxt->wellFormed = 0;
6209 ctxt->disableSAX = 1;
6210 ctxt->instate = XML_PARSER_CONTENT;
6211 return;
6212 }
6213 NEXTL(rl);
6214 s = CUR_CHAR(sl);
6215 if (!IS_CHAR(s)) {
6216 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6218 ctxt->sax->error(ctxt->userData,
6219 "CData section not finished\n");
6220 ctxt->wellFormed = 0;
6221 ctxt->disableSAX = 1;
6222 ctxt->instate = XML_PARSER_CONTENT;
6223 return;
6224 }
6225 NEXTL(sl);
6226 cur = CUR_CHAR(l);
6227 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6228 if (buf == NULL) {
6229 xmlGenericError(xmlGenericErrorContext,
6230 "malloc of %d byte failed\n", size);
6231 return;
6232 }
6233 while (IS_CHAR(cur) &&
6234 ((r != ']') || (s != ']') || (cur != '>'))) {
6235 if (len + 5 >= size) {
6236 size *= 2;
6237 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6238 if (buf == NULL) {
6239 xmlGenericError(xmlGenericErrorContext,
6240 "realloc of %d byte failed\n", size);
6241 return;
6242 }
6243 }
6244 COPY_BUF(rl,buf,len,r);
6245 r = s;
6246 rl = sl;
6247 s = cur;
6248 sl = l;
6249 count++;
6250 if (count > 50) {
6251 GROW;
6252 count = 0;
6253 }
6254 NEXTL(l);
6255 cur = CUR_CHAR(l);
6256 }
6257 buf[len] = 0;
6258 ctxt->instate = XML_PARSER_CONTENT;
6259 if (cur != '>') {
6260 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6262 ctxt->sax->error(ctxt->userData,
6263 "CData section not finished\n%.50s\n", buf);
6264 ctxt->wellFormed = 0;
6265 ctxt->disableSAX = 1;
6266 xmlFree(buf);
6267 return;
6268 }
6269 NEXTL(l);
6270
6271 /*
6272 * Ok the buffer is to be consumed as cdata.
6273 */
6274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6275 if (ctxt->sax->cdataBlock != NULL)
6276 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6277 }
6278 xmlFree(buf);
6279}
6280
6281/**
6282 * xmlParseContent:
6283 * @ctxt: an XML parser context
6284 *
6285 * Parse a content:
6286 *
6287 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6288 */
6289
6290void
6291xmlParseContent(xmlParserCtxtPtr ctxt) {
6292 GROW;
6293 while (((RAW != 0) || (ctxt->token != 0)) &&
6294 ((RAW != '<') || (NXT(1) != '/'))) {
6295 const xmlChar *test = CUR_PTR;
6296 int cons = ctxt->input->consumed;
6297 xmlChar tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006298 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006299
6300 /*
6301 * Handle possible processed charrefs.
6302 */
6303 if (ctxt->token != 0) {
6304 xmlParseCharData(ctxt, 0);
6305 }
6306 /*
6307 * First case : a Processing Instruction.
6308 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006309 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006310 xmlParsePI(ctxt);
6311 }
6312
6313 /*
6314 * Second case : a CDSection
6315 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006316 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006317 (NXT(2) == '[') && (NXT(3) == 'C') &&
6318 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6319 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6320 (NXT(8) == '[')) {
6321 xmlParseCDSect(ctxt);
6322 }
6323
6324 /*
6325 * Third case : a comment
6326 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006327 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006328 (NXT(2) == '-') && (NXT(3) == '-')) {
6329 xmlParseComment(ctxt);
6330 ctxt->instate = XML_PARSER_CONTENT;
6331 }
6332
6333 /*
6334 * Fourth case : a sub-element.
6335 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006336 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006337 xmlParseElement(ctxt);
6338 }
6339
6340 /*
6341 * Fifth case : a reference. If if has not been resolved,
6342 * parsing returns it's Name, create the node
6343 */
6344
Daniel Veillard21a0f912001-02-25 19:54:14 +00006345 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006346 xmlParseReference(ctxt);
6347 }
6348
6349 /*
6350 * Last case, text. Note that References are handled directly.
6351 */
6352 else {
6353 xmlParseCharData(ctxt, 0);
6354 }
6355
6356 GROW;
6357 /*
6358 * Pop-up of finished entities.
6359 */
6360 while ((RAW == 0) && (ctxt->inputNr > 1))
6361 xmlPopInput(ctxt);
6362 SHRINK;
6363
6364 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6365 (tok == ctxt->token)) {
6366 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6368 ctxt->sax->error(ctxt->userData,
6369 "detected an error in element content\n");
6370 ctxt->wellFormed = 0;
6371 ctxt->disableSAX = 1;
6372 ctxt->instate = XML_PARSER_EOF;
6373 break;
6374 }
6375 }
6376}
6377
6378/**
6379 * xmlParseElement:
6380 * @ctxt: an XML parser context
6381 *
6382 * parse an XML element, this is highly recursive
6383 *
6384 * [39] element ::= EmptyElemTag | STag content ETag
6385 *
6386 * [ WFC: Element Type Match ]
6387 * The Name in an element's end-tag must match the element type in the
6388 * start-tag.
6389 *
6390 * [ VC: Element Valid ]
6391 * An element is valid if there is a declaration matching elementdecl
6392 * where the Name matches the element type and one of the following holds:
6393 * - The declaration matches EMPTY and the element has no content.
6394 * - The declaration matches children and the sequence of child elements
6395 * belongs to the language generated by the regular expression in the
6396 * content model, with optional white space (characters matching the
6397 * nonterminal S) between each pair of child elements.
6398 * - The declaration matches Mixed and the content consists of character
6399 * data and child elements whose types match names in the content model.
6400 * - The declaration matches ANY, and the types of any child elements have
6401 * been declared.
6402 */
6403
6404void
6405xmlParseElement(xmlParserCtxtPtr ctxt) {
6406 const xmlChar *openTag = CUR_PTR;
6407 xmlChar *name;
6408 xmlChar *oldname;
6409 xmlParserNodeInfo node_info;
6410 xmlNodePtr ret;
6411
6412 /* Capture start position */
6413 if (ctxt->record_info) {
6414 node_info.begin_pos = ctxt->input->consumed +
6415 (CUR_PTR - ctxt->input->base);
6416 node_info.begin_line = ctxt->input->line;
6417 }
6418
6419 if (ctxt->spaceNr == 0)
6420 spacePush(ctxt, -1);
6421 else
6422 spacePush(ctxt, *ctxt->space);
6423
6424 name = xmlParseStartTag(ctxt);
6425 if (name == NULL) {
6426 spacePop(ctxt);
6427 return;
6428 }
6429 namePush(ctxt, name);
6430 ret = ctxt->node;
6431
6432 /*
6433 * [ VC: Root Element Type ]
6434 * The Name in the document type declaration must match the element
6435 * type of the root element.
6436 */
6437 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6438 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6439 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6440
6441 /*
6442 * Check for an Empty Element.
6443 */
6444 if ((RAW == '/') && (NXT(1) == '>')) {
6445 SKIP(2);
6446 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6447 (!ctxt->disableSAX))
6448 ctxt->sax->endElement(ctxt->userData, name);
6449 oldname = namePop(ctxt);
6450 spacePop(ctxt);
6451 if (oldname != NULL) {
6452#ifdef DEBUG_STACK
6453 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6454#endif
6455 xmlFree(oldname);
6456 }
6457 if ( ret != NULL && ctxt->record_info ) {
6458 node_info.end_pos = ctxt->input->consumed +
6459 (CUR_PTR - ctxt->input->base);
6460 node_info.end_line = ctxt->input->line;
6461 node_info.node = ret;
6462 xmlParserAddNodeInfo(ctxt, &node_info);
6463 }
6464 return;
6465 }
6466 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006467 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006468 } else {
6469 ctxt->errNo = XML_ERR_GT_REQUIRED;
6470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6471 ctxt->sax->error(ctxt->userData,
6472 "Couldn't find end of Start Tag\n%.30s\n",
6473 openTag);
6474 ctxt->wellFormed = 0;
6475 ctxt->disableSAX = 1;
6476
6477 /*
6478 * end of parsing of this node.
6479 */
6480 nodePop(ctxt);
6481 oldname = namePop(ctxt);
6482 spacePop(ctxt);
6483 if (oldname != NULL) {
6484#ifdef DEBUG_STACK
6485 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6486#endif
6487 xmlFree(oldname);
6488 }
6489
6490 /*
6491 * Capture end position and add node
6492 */
6493 if ( ret != NULL && ctxt->record_info ) {
6494 node_info.end_pos = ctxt->input->consumed +
6495 (CUR_PTR - ctxt->input->base);
6496 node_info.end_line = ctxt->input->line;
6497 node_info.node = ret;
6498 xmlParserAddNodeInfo(ctxt, &node_info);
6499 }
6500 return;
6501 }
6502
6503 /*
6504 * Parse the content of the element:
6505 */
6506 xmlParseContent(ctxt);
6507 if (!IS_CHAR(RAW)) {
6508 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6510 ctxt->sax->error(ctxt->userData,
6511 "Premature end of data in tag %.30s\n", openTag);
6512 ctxt->wellFormed = 0;
6513 ctxt->disableSAX = 1;
6514
6515 /*
6516 * end of parsing of this node.
6517 */
6518 nodePop(ctxt);
6519 oldname = namePop(ctxt);
6520 spacePop(ctxt);
6521 if (oldname != NULL) {
6522#ifdef DEBUG_STACK
6523 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6524#endif
6525 xmlFree(oldname);
6526 }
6527 return;
6528 }
6529
6530 /*
6531 * parse the end of tag: '</' should be here.
6532 */
6533 xmlParseEndTag(ctxt);
6534
6535 /*
6536 * Capture end position and add node
6537 */
6538 if ( ret != NULL && ctxt->record_info ) {
6539 node_info.end_pos = ctxt->input->consumed +
6540 (CUR_PTR - ctxt->input->base);
6541 node_info.end_line = ctxt->input->line;
6542 node_info.node = ret;
6543 xmlParserAddNodeInfo(ctxt, &node_info);
6544 }
6545}
6546
6547/**
6548 * xmlParseVersionNum:
6549 * @ctxt: an XML parser context
6550 *
6551 * parse the XML version value.
6552 *
6553 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6554 *
6555 * Returns the string giving the XML version number, or NULL
6556 */
6557xmlChar *
6558xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6559 xmlChar *buf = NULL;
6560 int len = 0;
6561 int size = 10;
6562 xmlChar cur;
6563
6564 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6565 if (buf == NULL) {
6566 xmlGenericError(xmlGenericErrorContext,
6567 "malloc of %d byte failed\n", size);
6568 return(NULL);
6569 }
6570 cur = CUR;
6571 while (((cur >= 'a') && (cur <= 'z')) ||
6572 ((cur >= 'A') && (cur <= 'Z')) ||
6573 ((cur >= '0') && (cur <= '9')) ||
6574 (cur == '_') || (cur == '.') ||
6575 (cur == ':') || (cur == '-')) {
6576 if (len + 1 >= size) {
6577 size *= 2;
6578 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6579 if (buf == NULL) {
6580 xmlGenericError(xmlGenericErrorContext,
6581 "realloc of %d byte failed\n", size);
6582 return(NULL);
6583 }
6584 }
6585 buf[len++] = cur;
6586 NEXT;
6587 cur=CUR;
6588 }
6589 buf[len] = 0;
6590 return(buf);
6591}
6592
6593/**
6594 * xmlParseVersionInfo:
6595 * @ctxt: an XML parser context
6596 *
6597 * parse the XML version.
6598 *
6599 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6600 *
6601 * [25] Eq ::= S? '=' S?
6602 *
6603 * Returns the version string, e.g. "1.0"
6604 */
6605
6606xmlChar *
6607xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6608 xmlChar *version = NULL;
6609 const xmlChar *q;
6610
6611 if ((RAW == 'v') && (NXT(1) == 'e') &&
6612 (NXT(2) == 'r') && (NXT(3) == 's') &&
6613 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6614 (NXT(6) == 'n')) {
6615 SKIP(7);
6616 SKIP_BLANKS;
6617 if (RAW != '=') {
6618 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6620 ctxt->sax->error(ctxt->userData,
6621 "xmlParseVersionInfo : expected '='\n");
6622 ctxt->wellFormed = 0;
6623 ctxt->disableSAX = 1;
6624 return(NULL);
6625 }
6626 NEXT;
6627 SKIP_BLANKS;
6628 if (RAW == '"') {
6629 NEXT;
6630 q = CUR_PTR;
6631 version = xmlParseVersionNum(ctxt);
6632 if (RAW != '"') {
6633 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6635 ctxt->sax->error(ctxt->userData,
6636 "String not closed\n%.50s\n", q);
6637 ctxt->wellFormed = 0;
6638 ctxt->disableSAX = 1;
6639 } else
6640 NEXT;
6641 } else if (RAW == '\''){
6642 NEXT;
6643 q = CUR_PTR;
6644 version = xmlParseVersionNum(ctxt);
6645 if (RAW != '\'') {
6646 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6648 ctxt->sax->error(ctxt->userData,
6649 "String not closed\n%.50s\n", q);
6650 ctxt->wellFormed = 0;
6651 ctxt->disableSAX = 1;
6652 } else
6653 NEXT;
6654 } else {
6655 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6657 ctxt->sax->error(ctxt->userData,
6658 "xmlParseVersionInfo : expected ' or \"\n");
6659 ctxt->wellFormed = 0;
6660 ctxt->disableSAX = 1;
6661 }
6662 }
6663 return(version);
6664}
6665
6666/**
6667 * xmlParseEncName:
6668 * @ctxt: an XML parser context
6669 *
6670 * parse the XML encoding name
6671 *
6672 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6673 *
6674 * Returns the encoding name value or NULL
6675 */
6676xmlChar *
6677xmlParseEncName(xmlParserCtxtPtr ctxt) {
6678 xmlChar *buf = NULL;
6679 int len = 0;
6680 int size = 10;
6681 xmlChar cur;
6682
6683 cur = CUR;
6684 if (((cur >= 'a') && (cur <= 'z')) ||
6685 ((cur >= 'A') && (cur <= 'Z'))) {
6686 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6687 if (buf == NULL) {
6688 xmlGenericError(xmlGenericErrorContext,
6689 "malloc of %d byte failed\n", size);
6690 return(NULL);
6691 }
6692
6693 buf[len++] = cur;
6694 NEXT;
6695 cur = CUR;
6696 while (((cur >= 'a') && (cur <= 'z')) ||
6697 ((cur >= 'A') && (cur <= 'Z')) ||
6698 ((cur >= '0') && (cur <= '9')) ||
6699 (cur == '.') || (cur == '_') ||
6700 (cur == '-')) {
6701 if (len + 1 >= size) {
6702 size *= 2;
6703 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6704 if (buf == NULL) {
6705 xmlGenericError(xmlGenericErrorContext,
6706 "realloc of %d byte failed\n", size);
6707 return(NULL);
6708 }
6709 }
6710 buf[len++] = cur;
6711 NEXT;
6712 cur = CUR;
6713 if (cur == 0) {
6714 SHRINK;
6715 GROW;
6716 cur = CUR;
6717 }
6718 }
6719 buf[len] = 0;
6720 } else {
6721 ctxt->errNo = XML_ERR_ENCODING_NAME;
6722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6723 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6724 ctxt->wellFormed = 0;
6725 ctxt->disableSAX = 1;
6726 }
6727 return(buf);
6728}
6729
6730/**
6731 * xmlParseEncodingDecl:
6732 * @ctxt: an XML parser context
6733 *
6734 * parse the XML encoding declaration
6735 *
6736 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6737 *
6738 * this setups the conversion filters.
6739 *
6740 * Returns the encoding value or NULL
6741 */
6742
6743xmlChar *
6744xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6745 xmlChar *encoding = NULL;
6746 const xmlChar *q;
6747
6748 SKIP_BLANKS;
6749 if ((RAW == 'e') && (NXT(1) == 'n') &&
6750 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6751 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6752 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6753 SKIP(8);
6754 SKIP_BLANKS;
6755 if (RAW != '=') {
6756 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6758 ctxt->sax->error(ctxt->userData,
6759 "xmlParseEncodingDecl : expected '='\n");
6760 ctxt->wellFormed = 0;
6761 ctxt->disableSAX = 1;
6762 return(NULL);
6763 }
6764 NEXT;
6765 SKIP_BLANKS;
6766 if (RAW == '"') {
6767 NEXT;
6768 q = CUR_PTR;
6769 encoding = xmlParseEncName(ctxt);
6770 if (RAW != '"') {
6771 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6773 ctxt->sax->error(ctxt->userData,
6774 "String not closed\n%.50s\n", q);
6775 ctxt->wellFormed = 0;
6776 ctxt->disableSAX = 1;
6777 } else
6778 NEXT;
6779 } else if (RAW == '\''){
6780 NEXT;
6781 q = CUR_PTR;
6782 encoding = xmlParseEncName(ctxt);
6783 if (RAW != '\'') {
6784 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6786 ctxt->sax->error(ctxt->userData,
6787 "String not closed\n%.50s\n", q);
6788 ctxt->wellFormed = 0;
6789 ctxt->disableSAX = 1;
6790 } else
6791 NEXT;
6792 } else if (RAW == '"'){
6793 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6795 ctxt->sax->error(ctxt->userData,
6796 "xmlParseEncodingDecl : expected ' or \"\n");
6797 ctxt->wellFormed = 0;
6798 ctxt->disableSAX = 1;
6799 }
6800 if (encoding != NULL) {
6801 xmlCharEncoding enc;
6802 xmlCharEncodingHandlerPtr handler;
6803
6804 if (ctxt->input->encoding != NULL)
6805 xmlFree((xmlChar *) ctxt->input->encoding);
6806 ctxt->input->encoding = encoding;
6807
6808 enc = xmlParseCharEncoding((const char *) encoding);
6809 /*
6810 * registered set of known encodings
6811 */
6812 if (enc != XML_CHAR_ENCODING_ERROR) {
6813 xmlSwitchEncoding(ctxt, enc);
6814 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6815 xmlFree(encoding);
6816 return(NULL);
6817 }
6818 } else {
6819 /*
6820 * fallback for unknown encodings
6821 */
6822 handler = xmlFindCharEncodingHandler((const char *) encoding);
6823 if (handler != NULL) {
6824 xmlSwitchToEncoding(ctxt, handler);
6825 } else {
6826 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6828 ctxt->sax->error(ctxt->userData,
6829 "Unsupported encoding %s\n", encoding);
6830 return(NULL);
6831 }
6832 }
6833 }
6834 }
6835 return(encoding);
6836}
6837
6838/**
6839 * xmlParseSDDecl:
6840 * @ctxt: an XML parser context
6841 *
6842 * parse the XML standalone declaration
6843 *
6844 * [32] SDDecl ::= S 'standalone' Eq
6845 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6846 *
6847 * [ VC: Standalone Document Declaration ]
6848 * TODO The standalone document declaration must have the value "no"
6849 * if any external markup declarations contain declarations of:
6850 * - attributes with default values, if elements to which these
6851 * attributes apply appear in the document without specifications
6852 * of values for these attributes, or
6853 * - entities (other than amp, lt, gt, apos, quot), if references
6854 * to those entities appear in the document, or
6855 * - attributes with values subject to normalization, where the
6856 * attribute appears in the document with a value which will change
6857 * as a result of normalization, or
6858 * - element types with element content, if white space occurs directly
6859 * within any instance of those types.
6860 *
6861 * Returns 1 if standalone, 0 otherwise
6862 */
6863
6864int
6865xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6866 int standalone = -1;
6867
6868 SKIP_BLANKS;
6869 if ((RAW == 's') && (NXT(1) == 't') &&
6870 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6871 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6872 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6873 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6874 SKIP(10);
6875 SKIP_BLANKS;
6876 if (RAW != '=') {
6877 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6879 ctxt->sax->error(ctxt->userData,
6880 "XML standalone declaration : expected '='\n");
6881 ctxt->wellFormed = 0;
6882 ctxt->disableSAX = 1;
6883 return(standalone);
6884 }
6885 NEXT;
6886 SKIP_BLANKS;
6887 if (RAW == '\''){
6888 NEXT;
6889 if ((RAW == 'n') && (NXT(1) == 'o')) {
6890 standalone = 0;
6891 SKIP(2);
6892 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6893 (NXT(2) == 's')) {
6894 standalone = 1;
6895 SKIP(3);
6896 } else {
6897 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6899 ctxt->sax->error(ctxt->userData,
6900 "standalone accepts only 'yes' or 'no'\n");
6901 ctxt->wellFormed = 0;
6902 ctxt->disableSAX = 1;
6903 }
6904 if (RAW != '\'') {
6905 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6907 ctxt->sax->error(ctxt->userData, "String not closed\n");
6908 ctxt->wellFormed = 0;
6909 ctxt->disableSAX = 1;
6910 } else
6911 NEXT;
6912 } else if (RAW == '"'){
6913 NEXT;
6914 if ((RAW == 'n') && (NXT(1) == 'o')) {
6915 standalone = 0;
6916 SKIP(2);
6917 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6918 (NXT(2) == 's')) {
6919 standalone = 1;
6920 SKIP(3);
6921 } else {
6922 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6924 ctxt->sax->error(ctxt->userData,
6925 "standalone accepts only 'yes' or 'no'\n");
6926 ctxt->wellFormed = 0;
6927 ctxt->disableSAX = 1;
6928 }
6929 if (RAW != '"') {
6930 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6932 ctxt->sax->error(ctxt->userData, "String not closed\n");
6933 ctxt->wellFormed = 0;
6934 ctxt->disableSAX = 1;
6935 } else
6936 NEXT;
6937 } else {
6938 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6940 ctxt->sax->error(ctxt->userData,
6941 "Standalone value not found\n");
6942 ctxt->wellFormed = 0;
6943 ctxt->disableSAX = 1;
6944 }
6945 }
6946 return(standalone);
6947}
6948
6949/**
6950 * xmlParseXMLDecl:
6951 * @ctxt: an XML parser context
6952 *
6953 * parse an XML declaration header
6954 *
6955 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6956 */
6957
6958void
6959xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6960 xmlChar *version;
6961
6962 /*
6963 * We know that '<?xml' is here.
6964 */
6965 SKIP(5);
6966
6967 if (!IS_BLANK(RAW)) {
6968 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6969 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6970 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6971 ctxt->wellFormed = 0;
6972 ctxt->disableSAX = 1;
6973 }
6974 SKIP_BLANKS;
6975
6976 /*
6977 * We should have the VersionInfo here.
6978 */
6979 version = xmlParseVersionInfo(ctxt);
6980 if (version == NULL)
6981 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6982 ctxt->version = xmlStrdup(version);
6983 xmlFree(version);
6984
6985 /*
6986 * We may have the encoding declaration
6987 */
6988 if (!IS_BLANK(RAW)) {
6989 if ((RAW == '?') && (NXT(1) == '>')) {
6990 SKIP(2);
6991 return;
6992 }
6993 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6995 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
6996 ctxt->wellFormed = 0;
6997 ctxt->disableSAX = 1;
6998 }
6999 xmlParseEncodingDecl(ctxt);
7000 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7001 /*
7002 * The XML REC instructs us to stop parsing right here
7003 */
7004 return;
7005 }
7006
7007 /*
7008 * We may have the standalone status.
7009 */
7010 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7011 if ((RAW == '?') && (NXT(1) == '>')) {
7012 SKIP(2);
7013 return;
7014 }
7015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7017 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7018 ctxt->wellFormed = 0;
7019 ctxt->disableSAX = 1;
7020 }
7021 SKIP_BLANKS;
7022 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7023
7024 SKIP_BLANKS;
7025 if ((RAW == '?') && (NXT(1) == '>')) {
7026 SKIP(2);
7027 } else if (RAW == '>') {
7028 /* Deprecated old WD ... */
7029 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7031 ctxt->sax->error(ctxt->userData,
7032 "XML declaration must end-up with '?>'\n");
7033 ctxt->wellFormed = 0;
7034 ctxt->disableSAX = 1;
7035 NEXT;
7036 } else {
7037 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7039 ctxt->sax->error(ctxt->userData,
7040 "parsing XML declaration: '?>' expected\n");
7041 ctxt->wellFormed = 0;
7042 ctxt->disableSAX = 1;
7043 MOVETO_ENDTAG(CUR_PTR);
7044 NEXT;
7045 }
7046}
7047
7048/**
7049 * xmlParseMisc:
7050 * @ctxt: an XML parser context
7051 *
7052 * parse an XML Misc* optionnal field.
7053 *
7054 * [27] Misc ::= Comment | PI | S
7055 */
7056
7057void
7058xmlParseMisc(xmlParserCtxtPtr ctxt) {
7059 while (((RAW == '<') && (NXT(1) == '?')) ||
7060 ((RAW == '<') && (NXT(1) == '!') &&
7061 (NXT(2) == '-') && (NXT(3) == '-')) ||
7062 IS_BLANK(CUR)) {
7063 if ((RAW == '<') && (NXT(1) == '?')) {
7064 xmlParsePI(ctxt);
7065 } else if (IS_BLANK(CUR)) {
7066 NEXT;
7067 } else
7068 xmlParseComment(ctxt);
7069 }
7070}
7071
7072/**
7073 * xmlParseDocument:
7074 * @ctxt: an XML parser context
7075 *
7076 * parse an XML document (and build a tree if using the standard SAX
7077 * interface).
7078 *
7079 * [1] document ::= prolog element Misc*
7080 *
7081 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7082 *
7083 * Returns 0, -1 in case of error. the parser context is augmented
7084 * as a result of the parsing.
7085 */
7086
7087int
7088xmlParseDocument(xmlParserCtxtPtr ctxt) {
7089 xmlChar start[4];
7090 xmlCharEncoding enc;
7091
7092 xmlInitParser();
7093
7094 GROW;
7095
7096 /*
7097 * SAX: beginning of the document processing.
7098 */
7099 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7100 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7101
7102 /*
7103 * Get the 4 first bytes and decode the charset
7104 * if enc != XML_CHAR_ENCODING_NONE
7105 * plug some encoding conversion routines.
7106 */
7107 start[0] = RAW;
7108 start[1] = NXT(1);
7109 start[2] = NXT(2);
7110 start[3] = NXT(3);
7111 enc = xmlDetectCharEncoding(start, 4);
7112 if (enc != XML_CHAR_ENCODING_NONE) {
7113 xmlSwitchEncoding(ctxt, enc);
7114 }
7115
7116
7117 if (CUR == 0) {
7118 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7120 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7121 ctxt->wellFormed = 0;
7122 ctxt->disableSAX = 1;
7123 }
7124
7125 /*
7126 * Check for the XMLDecl in the Prolog.
7127 */
7128 GROW;
7129 if ((RAW == '<') && (NXT(1) == '?') &&
7130 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7131 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7132
7133 /*
7134 * Note that we will switch encoding on the fly.
7135 */
7136 xmlParseXMLDecl(ctxt);
7137 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7138 /*
7139 * The XML REC instructs us to stop parsing right here
7140 */
7141 return(-1);
7142 }
7143 ctxt->standalone = ctxt->input->standalone;
7144 SKIP_BLANKS;
7145 } else {
7146 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7147 }
7148 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7149 ctxt->sax->startDocument(ctxt->userData);
7150
7151 /*
7152 * The Misc part of the Prolog
7153 */
7154 GROW;
7155 xmlParseMisc(ctxt);
7156
7157 /*
7158 * Then possibly doc type declaration(s) and more Misc
7159 * (doctypedecl Misc*)?
7160 */
7161 GROW;
7162 if ((RAW == '<') && (NXT(1) == '!') &&
7163 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7164 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7165 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7166 (NXT(8) == 'E')) {
7167
7168 ctxt->inSubset = 1;
7169 xmlParseDocTypeDecl(ctxt);
7170 if (RAW == '[') {
7171 ctxt->instate = XML_PARSER_DTD;
7172 xmlParseInternalSubset(ctxt);
7173 }
7174
7175 /*
7176 * Create and update the external subset.
7177 */
7178 ctxt->inSubset = 2;
7179 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7180 (!ctxt->disableSAX))
7181 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7182 ctxt->extSubSystem, ctxt->extSubURI);
7183 ctxt->inSubset = 0;
7184
7185
7186 ctxt->instate = XML_PARSER_PROLOG;
7187 xmlParseMisc(ctxt);
7188 }
7189
7190 /*
7191 * Time to start parsing the tree itself
7192 */
7193 GROW;
7194 if (RAW != '<') {
7195 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7197 ctxt->sax->error(ctxt->userData,
7198 "Start tag expected, '<' not found\n");
7199 ctxt->wellFormed = 0;
7200 ctxt->disableSAX = 1;
7201 ctxt->instate = XML_PARSER_EOF;
7202 } else {
7203 ctxt->instate = XML_PARSER_CONTENT;
7204 xmlParseElement(ctxt);
7205 ctxt->instate = XML_PARSER_EPILOG;
7206
7207
7208 /*
7209 * The Misc part at the end
7210 */
7211 xmlParseMisc(ctxt);
7212
7213 if (RAW != 0) {
7214 ctxt->errNo = XML_ERR_DOCUMENT_END;
7215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7216 ctxt->sax->error(ctxt->userData,
7217 "Extra content at the end of the document\n");
7218 ctxt->wellFormed = 0;
7219 ctxt->disableSAX = 1;
7220 }
7221 ctxt->instate = XML_PARSER_EOF;
7222 }
7223
7224 /*
7225 * SAX: end of the document processing.
7226 */
7227 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7228 (!ctxt->disableSAX))
7229 ctxt->sax->endDocument(ctxt->userData);
7230
7231 if (! ctxt->wellFormed) return(-1);
7232 return(0);
7233}
7234
7235/**
7236 * xmlParseExtParsedEnt:
7237 * @ctxt: an XML parser context
7238 *
7239 * parse a genreral parsed entity
7240 * An external general parsed entity is well-formed if it matches the
7241 * production labeled extParsedEnt.
7242 *
7243 * [78] extParsedEnt ::= TextDecl? content
7244 *
7245 * Returns 0, -1 in case of error. the parser context is augmented
7246 * as a result of the parsing.
7247 */
7248
7249int
7250xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7251 xmlChar start[4];
7252 xmlCharEncoding enc;
7253
7254 xmlDefaultSAXHandlerInit();
7255
7256 GROW;
7257
7258 /*
7259 * SAX: beginning of the document processing.
7260 */
7261 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7262 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7263
7264 /*
7265 * Get the 4 first bytes and decode the charset
7266 * if enc != XML_CHAR_ENCODING_NONE
7267 * plug some encoding conversion routines.
7268 */
7269 start[0] = RAW;
7270 start[1] = NXT(1);
7271 start[2] = NXT(2);
7272 start[3] = NXT(3);
7273 enc = xmlDetectCharEncoding(start, 4);
7274 if (enc != XML_CHAR_ENCODING_NONE) {
7275 xmlSwitchEncoding(ctxt, enc);
7276 }
7277
7278
7279 if (CUR == 0) {
7280 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7282 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7283 ctxt->wellFormed = 0;
7284 ctxt->disableSAX = 1;
7285 }
7286
7287 /*
7288 * Check for the XMLDecl in the Prolog.
7289 */
7290 GROW;
7291 if ((RAW == '<') && (NXT(1) == '?') &&
7292 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7293 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7294
7295 /*
7296 * Note that we will switch encoding on the fly.
7297 */
7298 xmlParseXMLDecl(ctxt);
7299 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7300 /*
7301 * The XML REC instructs us to stop parsing right here
7302 */
7303 return(-1);
7304 }
7305 SKIP_BLANKS;
7306 } else {
7307 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7308 }
7309 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7310 ctxt->sax->startDocument(ctxt->userData);
7311
7312 /*
7313 * Doing validity checking on chunk doesn't make sense
7314 */
7315 ctxt->instate = XML_PARSER_CONTENT;
7316 ctxt->validate = 0;
7317 ctxt->loadsubset = 0;
7318 ctxt->depth = 0;
7319
7320 xmlParseContent(ctxt);
7321
7322 if ((RAW == '<') && (NXT(1) == '/')) {
7323 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7325 ctxt->sax->error(ctxt->userData,
7326 "chunk is not well balanced\n");
7327 ctxt->wellFormed = 0;
7328 ctxt->disableSAX = 1;
7329 } else if (RAW != 0) {
7330 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7332 ctxt->sax->error(ctxt->userData,
7333 "extra content at the end of well balanced chunk\n");
7334 ctxt->wellFormed = 0;
7335 ctxt->disableSAX = 1;
7336 }
7337
7338 /*
7339 * SAX: end of the document processing.
7340 */
7341 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7342 (!ctxt->disableSAX))
7343 ctxt->sax->endDocument(ctxt->userData);
7344
7345 if (! ctxt->wellFormed) return(-1);
7346 return(0);
7347}
7348
7349/************************************************************************
7350 * *
7351 * Progressive parsing interfaces *
7352 * *
7353 ************************************************************************/
7354
7355/**
7356 * xmlParseLookupSequence:
7357 * @ctxt: an XML parser context
7358 * @first: the first char to lookup
7359 * @next: the next char to lookup or zero
7360 * @third: the next char to lookup or zero
7361 *
7362 * Try to find if a sequence (first, next, third) or just (first next) or
7363 * (first) is available in the input stream.
7364 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7365 * to avoid rescanning sequences of bytes, it DOES change the state of the
7366 * parser, do not use liberally.
7367 *
7368 * Returns the index to the current parsing point if the full sequence
7369 * is available, -1 otherwise.
7370 */
7371int
7372xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7373 xmlChar next, xmlChar third) {
7374 int base, len;
7375 xmlParserInputPtr in;
7376 const xmlChar *buf;
7377
7378 in = ctxt->input;
7379 if (in == NULL) return(-1);
7380 base = in->cur - in->base;
7381 if (base < 0) return(-1);
7382 if (ctxt->checkIndex > base)
7383 base = ctxt->checkIndex;
7384 if (in->buf == NULL) {
7385 buf = in->base;
7386 len = in->length;
7387 } else {
7388 buf = in->buf->buffer->content;
7389 len = in->buf->buffer->use;
7390 }
7391 /* take into account the sequence length */
7392 if (third) len -= 2;
7393 else if (next) len --;
7394 for (;base < len;base++) {
7395 if (buf[base] == first) {
7396 if (third != 0) {
7397 if ((buf[base + 1] != next) ||
7398 (buf[base + 2] != third)) continue;
7399 } else if (next != 0) {
7400 if (buf[base + 1] != next) continue;
7401 }
7402 ctxt->checkIndex = 0;
7403#ifdef DEBUG_PUSH
7404 if (next == 0)
7405 xmlGenericError(xmlGenericErrorContext,
7406 "PP: lookup '%c' found at %d\n",
7407 first, base);
7408 else if (third == 0)
7409 xmlGenericError(xmlGenericErrorContext,
7410 "PP: lookup '%c%c' found at %d\n",
7411 first, next, base);
7412 else
7413 xmlGenericError(xmlGenericErrorContext,
7414 "PP: lookup '%c%c%c' found at %d\n",
7415 first, next, third, base);
7416#endif
7417 return(base - (in->cur - in->base));
7418 }
7419 }
7420 ctxt->checkIndex = base;
7421#ifdef DEBUG_PUSH
7422 if (next == 0)
7423 xmlGenericError(xmlGenericErrorContext,
7424 "PP: lookup '%c' failed\n", first);
7425 else if (third == 0)
7426 xmlGenericError(xmlGenericErrorContext,
7427 "PP: lookup '%c%c' failed\n", first, next);
7428 else
7429 xmlGenericError(xmlGenericErrorContext,
7430 "PP: lookup '%c%c%c' failed\n", first, next, third);
7431#endif
7432 return(-1);
7433}
7434
7435/**
7436 * xmlParseTryOrFinish:
7437 * @ctxt: an XML parser context
7438 * @terminate: last chunk indicator
7439 *
7440 * Try to progress on parsing
7441 *
7442 * Returns zero if no parsing was possible
7443 */
7444int
7445xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7446 int ret = 0;
7447 int avail;
7448 xmlChar cur, next;
7449
7450#ifdef DEBUG_PUSH
7451 switch (ctxt->instate) {
7452 case XML_PARSER_EOF:
7453 xmlGenericError(xmlGenericErrorContext,
7454 "PP: try EOF\n"); break;
7455 case XML_PARSER_START:
7456 xmlGenericError(xmlGenericErrorContext,
7457 "PP: try START\n"); break;
7458 case XML_PARSER_MISC:
7459 xmlGenericError(xmlGenericErrorContext,
7460 "PP: try MISC\n");break;
7461 case XML_PARSER_COMMENT:
7462 xmlGenericError(xmlGenericErrorContext,
7463 "PP: try COMMENT\n");break;
7464 case XML_PARSER_PROLOG:
7465 xmlGenericError(xmlGenericErrorContext,
7466 "PP: try PROLOG\n");break;
7467 case XML_PARSER_START_TAG:
7468 xmlGenericError(xmlGenericErrorContext,
7469 "PP: try START_TAG\n");break;
7470 case XML_PARSER_CONTENT:
7471 xmlGenericError(xmlGenericErrorContext,
7472 "PP: try CONTENT\n");break;
7473 case XML_PARSER_CDATA_SECTION:
7474 xmlGenericError(xmlGenericErrorContext,
7475 "PP: try CDATA_SECTION\n");break;
7476 case XML_PARSER_END_TAG:
7477 xmlGenericError(xmlGenericErrorContext,
7478 "PP: try END_TAG\n");break;
7479 case XML_PARSER_ENTITY_DECL:
7480 xmlGenericError(xmlGenericErrorContext,
7481 "PP: try ENTITY_DECL\n");break;
7482 case XML_PARSER_ENTITY_VALUE:
7483 xmlGenericError(xmlGenericErrorContext,
7484 "PP: try ENTITY_VALUE\n");break;
7485 case XML_PARSER_ATTRIBUTE_VALUE:
7486 xmlGenericError(xmlGenericErrorContext,
7487 "PP: try ATTRIBUTE_VALUE\n");break;
7488 case XML_PARSER_DTD:
7489 xmlGenericError(xmlGenericErrorContext,
7490 "PP: try DTD\n");break;
7491 case XML_PARSER_EPILOG:
7492 xmlGenericError(xmlGenericErrorContext,
7493 "PP: try EPILOG\n");break;
7494 case XML_PARSER_PI:
7495 xmlGenericError(xmlGenericErrorContext,
7496 "PP: try PI\n");break;
7497 case XML_PARSER_IGNORE:
7498 xmlGenericError(xmlGenericErrorContext,
7499 "PP: try IGNORE\n");break;
7500 }
7501#endif
7502
7503 while (1) {
7504 /*
7505 * Pop-up of finished entities.
7506 */
7507 while ((RAW == 0) && (ctxt->inputNr > 1))
7508 xmlPopInput(ctxt);
7509
7510 if (ctxt->input ==NULL) break;
7511 if (ctxt->input->buf == NULL)
7512 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7513 else
7514 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7515 if (avail < 1)
7516 goto done;
7517 switch (ctxt->instate) {
7518 case XML_PARSER_EOF:
7519 /*
7520 * Document parsing is done !
7521 */
7522 goto done;
7523 case XML_PARSER_START:
7524 /*
7525 * Very first chars read from the document flow.
7526 */
7527 cur = ctxt->input->cur[0];
7528 if (IS_BLANK(cur)) {
7529 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7530 ctxt->sax->setDocumentLocator(ctxt->userData,
7531 &xmlDefaultSAXLocator);
7532 ctxt->errNo = XML_ERR_DOCUMENT_START;
7533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7534 ctxt->sax->error(ctxt->userData,
7535 "Extra spaces at the beginning of the document are not allowed\n");
7536 ctxt->wellFormed = 0;
7537 ctxt->disableSAX = 1;
7538 SKIP_BLANKS;
7539 ret++;
7540 if (ctxt->input->buf == NULL)
7541 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7542 else
7543 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7544 }
7545 if (avail < 2)
7546 goto done;
7547
7548 cur = ctxt->input->cur[0];
7549 next = ctxt->input->cur[1];
7550 if (cur == 0) {
7551 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7552 ctxt->sax->setDocumentLocator(ctxt->userData,
7553 &xmlDefaultSAXLocator);
7554 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7556 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7557 ctxt->wellFormed = 0;
7558 ctxt->disableSAX = 1;
7559 ctxt->instate = XML_PARSER_EOF;
7560#ifdef DEBUG_PUSH
7561 xmlGenericError(xmlGenericErrorContext,
7562 "PP: entering EOF\n");
7563#endif
7564 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7565 ctxt->sax->endDocument(ctxt->userData);
7566 goto done;
7567 }
7568 if ((cur == '<') && (next == '?')) {
7569 /* PI or XML decl */
7570 if (avail < 5) return(ret);
7571 if ((!terminate) &&
7572 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7573 return(ret);
7574 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7575 ctxt->sax->setDocumentLocator(ctxt->userData,
7576 &xmlDefaultSAXLocator);
7577 if ((ctxt->input->cur[2] == 'x') &&
7578 (ctxt->input->cur[3] == 'm') &&
7579 (ctxt->input->cur[4] == 'l') &&
7580 (IS_BLANK(ctxt->input->cur[5]))) {
7581 ret += 5;
7582#ifdef DEBUG_PUSH
7583 xmlGenericError(xmlGenericErrorContext,
7584 "PP: Parsing XML Decl\n");
7585#endif
7586 xmlParseXMLDecl(ctxt);
7587 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7588 /*
7589 * The XML REC instructs us to stop parsing right
7590 * here
7591 */
7592 ctxt->instate = XML_PARSER_EOF;
7593 return(0);
7594 }
7595 ctxt->standalone = ctxt->input->standalone;
7596 if ((ctxt->encoding == NULL) &&
7597 (ctxt->input->encoding != NULL))
7598 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7599 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7600 (!ctxt->disableSAX))
7601 ctxt->sax->startDocument(ctxt->userData);
7602 ctxt->instate = XML_PARSER_MISC;
7603#ifdef DEBUG_PUSH
7604 xmlGenericError(xmlGenericErrorContext,
7605 "PP: entering MISC\n");
7606#endif
7607 } else {
7608 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7609 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7610 (!ctxt->disableSAX))
7611 ctxt->sax->startDocument(ctxt->userData);
7612 ctxt->instate = XML_PARSER_MISC;
7613#ifdef DEBUG_PUSH
7614 xmlGenericError(xmlGenericErrorContext,
7615 "PP: entering MISC\n");
7616#endif
7617 }
7618 } else {
7619 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7620 ctxt->sax->setDocumentLocator(ctxt->userData,
7621 &xmlDefaultSAXLocator);
7622 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7623 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7624 (!ctxt->disableSAX))
7625 ctxt->sax->startDocument(ctxt->userData);
7626 ctxt->instate = XML_PARSER_MISC;
7627#ifdef DEBUG_PUSH
7628 xmlGenericError(xmlGenericErrorContext,
7629 "PP: entering MISC\n");
7630#endif
7631 }
7632 break;
7633 case XML_PARSER_MISC:
7634 SKIP_BLANKS;
7635 if (ctxt->input->buf == NULL)
7636 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7637 else
7638 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7639 if (avail < 2)
7640 goto done;
7641 cur = ctxt->input->cur[0];
7642 next = ctxt->input->cur[1];
7643 if ((cur == '<') && (next == '?')) {
7644 if ((!terminate) &&
7645 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7646 goto done;
7647#ifdef DEBUG_PUSH
7648 xmlGenericError(xmlGenericErrorContext,
7649 "PP: Parsing PI\n");
7650#endif
7651 xmlParsePI(ctxt);
7652 } else if ((cur == '<') && (next == '!') &&
7653 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7654 if ((!terminate) &&
7655 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7656 goto done;
7657#ifdef DEBUG_PUSH
7658 xmlGenericError(xmlGenericErrorContext,
7659 "PP: Parsing Comment\n");
7660#endif
7661 xmlParseComment(ctxt);
7662 ctxt->instate = XML_PARSER_MISC;
7663 } else if ((cur == '<') && (next == '!') &&
7664 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7665 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7666 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7667 (ctxt->input->cur[8] == 'E')) {
7668 if ((!terminate) &&
7669 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7670 goto done;
7671#ifdef DEBUG_PUSH
7672 xmlGenericError(xmlGenericErrorContext,
7673 "PP: Parsing internal subset\n");
7674#endif
7675 ctxt->inSubset = 1;
7676 xmlParseDocTypeDecl(ctxt);
7677 if (RAW == '[') {
7678 ctxt->instate = XML_PARSER_DTD;
7679#ifdef DEBUG_PUSH
7680 xmlGenericError(xmlGenericErrorContext,
7681 "PP: entering DTD\n");
7682#endif
7683 } else {
7684 /*
7685 * Create and update the external subset.
7686 */
7687 ctxt->inSubset = 2;
7688 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7689 (ctxt->sax->externalSubset != NULL))
7690 ctxt->sax->externalSubset(ctxt->userData,
7691 ctxt->intSubName, ctxt->extSubSystem,
7692 ctxt->extSubURI);
7693 ctxt->inSubset = 0;
7694 ctxt->instate = XML_PARSER_PROLOG;
7695#ifdef DEBUG_PUSH
7696 xmlGenericError(xmlGenericErrorContext,
7697 "PP: entering PROLOG\n");
7698#endif
7699 }
7700 } else if ((cur == '<') && (next == '!') &&
7701 (avail < 9)) {
7702 goto done;
7703 } else {
7704 ctxt->instate = XML_PARSER_START_TAG;
7705#ifdef DEBUG_PUSH
7706 xmlGenericError(xmlGenericErrorContext,
7707 "PP: entering START_TAG\n");
7708#endif
7709 }
7710 break;
7711 case XML_PARSER_IGNORE:
7712 xmlGenericError(xmlGenericErrorContext,
7713 "PP: internal error, state == IGNORE");
7714 ctxt->instate = XML_PARSER_DTD;
7715#ifdef DEBUG_PUSH
7716 xmlGenericError(xmlGenericErrorContext,
7717 "PP: entering DTD\n");
7718#endif
7719 break;
7720 case XML_PARSER_PROLOG:
7721 SKIP_BLANKS;
7722 if (ctxt->input->buf == NULL)
7723 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7724 else
7725 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7726 if (avail < 2)
7727 goto done;
7728 cur = ctxt->input->cur[0];
7729 next = ctxt->input->cur[1];
7730 if ((cur == '<') && (next == '?')) {
7731 if ((!terminate) &&
7732 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7733 goto done;
7734#ifdef DEBUG_PUSH
7735 xmlGenericError(xmlGenericErrorContext,
7736 "PP: Parsing PI\n");
7737#endif
7738 xmlParsePI(ctxt);
7739 } else if ((cur == '<') && (next == '!') &&
7740 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7741 if ((!terminate) &&
7742 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7743 goto done;
7744#ifdef DEBUG_PUSH
7745 xmlGenericError(xmlGenericErrorContext,
7746 "PP: Parsing Comment\n");
7747#endif
7748 xmlParseComment(ctxt);
7749 ctxt->instate = XML_PARSER_PROLOG;
7750 } else if ((cur == '<') && (next == '!') &&
7751 (avail < 4)) {
7752 goto done;
7753 } else {
7754 ctxt->instate = XML_PARSER_START_TAG;
7755#ifdef DEBUG_PUSH
7756 xmlGenericError(xmlGenericErrorContext,
7757 "PP: entering START_TAG\n");
7758#endif
7759 }
7760 break;
7761 case XML_PARSER_EPILOG:
7762 SKIP_BLANKS;
7763 if (ctxt->input->buf == NULL)
7764 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7765 else
7766 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7767 if (avail < 2)
7768 goto done;
7769 cur = ctxt->input->cur[0];
7770 next = ctxt->input->cur[1];
7771 if ((cur == '<') && (next == '?')) {
7772 if ((!terminate) &&
7773 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7774 goto done;
7775#ifdef DEBUG_PUSH
7776 xmlGenericError(xmlGenericErrorContext,
7777 "PP: Parsing PI\n");
7778#endif
7779 xmlParsePI(ctxt);
7780 ctxt->instate = XML_PARSER_EPILOG;
7781 } else if ((cur == '<') && (next == '!') &&
7782 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7783 if ((!terminate) &&
7784 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7785 goto done;
7786#ifdef DEBUG_PUSH
7787 xmlGenericError(xmlGenericErrorContext,
7788 "PP: Parsing Comment\n");
7789#endif
7790 xmlParseComment(ctxt);
7791 ctxt->instate = XML_PARSER_EPILOG;
7792 } else if ((cur == '<') && (next == '!') &&
7793 (avail < 4)) {
7794 goto done;
7795 } else {
7796 ctxt->errNo = XML_ERR_DOCUMENT_END;
7797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7798 ctxt->sax->error(ctxt->userData,
7799 "Extra content at the end of the document\n");
7800 ctxt->wellFormed = 0;
7801 ctxt->disableSAX = 1;
7802 ctxt->instate = XML_PARSER_EOF;
7803#ifdef DEBUG_PUSH
7804 xmlGenericError(xmlGenericErrorContext,
7805 "PP: entering EOF\n");
7806#endif
7807 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7808 (!ctxt->disableSAX))
7809 ctxt->sax->endDocument(ctxt->userData);
7810 goto done;
7811 }
7812 break;
7813 case XML_PARSER_START_TAG: {
7814 xmlChar *name, *oldname;
7815
7816 if ((avail < 2) && (ctxt->inputNr == 1))
7817 goto done;
7818 cur = ctxt->input->cur[0];
7819 if (cur != '<') {
7820 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7822 ctxt->sax->error(ctxt->userData,
7823 "Start tag expect, '<' not found\n");
7824 ctxt->wellFormed = 0;
7825 ctxt->disableSAX = 1;
7826 ctxt->instate = XML_PARSER_EOF;
7827#ifdef DEBUG_PUSH
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: entering EOF\n");
7830#endif
7831 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7832 (!ctxt->disableSAX))
7833 ctxt->sax->endDocument(ctxt->userData);
7834 goto done;
7835 }
7836 if ((!terminate) &&
7837 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7838 goto done;
7839 if (ctxt->spaceNr == 0)
7840 spacePush(ctxt, -1);
7841 else
7842 spacePush(ctxt, *ctxt->space);
7843 name = xmlParseStartTag(ctxt);
7844 if (name == NULL) {
7845 spacePop(ctxt);
7846 ctxt->instate = XML_PARSER_EOF;
7847#ifdef DEBUG_PUSH
7848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: entering EOF\n");
7850#endif
7851 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7852 (!ctxt->disableSAX))
7853 ctxt->sax->endDocument(ctxt->userData);
7854 goto done;
7855 }
7856 namePush(ctxt, xmlStrdup(name));
7857
7858 /*
7859 * [ VC: Root Element Type ]
7860 * The Name in the document type declaration must match
7861 * the element type of the root element.
7862 */
7863 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7864 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7865 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7866
7867 /*
7868 * Check for an Empty Element.
7869 */
7870 if ((RAW == '/') && (NXT(1) == '>')) {
7871 SKIP(2);
7872 if ((ctxt->sax != NULL) &&
7873 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7874 ctxt->sax->endElement(ctxt->userData, name);
7875 xmlFree(name);
7876 oldname = namePop(ctxt);
7877 spacePop(ctxt);
7878 if (oldname != NULL) {
7879#ifdef DEBUG_STACK
7880 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7881#endif
7882 xmlFree(oldname);
7883 }
7884 if (ctxt->name == NULL) {
7885 ctxt->instate = XML_PARSER_EPILOG;
7886#ifdef DEBUG_PUSH
7887 xmlGenericError(xmlGenericErrorContext,
7888 "PP: entering EPILOG\n");
7889#endif
7890 } else {
7891 ctxt->instate = XML_PARSER_CONTENT;
7892#ifdef DEBUG_PUSH
7893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: entering CONTENT\n");
7895#endif
7896 }
7897 break;
7898 }
7899 if (RAW == '>') {
7900 NEXT;
7901 } else {
7902 ctxt->errNo = XML_ERR_GT_REQUIRED;
7903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7904 ctxt->sax->error(ctxt->userData,
7905 "Couldn't find end of Start Tag %s\n",
7906 name);
7907 ctxt->wellFormed = 0;
7908 ctxt->disableSAX = 1;
7909
7910 /*
7911 * end of parsing of this node.
7912 */
7913 nodePop(ctxt);
7914 oldname = namePop(ctxt);
7915 spacePop(ctxt);
7916 if (oldname != NULL) {
7917#ifdef DEBUG_STACK
7918 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7919#endif
7920 xmlFree(oldname);
7921 }
7922 }
7923 xmlFree(name);
7924 ctxt->instate = XML_PARSER_CONTENT;
7925#ifdef DEBUG_PUSH
7926 xmlGenericError(xmlGenericErrorContext,
7927 "PP: entering CONTENT\n");
7928#endif
7929 break;
7930 }
7931 case XML_PARSER_CONTENT: {
7932 const xmlChar *test;
7933 int cons;
7934 xmlChar tok;
7935
7936 /*
7937 * Handle preparsed entities and charRef
7938 */
7939 if (ctxt->token != 0) {
7940 xmlChar cur[2] = { 0 , 0 } ;
7941
7942 cur[0] = (xmlChar) ctxt->token;
7943 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7944 (ctxt->sax->characters != NULL))
7945 ctxt->sax->characters(ctxt->userData, cur, 1);
7946 ctxt->token = 0;
7947 }
7948 if ((avail < 2) && (ctxt->inputNr == 1))
7949 goto done;
7950 cur = ctxt->input->cur[0];
7951 next = ctxt->input->cur[1];
7952
7953 test = CUR_PTR;
7954 cons = ctxt->input->consumed;
7955 tok = ctxt->token;
7956 if ((cur == '<') && (next == '?')) {
7957 if ((!terminate) &&
7958 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7959 goto done;
7960#ifdef DEBUG_PUSH
7961 xmlGenericError(xmlGenericErrorContext,
7962 "PP: Parsing PI\n");
7963#endif
7964 xmlParsePI(ctxt);
7965 } else if ((cur == '<') && (next == '!') &&
7966 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7967 if ((!terminate) &&
7968 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7969 goto done;
7970#ifdef DEBUG_PUSH
7971 xmlGenericError(xmlGenericErrorContext,
7972 "PP: Parsing Comment\n");
7973#endif
7974 xmlParseComment(ctxt);
7975 ctxt->instate = XML_PARSER_CONTENT;
7976 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7977 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7978 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7979 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7980 (ctxt->input->cur[8] == '[')) {
7981 SKIP(9);
7982 ctxt->instate = XML_PARSER_CDATA_SECTION;
7983#ifdef DEBUG_PUSH
7984 xmlGenericError(xmlGenericErrorContext,
7985 "PP: entering CDATA_SECTION\n");
7986#endif
7987 break;
7988 } else if ((cur == '<') && (next == '!') &&
7989 (avail < 9)) {
7990 goto done;
7991 } else if ((cur == '<') && (next == '/')) {
7992 ctxt->instate = XML_PARSER_END_TAG;
7993#ifdef DEBUG_PUSH
7994 xmlGenericError(xmlGenericErrorContext,
7995 "PP: entering END_TAG\n");
7996#endif
7997 break;
7998 } else if (cur == '<') {
7999 ctxt->instate = XML_PARSER_START_TAG;
8000#ifdef DEBUG_PUSH
8001 xmlGenericError(xmlGenericErrorContext,
8002 "PP: entering START_TAG\n");
8003#endif
8004 break;
8005 } else if (cur == '&') {
8006 if ((!terminate) &&
8007 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8008 goto done;
8009#ifdef DEBUG_PUSH
8010 xmlGenericError(xmlGenericErrorContext,
8011 "PP: Parsing Reference\n");
8012#endif
8013 xmlParseReference(ctxt);
8014 } else {
8015 /* TODO Avoid the extra copy, handle directly !!! */
8016 /*
8017 * Goal of the following test is:
8018 * - minimize calls to the SAX 'character' callback
8019 * when they are mergeable
8020 * - handle an problem for isBlank when we only parse
8021 * a sequence of blank chars and the next one is
8022 * not available to check against '<' presence.
8023 * - tries to homogenize the differences in SAX
8024 * callbacks beween the push and pull versions
8025 * of the parser.
8026 */
8027 if ((ctxt->inputNr == 1) &&
8028 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8029 if ((!terminate) &&
8030 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8031 goto done;
8032 }
8033 ctxt->checkIndex = 0;
8034#ifdef DEBUG_PUSH
8035 xmlGenericError(xmlGenericErrorContext,
8036 "PP: Parsing char data\n");
8037#endif
8038 xmlParseCharData(ctxt, 0);
8039 }
8040 /*
8041 * Pop-up of finished entities.
8042 */
8043 while ((RAW == 0) && (ctxt->inputNr > 1))
8044 xmlPopInput(ctxt);
8045 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8046 (tok == ctxt->token)) {
8047 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8049 ctxt->sax->error(ctxt->userData,
8050 "detected an error in element content\n");
8051 ctxt->wellFormed = 0;
8052 ctxt->disableSAX = 1;
8053 ctxt->instate = XML_PARSER_EOF;
8054 break;
8055 }
8056 break;
8057 }
8058 case XML_PARSER_CDATA_SECTION: {
8059 /*
8060 * The Push mode need to have the SAX callback for
8061 * cdataBlock merge back contiguous callbacks.
8062 */
8063 int base;
8064
8065 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8066 if (base < 0) {
8067 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8069 if (ctxt->sax->cdataBlock != NULL)
8070 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8071 XML_PARSER_BIG_BUFFER_SIZE);
8072 }
8073 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8074 ctxt->checkIndex = 0;
8075 }
8076 goto done;
8077 } else {
8078 if ((ctxt->sax != NULL) && (base > 0) &&
8079 (!ctxt->disableSAX)) {
8080 if (ctxt->sax->cdataBlock != NULL)
8081 ctxt->sax->cdataBlock(ctxt->userData,
8082 ctxt->input->cur, base);
8083 }
8084 SKIP(base + 3);
8085 ctxt->checkIndex = 0;
8086 ctxt->instate = XML_PARSER_CONTENT;
8087#ifdef DEBUG_PUSH
8088 xmlGenericError(xmlGenericErrorContext,
8089 "PP: entering CONTENT\n");
8090#endif
8091 }
8092 break;
8093 }
8094 case XML_PARSER_END_TAG:
8095 if (avail < 2)
8096 goto done;
8097 if ((!terminate) &&
8098 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8099 goto done;
8100 xmlParseEndTag(ctxt);
8101 if (ctxt->name == NULL) {
8102 ctxt->instate = XML_PARSER_EPILOG;
8103#ifdef DEBUG_PUSH
8104 xmlGenericError(xmlGenericErrorContext,
8105 "PP: entering EPILOG\n");
8106#endif
8107 } else {
8108 ctxt->instate = XML_PARSER_CONTENT;
8109#ifdef DEBUG_PUSH
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: entering CONTENT\n");
8112#endif
8113 }
8114 break;
8115 case XML_PARSER_DTD: {
8116 /*
8117 * Sorry but progressive parsing of the internal subset
8118 * is not expected to be supported. We first check that
8119 * the full content of the internal subset is available and
8120 * the parsing is launched only at that point.
8121 * Internal subset ends up with "']' S? '>'" in an unescaped
8122 * section and not in a ']]>' sequence which are conditional
8123 * sections (whoever argued to keep that crap in XML deserve
8124 * a place in hell !).
8125 */
8126 int base, i;
8127 xmlChar *buf;
8128 xmlChar quote = 0;
8129
8130 base = ctxt->input->cur - ctxt->input->base;
8131 if (base < 0) return(0);
8132 if (ctxt->checkIndex > base)
8133 base = ctxt->checkIndex;
8134 buf = ctxt->input->buf->buffer->content;
8135 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8136 base++) {
8137 if (quote != 0) {
8138 if (buf[base] == quote)
8139 quote = 0;
8140 continue;
8141 }
8142 if (buf[base] == '"') {
8143 quote = '"';
8144 continue;
8145 }
8146 if (buf[base] == '\'') {
8147 quote = '\'';
8148 continue;
8149 }
8150 if (buf[base] == ']') {
8151 if ((unsigned int) base +1 >=
8152 ctxt->input->buf->buffer->use)
8153 break;
8154 if (buf[base + 1] == ']') {
8155 /* conditional crap, skip both ']' ! */
8156 base++;
8157 continue;
8158 }
8159 for (i = 0;
8160 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8161 i++) {
8162 if (buf[base + i] == '>')
8163 goto found_end_int_subset;
8164 }
8165 break;
8166 }
8167 }
8168 /*
8169 * We didn't found the end of the Internal subset
8170 */
8171 if (quote == 0)
8172 ctxt->checkIndex = base;
8173#ifdef DEBUG_PUSH
8174 if (next == 0)
8175 xmlGenericError(xmlGenericErrorContext,
8176 "PP: lookup of int subset end filed\n");
8177#endif
8178 goto done;
8179
8180found_end_int_subset:
8181 xmlParseInternalSubset(ctxt);
8182 ctxt->inSubset = 2;
8183 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8184 (ctxt->sax->externalSubset != NULL))
8185 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8186 ctxt->extSubSystem, ctxt->extSubURI);
8187 ctxt->inSubset = 0;
8188 ctxt->instate = XML_PARSER_PROLOG;
8189 ctxt->checkIndex = 0;
8190#ifdef DEBUG_PUSH
8191 xmlGenericError(xmlGenericErrorContext,
8192 "PP: entering PROLOG\n");
8193#endif
8194 break;
8195 }
8196 case XML_PARSER_COMMENT:
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: internal error, state == COMMENT\n");
8199 ctxt->instate = XML_PARSER_CONTENT;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: entering CONTENT\n");
8203#endif
8204 break;
8205 case XML_PARSER_PI:
8206 xmlGenericError(xmlGenericErrorContext,
8207 "PP: internal error, state == PI\n");
8208 ctxt->instate = XML_PARSER_CONTENT;
8209#ifdef DEBUG_PUSH
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: entering CONTENT\n");
8212#endif
8213 break;
8214 case XML_PARSER_ENTITY_DECL:
8215 xmlGenericError(xmlGenericErrorContext,
8216 "PP: internal error, state == ENTITY_DECL\n");
8217 ctxt->instate = XML_PARSER_DTD;
8218#ifdef DEBUG_PUSH
8219 xmlGenericError(xmlGenericErrorContext,
8220 "PP: entering DTD\n");
8221#endif
8222 break;
8223 case XML_PARSER_ENTITY_VALUE:
8224 xmlGenericError(xmlGenericErrorContext,
8225 "PP: internal error, state == ENTITY_VALUE\n");
8226 ctxt->instate = XML_PARSER_CONTENT;
8227#ifdef DEBUG_PUSH
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: entering DTD\n");
8230#endif
8231 break;
8232 case XML_PARSER_ATTRIBUTE_VALUE:
8233 xmlGenericError(xmlGenericErrorContext,
8234 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8235 ctxt->instate = XML_PARSER_START_TAG;
8236#ifdef DEBUG_PUSH
8237 xmlGenericError(xmlGenericErrorContext,
8238 "PP: entering START_TAG\n");
8239#endif
8240 break;
8241 case XML_PARSER_SYSTEM_LITERAL:
8242 xmlGenericError(xmlGenericErrorContext,
8243 "PP: internal error, state == SYSTEM_LITERAL\n");
8244 ctxt->instate = XML_PARSER_START_TAG;
8245#ifdef DEBUG_PUSH
8246 xmlGenericError(xmlGenericErrorContext,
8247 "PP: entering START_TAG\n");
8248#endif
8249 break;
8250 }
8251 }
8252done:
8253#ifdef DEBUG_PUSH
8254 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8255#endif
8256 return(ret);
8257}
8258
8259/**
8260 * xmlParseTry:
8261 * @ctxt: an XML parser context
8262 *
8263 * Try to progress on parsing
8264 *
8265 * Returns zero if no parsing was possible
8266 */
8267int
8268xmlParseTry(xmlParserCtxtPtr ctxt) {
8269 return(xmlParseTryOrFinish(ctxt, 0));
8270}
8271
8272/**
8273 * xmlParseChunk:
8274 * @ctxt: an XML parser context
8275 * @chunk: an char array
8276 * @size: the size in byte of the chunk
8277 * @terminate: last chunk indicator
8278 *
8279 * Parse a Chunk of memory
8280 *
8281 * Returns zero if no error, the xmlParserErrors otherwise.
8282 */
8283int
8284xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8285 int terminate) {
8286 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8287 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8288 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8289 int cur = ctxt->input->cur - ctxt->input->base;
8290
8291 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8292 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8293 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008294 ctxt->input->end =
8295 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008296#ifdef DEBUG_PUSH
8297 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8298#endif
8299
8300 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8301 xmlParseTryOrFinish(ctxt, terminate);
8302 } else if (ctxt->instate != XML_PARSER_EOF) {
8303 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8304 xmlParserInputBufferPtr in = ctxt->input->buf;
8305 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8306 (in->raw != NULL)) {
8307 int nbchars;
8308
8309 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8310 if (nbchars < 0) {
8311 xmlGenericError(xmlGenericErrorContext,
8312 "xmlParseChunk: encoder error\n");
8313 return(XML_ERR_INVALID_ENCODING);
8314 }
8315 }
8316 }
8317 }
8318 xmlParseTryOrFinish(ctxt, terminate);
8319 if (terminate) {
8320 /*
8321 * Check for termination
8322 */
8323 if ((ctxt->instate != XML_PARSER_EOF) &&
8324 (ctxt->instate != XML_PARSER_EPILOG)) {
8325 ctxt->errNo = XML_ERR_DOCUMENT_END;
8326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8327 ctxt->sax->error(ctxt->userData,
8328 "Extra content at the end of the document\n");
8329 ctxt->wellFormed = 0;
8330 ctxt->disableSAX = 1;
8331 }
8332 if (ctxt->instate != XML_PARSER_EOF) {
8333 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8334 (!ctxt->disableSAX))
8335 ctxt->sax->endDocument(ctxt->userData);
8336 }
8337 ctxt->instate = XML_PARSER_EOF;
8338 }
8339 return((xmlParserErrors) ctxt->errNo);
8340}
8341
8342/************************************************************************
8343 * *
8344 * I/O front end functions to the parser *
8345 * *
8346 ************************************************************************/
8347
8348/**
8349 * xmlStopParser:
8350 * @ctxt: an XML parser context
8351 *
8352 * Blocks further parser processing
8353 */
8354void
8355xmlStopParser(xmlParserCtxtPtr ctxt) {
8356 ctxt->instate = XML_PARSER_EOF;
8357 if (ctxt->input != NULL)
8358 ctxt->input->cur = BAD_CAST"";
8359}
8360
8361/**
8362 * xmlCreatePushParserCtxt:
8363 * @sax: a SAX handler
8364 * @user_data: The user data returned on SAX callbacks
8365 * @chunk: a pointer to an array of chars
8366 * @size: number of chars in the array
8367 * @filename: an optional file name or URI
8368 *
8369 * Create a parser context for using the XML parser in push mode
8370 * To allow content encoding detection, @size should be >= 4
8371 * The value of @filename is used for fetching external entities
8372 * and error/warning reports.
8373 *
8374 * Returns the new parser context or NULL
8375 */
8376xmlParserCtxtPtr
8377xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8378 const char *chunk, int size, const char *filename) {
8379 xmlParserCtxtPtr ctxt;
8380 xmlParserInputPtr inputStream;
8381 xmlParserInputBufferPtr buf;
8382 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8383
8384 /*
8385 * plug some encoding conversion routines
8386 */
8387 if ((chunk != NULL) && (size >= 4))
8388 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8389
8390 buf = xmlAllocParserInputBuffer(enc);
8391 if (buf == NULL) return(NULL);
8392
8393 ctxt = xmlNewParserCtxt();
8394 if (ctxt == NULL) {
8395 xmlFree(buf);
8396 return(NULL);
8397 }
8398 if (sax != NULL) {
8399 if (ctxt->sax != &xmlDefaultSAXHandler)
8400 xmlFree(ctxt->sax);
8401 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8402 if (ctxt->sax == NULL) {
8403 xmlFree(buf);
8404 xmlFree(ctxt);
8405 return(NULL);
8406 }
8407 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8408 if (user_data != NULL)
8409 ctxt->userData = user_data;
8410 }
8411 if (filename == NULL) {
8412 ctxt->directory = NULL;
8413 } else {
8414 ctxt->directory = xmlParserGetDirectory(filename);
8415 }
8416
8417 inputStream = xmlNewInputStream(ctxt);
8418 if (inputStream == NULL) {
8419 xmlFreeParserCtxt(ctxt);
8420 return(NULL);
8421 }
8422
8423 if (filename == NULL)
8424 inputStream->filename = NULL;
8425 else
8426 inputStream->filename = xmlMemStrdup(filename);
8427 inputStream->buf = buf;
8428 inputStream->base = inputStream->buf->buffer->content;
8429 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008430 inputStream->end =
8431 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008432 if (enc != XML_CHAR_ENCODING_NONE) {
8433 xmlSwitchEncoding(ctxt, enc);
8434 }
8435
8436 inputPush(ctxt, inputStream);
8437
8438 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8439 (ctxt->input->buf != NULL)) {
8440 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8441#ifdef DEBUG_PUSH
8442 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8443#endif
8444 }
8445
8446 return(ctxt);
8447}
8448
8449/**
8450 * xmlCreateIOParserCtxt:
8451 * @sax: a SAX handler
8452 * @user_data: The user data returned on SAX callbacks
8453 * @ioread: an I/O read function
8454 * @ioclose: an I/O close function
8455 * @ioctx: an I/O handler
8456 * @enc: the charset encoding if known
8457 *
8458 * Create a parser context for using the XML parser with an existing
8459 * I/O stream
8460 *
8461 * Returns the new parser context or NULL
8462 */
8463xmlParserCtxtPtr
8464xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8465 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8466 void *ioctx, xmlCharEncoding enc) {
8467 xmlParserCtxtPtr ctxt;
8468 xmlParserInputPtr inputStream;
8469 xmlParserInputBufferPtr buf;
8470
8471 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8472 if (buf == NULL) return(NULL);
8473
8474 ctxt = xmlNewParserCtxt();
8475 if (ctxt == NULL) {
8476 xmlFree(buf);
8477 return(NULL);
8478 }
8479 if (sax != NULL) {
8480 if (ctxt->sax != &xmlDefaultSAXHandler)
8481 xmlFree(ctxt->sax);
8482 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8483 if (ctxt->sax == NULL) {
8484 xmlFree(buf);
8485 xmlFree(ctxt);
8486 return(NULL);
8487 }
8488 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8489 if (user_data != NULL)
8490 ctxt->userData = user_data;
8491 }
8492
8493 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8494 if (inputStream == NULL) {
8495 xmlFreeParserCtxt(ctxt);
8496 return(NULL);
8497 }
8498 inputPush(ctxt, inputStream);
8499
8500 return(ctxt);
8501}
8502
8503/************************************************************************
8504 * *
8505 * Front ends when parsing a Dtd *
8506 * *
8507 ************************************************************************/
8508
8509/**
8510 * xmlIOParseDTD:
8511 * @sax: the SAX handler block or NULL
8512 * @input: an Input Buffer
8513 * @enc: the charset encoding if known
8514 *
8515 * Load and parse a DTD
8516 *
8517 * Returns the resulting xmlDtdPtr or NULL in case of error.
8518 * @input will be freed at parsing end.
8519 */
8520
8521xmlDtdPtr
8522xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8523 xmlCharEncoding enc) {
8524 xmlDtdPtr ret = NULL;
8525 xmlParserCtxtPtr ctxt;
8526 xmlParserInputPtr pinput = NULL;
8527
8528 if (input == NULL)
8529 return(NULL);
8530
8531 ctxt = xmlNewParserCtxt();
8532 if (ctxt == NULL) {
8533 return(NULL);
8534 }
8535
8536 /*
8537 * Set-up the SAX context
8538 */
8539 if (sax != NULL) {
8540 if (ctxt->sax != NULL)
8541 xmlFree(ctxt->sax);
8542 ctxt->sax = sax;
8543 ctxt->userData = NULL;
8544 }
8545
8546 /*
8547 * generate a parser input from the I/O handler
8548 */
8549
8550 pinput = xmlNewIOInputStream(ctxt, input, enc);
8551 if (pinput == NULL) {
8552 if (sax != NULL) ctxt->sax = NULL;
8553 xmlFreeParserCtxt(ctxt);
8554 return(NULL);
8555 }
8556
8557 /*
8558 * plug some encoding conversion routines here.
8559 */
8560 xmlPushInput(ctxt, pinput);
8561
8562 pinput->filename = NULL;
8563 pinput->line = 1;
8564 pinput->col = 1;
8565 pinput->base = ctxt->input->cur;
8566 pinput->cur = ctxt->input->cur;
8567 pinput->free = NULL;
8568
8569 /*
8570 * let's parse that entity knowing it's an external subset.
8571 */
8572 ctxt->inSubset = 2;
8573 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8574 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8575 BAD_CAST "none", BAD_CAST "none");
8576 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8577
8578 if (ctxt->myDoc != NULL) {
8579 if (ctxt->wellFormed) {
8580 ret = ctxt->myDoc->extSubset;
8581 ctxt->myDoc->extSubset = NULL;
8582 } else {
8583 ret = NULL;
8584 }
8585 xmlFreeDoc(ctxt->myDoc);
8586 ctxt->myDoc = NULL;
8587 }
8588 if (sax != NULL) ctxt->sax = NULL;
8589 xmlFreeParserCtxt(ctxt);
8590
8591 return(ret);
8592}
8593
8594/**
8595 * xmlSAXParseDTD:
8596 * @sax: the SAX handler block
8597 * @ExternalID: a NAME* containing the External ID of the DTD
8598 * @SystemID: a NAME* containing the URL to the DTD
8599 *
8600 * Load and parse an external subset.
8601 *
8602 * Returns the resulting xmlDtdPtr or NULL in case of error.
8603 */
8604
8605xmlDtdPtr
8606xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8607 const xmlChar *SystemID) {
8608 xmlDtdPtr ret = NULL;
8609 xmlParserCtxtPtr ctxt;
8610 xmlParserInputPtr input = NULL;
8611 xmlCharEncoding enc;
8612
8613 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8614
8615 ctxt = xmlNewParserCtxt();
8616 if (ctxt == NULL) {
8617 return(NULL);
8618 }
8619
8620 /*
8621 * Set-up the SAX context
8622 */
8623 if (sax != NULL) {
8624 if (ctxt->sax != NULL)
8625 xmlFree(ctxt->sax);
8626 ctxt->sax = sax;
8627 ctxt->userData = NULL;
8628 }
8629
8630 /*
8631 * Ask the Entity resolver to load the damn thing
8632 */
8633
8634 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8635 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8636 if (input == NULL) {
8637 if (sax != NULL) ctxt->sax = NULL;
8638 xmlFreeParserCtxt(ctxt);
8639 return(NULL);
8640 }
8641
8642 /*
8643 * plug some encoding conversion routines here.
8644 */
8645 xmlPushInput(ctxt, input);
8646 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8647 xmlSwitchEncoding(ctxt, enc);
8648
8649 if (input->filename == NULL)
8650 input->filename = (char *) xmlStrdup(SystemID);
8651 input->line = 1;
8652 input->col = 1;
8653 input->base = ctxt->input->cur;
8654 input->cur = ctxt->input->cur;
8655 input->free = NULL;
8656
8657 /*
8658 * let's parse that entity knowing it's an external subset.
8659 */
8660 ctxt->inSubset = 2;
8661 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8662 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8663 ExternalID, SystemID);
8664 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8665
8666 if (ctxt->myDoc != NULL) {
8667 if (ctxt->wellFormed) {
8668 ret = ctxt->myDoc->extSubset;
8669 ctxt->myDoc->extSubset = NULL;
8670 } else {
8671 ret = NULL;
8672 }
8673 xmlFreeDoc(ctxt->myDoc);
8674 ctxt->myDoc = NULL;
8675 }
8676 if (sax != NULL) ctxt->sax = NULL;
8677 xmlFreeParserCtxt(ctxt);
8678
8679 return(ret);
8680}
8681
8682/**
8683 * xmlParseDTD:
8684 * @ExternalID: a NAME* containing the External ID of the DTD
8685 * @SystemID: a NAME* containing the URL to the DTD
8686 *
8687 * Load and parse an external subset.
8688 *
8689 * Returns the resulting xmlDtdPtr or NULL in case of error.
8690 */
8691
8692xmlDtdPtr
8693xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8694 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8695}
8696
8697/************************************************************************
8698 * *
8699 * Front ends when parsing an Entity *
8700 * *
8701 ************************************************************************/
8702
8703/**
8704 * xmlSAXParseBalancedChunk:
8705 * @ctx: an XML parser context (possibly NULL)
8706 * @sax: the SAX handler bloc (possibly NULL)
8707 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8708 * @input: a parser input stream
8709 * @enc: the encoding
8710 *
8711 * Parse a well-balanced chunk of an XML document
8712 * The user has to provide SAX callback block whose routines will be
8713 * called by the parser
8714 * The allowed sequence for the Well Balanced Chunk is the one defined by
8715 * the content production in the XML grammar:
8716 *
8717 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8718 *
8719 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8720 * the error code otherwise
8721 */
8722
8723int
8724xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8725 void *user_data, xmlParserInputPtr input,
8726 xmlCharEncoding enc) {
8727 xmlParserCtxtPtr ctxt;
8728 int ret;
8729
8730 if (input == NULL) return(-1);
8731
8732 if (ctx != NULL)
8733 ctxt = ctx;
8734 else {
8735 ctxt = xmlNewParserCtxt();
8736 if (ctxt == NULL)
8737 return(-1);
8738 if (sax == NULL)
8739 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8740 }
8741
8742 /*
8743 * Set-up the SAX context
8744 */
8745 if (sax != NULL) {
8746 if (ctxt->sax != NULL)
8747 xmlFree(ctxt->sax);
8748 ctxt->sax = sax;
8749 ctxt->userData = user_data;
8750 }
8751
8752 /*
8753 * plug some encoding conversion routines here.
8754 */
8755 xmlPushInput(ctxt, input);
8756 if (enc != XML_CHAR_ENCODING_NONE)
8757 xmlSwitchEncoding(ctxt, enc);
8758
8759 /*
8760 * let's parse that entity knowing it's an external subset.
8761 */
8762 xmlParseContent(ctxt);
8763 ret = ctxt->errNo;
8764
8765 if (ctx == NULL) {
8766 if (sax != NULL)
8767 ctxt->sax = NULL;
8768 else
8769 xmlFreeDoc(ctxt->myDoc);
8770 xmlFreeParserCtxt(ctxt);
8771 }
8772 return(ret);
8773}
8774
8775/**
8776 * xmlParseCtxtExternalEntity:
8777 * @ctx: the existing parsing context
8778 * @URL: the URL for the entity to load
8779 * @ID: the System ID for the entity to load
8780 * @list: the return value for the set of parsed nodes
8781 *
8782 * Parse an external general entity within an existing parsing context
8783 * An external general parsed entity is well-formed if it matches the
8784 * production labeled extParsedEnt.
8785 *
8786 * [78] extParsedEnt ::= TextDecl? content
8787 *
8788 * Returns 0 if the entity is well formed, -1 in case of args problem and
8789 * the parser error code otherwise
8790 */
8791
8792int
8793xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8794 const xmlChar *ID, xmlNodePtr *list) {
8795 xmlParserCtxtPtr ctxt;
8796 xmlDocPtr newDoc;
8797 xmlSAXHandlerPtr oldsax = NULL;
8798 int ret = 0;
8799
8800 if (ctx->depth > 40) {
8801 return(XML_ERR_ENTITY_LOOP);
8802 }
8803
8804 if (list != NULL)
8805 *list = NULL;
8806 if ((URL == NULL) && (ID == NULL))
8807 return(-1);
8808 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8809 return(-1);
8810
8811
8812 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8813 if (ctxt == NULL) return(-1);
8814 ctxt->userData = ctxt;
8815 oldsax = ctxt->sax;
8816 ctxt->sax = ctx->sax;
8817 newDoc = xmlNewDoc(BAD_CAST "1.0");
8818 if (newDoc == NULL) {
8819 xmlFreeParserCtxt(ctxt);
8820 return(-1);
8821 }
8822 if (ctx->myDoc != NULL) {
8823 newDoc->intSubset = ctx->myDoc->intSubset;
8824 newDoc->extSubset = ctx->myDoc->extSubset;
8825 }
8826 if (ctx->myDoc->URL != NULL) {
8827 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8828 }
8829 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8830 if (newDoc->children == NULL) {
8831 ctxt->sax = oldsax;
8832 xmlFreeParserCtxt(ctxt);
8833 newDoc->intSubset = NULL;
8834 newDoc->extSubset = NULL;
8835 xmlFreeDoc(newDoc);
8836 return(-1);
8837 }
8838 nodePush(ctxt, newDoc->children);
8839 if (ctx->myDoc == NULL) {
8840 ctxt->myDoc = newDoc;
8841 } else {
8842 ctxt->myDoc = ctx->myDoc;
8843 newDoc->children->doc = ctx->myDoc;
8844 }
8845
8846 /*
8847 * Parse a possible text declaration first
8848 */
8849 GROW;
8850 if ((RAW == '<') && (NXT(1) == '?') &&
8851 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8852 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8853 xmlParseTextDecl(ctxt);
8854 }
8855
8856 /*
8857 * Doing validity checking on chunk doesn't make sense
8858 */
8859 ctxt->instate = XML_PARSER_CONTENT;
8860 ctxt->validate = ctx->validate;
8861 ctxt->loadsubset = ctx->loadsubset;
8862 ctxt->depth = ctx->depth + 1;
8863 ctxt->replaceEntities = ctx->replaceEntities;
8864 if (ctxt->validate) {
8865 ctxt->vctxt.error = ctx->vctxt.error;
8866 ctxt->vctxt.warning = ctx->vctxt.warning;
8867 /* Allocate the Node stack */
8868 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8869 if (ctxt->vctxt.nodeTab == NULL) {
8870 xmlGenericError(xmlGenericErrorContext,
8871 "xmlParseCtxtExternalEntity: out of memory\n");
8872 ctxt->validate = 0;
8873 ctxt->vctxt.error = NULL;
8874 ctxt->vctxt.warning = NULL;
8875 } else {
8876 ctxt->vctxt.nodeNr = 0;
8877 ctxt->vctxt.nodeMax = 4;
8878 ctxt->vctxt.node = NULL;
8879 }
8880 } else {
8881 ctxt->vctxt.error = NULL;
8882 ctxt->vctxt.warning = NULL;
8883 }
8884
8885 xmlParseContent(ctxt);
8886
8887 if ((RAW == '<') && (NXT(1) == '/')) {
8888 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8890 ctxt->sax->error(ctxt->userData,
8891 "chunk is not well balanced\n");
8892 ctxt->wellFormed = 0;
8893 ctxt->disableSAX = 1;
8894 } else if (RAW != 0) {
8895 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8897 ctxt->sax->error(ctxt->userData,
8898 "extra content at the end of well balanced chunk\n");
8899 ctxt->wellFormed = 0;
8900 ctxt->disableSAX = 1;
8901 }
8902 if (ctxt->node != newDoc->children) {
8903 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8905 ctxt->sax->error(ctxt->userData,
8906 "chunk is not well balanced\n");
8907 ctxt->wellFormed = 0;
8908 ctxt->disableSAX = 1;
8909 }
8910
8911 if (!ctxt->wellFormed) {
8912 if (ctxt->errNo == 0)
8913 ret = 1;
8914 else
8915 ret = ctxt->errNo;
8916 } else {
8917 if (list != NULL) {
8918 xmlNodePtr cur;
8919
8920 /*
8921 * Return the newly created nodeset after unlinking it from
8922 * they pseudo parent.
8923 */
8924 cur = newDoc->children->children;
8925 *list = cur;
8926 while (cur != NULL) {
8927 cur->parent = NULL;
8928 cur = cur->next;
8929 }
8930 newDoc->children->children = NULL;
8931 }
8932 ret = 0;
8933 }
8934 ctxt->sax = oldsax;
8935 xmlFreeParserCtxt(ctxt);
8936 newDoc->intSubset = NULL;
8937 newDoc->extSubset = NULL;
8938 xmlFreeDoc(newDoc);
8939
8940 return(ret);
8941}
8942
8943/**
8944 * xmlParseExternalEntity:
8945 * @doc: the document the chunk pertains to
8946 * @sax: the SAX handler bloc (possibly NULL)
8947 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8948 * @depth: Used for loop detection, use 0
8949 * @URL: the URL for the entity to load
8950 * @ID: the System ID for the entity to load
8951 * @list: the return value for the set of parsed nodes
8952 *
8953 * Parse an external general entity
8954 * An external general parsed entity is well-formed if it matches the
8955 * production labeled extParsedEnt.
8956 *
8957 * [78] extParsedEnt ::= TextDecl? content
8958 *
8959 * Returns 0 if the entity is well formed, -1 in case of args problem and
8960 * the parser error code otherwise
8961 */
8962
8963int
8964xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8965 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8966 xmlParserCtxtPtr ctxt;
8967 xmlDocPtr newDoc;
8968 xmlSAXHandlerPtr oldsax = NULL;
8969 int ret = 0;
8970
8971 if (depth > 40) {
8972 return(XML_ERR_ENTITY_LOOP);
8973 }
8974
8975
8976
8977 if (list != NULL)
8978 *list = NULL;
8979 if ((URL == NULL) && (ID == NULL))
8980 return(-1);
8981 if (doc == NULL) /* @@ relax but check for dereferences */
8982 return(-1);
8983
8984
8985 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8986 if (ctxt == NULL) return(-1);
8987 ctxt->userData = ctxt;
8988 if (sax != NULL) {
8989 oldsax = ctxt->sax;
8990 ctxt->sax = sax;
8991 if (user_data != NULL)
8992 ctxt->userData = user_data;
8993 }
8994 newDoc = xmlNewDoc(BAD_CAST "1.0");
8995 if (newDoc == NULL) {
8996 xmlFreeParserCtxt(ctxt);
8997 return(-1);
8998 }
8999 if (doc != NULL) {
9000 newDoc->intSubset = doc->intSubset;
9001 newDoc->extSubset = doc->extSubset;
9002 }
9003 if (doc->URL != NULL) {
9004 newDoc->URL = xmlStrdup(doc->URL);
9005 }
9006 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9007 if (newDoc->children == NULL) {
9008 if (sax != NULL)
9009 ctxt->sax = oldsax;
9010 xmlFreeParserCtxt(ctxt);
9011 newDoc->intSubset = NULL;
9012 newDoc->extSubset = NULL;
9013 xmlFreeDoc(newDoc);
9014 return(-1);
9015 }
9016 nodePush(ctxt, newDoc->children);
9017 if (doc == NULL) {
9018 ctxt->myDoc = newDoc;
9019 } else {
9020 ctxt->myDoc = doc;
9021 newDoc->children->doc = doc;
9022 }
9023
9024 /*
9025 * Parse a possible text declaration first
9026 */
9027 GROW;
9028 if ((RAW == '<') && (NXT(1) == '?') &&
9029 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9030 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9031 xmlParseTextDecl(ctxt);
9032 }
9033
9034 /*
9035 * Doing validity checking on chunk doesn't make sense
9036 */
9037 ctxt->instate = XML_PARSER_CONTENT;
9038 ctxt->validate = 0;
9039 ctxt->loadsubset = 0;
9040 ctxt->depth = depth;
9041
9042 xmlParseContent(ctxt);
9043
9044 if ((RAW == '<') && (NXT(1) == '/')) {
9045 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9047 ctxt->sax->error(ctxt->userData,
9048 "chunk is not well balanced\n");
9049 ctxt->wellFormed = 0;
9050 ctxt->disableSAX = 1;
9051 } else if (RAW != 0) {
9052 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9054 ctxt->sax->error(ctxt->userData,
9055 "extra content at the end of well balanced chunk\n");
9056 ctxt->wellFormed = 0;
9057 ctxt->disableSAX = 1;
9058 }
9059 if (ctxt->node != newDoc->children) {
9060 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9062 ctxt->sax->error(ctxt->userData,
9063 "chunk is not well balanced\n");
9064 ctxt->wellFormed = 0;
9065 ctxt->disableSAX = 1;
9066 }
9067
9068 if (!ctxt->wellFormed) {
9069 if (ctxt->errNo == 0)
9070 ret = 1;
9071 else
9072 ret = ctxt->errNo;
9073 } else {
9074 if (list != NULL) {
9075 xmlNodePtr cur;
9076
9077 /*
9078 * Return the newly created nodeset after unlinking it from
9079 * they pseudo parent.
9080 */
9081 cur = newDoc->children->children;
9082 *list = cur;
9083 while (cur != NULL) {
9084 cur->parent = NULL;
9085 cur = cur->next;
9086 }
9087 newDoc->children->children = NULL;
9088 }
9089 ret = 0;
9090 }
9091 if (sax != NULL)
9092 ctxt->sax = oldsax;
9093 xmlFreeParserCtxt(ctxt);
9094 newDoc->intSubset = NULL;
9095 newDoc->extSubset = NULL;
9096 xmlFreeDoc(newDoc);
9097
9098 return(ret);
9099}
9100
9101/**
9102 * xmlParseBalancedChunk:
9103 * @doc: the document the chunk pertains to
9104 * @sax: the SAX handler bloc (possibly NULL)
9105 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9106 * @depth: Used for loop detection, use 0
9107 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9108 * @list: the return value for the set of parsed nodes
9109 *
9110 * Parse a well-balanced chunk of an XML document
9111 * called by the parser
9112 * The allowed sequence for the Well Balanced Chunk is the one defined by
9113 * the content production in the XML grammar:
9114 *
9115 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9116 *
9117 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9118 * the parser error code otherwise
9119 */
9120
9121int
9122xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9123 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9124 xmlParserCtxtPtr ctxt;
9125 xmlDocPtr newDoc;
9126 xmlSAXHandlerPtr oldsax = NULL;
9127 int size;
9128 int ret = 0;
9129
9130 if (depth > 40) {
9131 return(XML_ERR_ENTITY_LOOP);
9132 }
9133
9134
9135 if (list != NULL)
9136 *list = NULL;
9137 if (string == NULL)
9138 return(-1);
9139
9140 size = xmlStrlen(string);
9141
9142 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9143 if (ctxt == NULL) return(-1);
9144 ctxt->userData = ctxt;
9145 if (sax != NULL) {
9146 oldsax = ctxt->sax;
9147 ctxt->sax = sax;
9148 if (user_data != NULL)
9149 ctxt->userData = user_data;
9150 }
9151 newDoc = xmlNewDoc(BAD_CAST "1.0");
9152 if (newDoc == NULL) {
9153 xmlFreeParserCtxt(ctxt);
9154 return(-1);
9155 }
9156 if (doc != NULL) {
9157 newDoc->intSubset = doc->intSubset;
9158 newDoc->extSubset = doc->extSubset;
9159 }
9160 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9161 if (newDoc->children == NULL) {
9162 if (sax != NULL)
9163 ctxt->sax = oldsax;
9164 xmlFreeParserCtxt(ctxt);
9165 newDoc->intSubset = NULL;
9166 newDoc->extSubset = NULL;
9167 xmlFreeDoc(newDoc);
9168 return(-1);
9169 }
9170 nodePush(ctxt, newDoc->children);
9171 if (doc == NULL) {
9172 ctxt->myDoc = newDoc;
9173 } else {
9174 ctxt->myDoc = doc;
9175 newDoc->children->doc = doc;
9176 }
9177 ctxt->instate = XML_PARSER_CONTENT;
9178 ctxt->depth = depth;
9179
9180 /*
9181 * Doing validity checking on chunk doesn't make sense
9182 */
9183 ctxt->validate = 0;
9184 ctxt->loadsubset = 0;
9185
9186 xmlParseContent(ctxt);
9187
9188 if ((RAW == '<') && (NXT(1) == '/')) {
9189 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9191 ctxt->sax->error(ctxt->userData,
9192 "chunk is not well balanced\n");
9193 ctxt->wellFormed = 0;
9194 ctxt->disableSAX = 1;
9195 } else if (RAW != 0) {
9196 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9198 ctxt->sax->error(ctxt->userData,
9199 "extra content at the end of well balanced chunk\n");
9200 ctxt->wellFormed = 0;
9201 ctxt->disableSAX = 1;
9202 }
9203 if (ctxt->node != newDoc->children) {
9204 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9206 ctxt->sax->error(ctxt->userData,
9207 "chunk is not well balanced\n");
9208 ctxt->wellFormed = 0;
9209 ctxt->disableSAX = 1;
9210 }
9211
9212 if (!ctxt->wellFormed) {
9213 if (ctxt->errNo == 0)
9214 ret = 1;
9215 else
9216 ret = ctxt->errNo;
9217 } else {
9218 if (list != NULL) {
9219 xmlNodePtr cur;
9220
9221 /*
9222 * Return the newly created nodeset after unlinking it from
9223 * they pseudo parent.
9224 */
9225 cur = newDoc->children->children;
9226 *list = cur;
9227 while (cur != NULL) {
9228 cur->parent = NULL;
9229 cur = cur->next;
9230 }
9231 newDoc->children->children = NULL;
9232 }
9233 ret = 0;
9234 }
9235 if (sax != NULL)
9236 ctxt->sax = oldsax;
9237 xmlFreeParserCtxt(ctxt);
9238 newDoc->intSubset = NULL;
9239 newDoc->extSubset = NULL;
9240 xmlFreeDoc(newDoc);
9241
9242 return(ret);
9243}
9244
9245/**
9246 * xmlSAXParseEntity:
9247 * @sax: the SAX handler block
9248 * @filename: the filename
9249 *
9250 * parse an XML external entity out of context and build a tree.
9251 * It use the given SAX function block to handle the parsing callback.
9252 * If sax is NULL, fallback to the default DOM tree building routines.
9253 *
9254 * [78] extParsedEnt ::= TextDecl? content
9255 *
9256 * This correspond to a "Well Balanced" chunk
9257 *
9258 * Returns the resulting document tree
9259 */
9260
9261xmlDocPtr
9262xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9263 xmlDocPtr ret;
9264 xmlParserCtxtPtr ctxt;
9265 char *directory = NULL;
9266
9267 ctxt = xmlCreateFileParserCtxt(filename);
9268 if (ctxt == NULL) {
9269 return(NULL);
9270 }
9271 if (sax != NULL) {
9272 if (ctxt->sax != NULL)
9273 xmlFree(ctxt->sax);
9274 ctxt->sax = sax;
9275 ctxt->userData = NULL;
9276 }
9277
9278 if ((ctxt->directory == NULL) && (directory == NULL))
9279 directory = xmlParserGetDirectory(filename);
9280
9281 xmlParseExtParsedEnt(ctxt);
9282
9283 if (ctxt->wellFormed)
9284 ret = ctxt->myDoc;
9285 else {
9286 ret = NULL;
9287 xmlFreeDoc(ctxt->myDoc);
9288 ctxt->myDoc = NULL;
9289 }
9290 if (sax != NULL)
9291 ctxt->sax = NULL;
9292 xmlFreeParserCtxt(ctxt);
9293
9294 return(ret);
9295}
9296
9297/**
9298 * xmlParseEntity:
9299 * @filename: the filename
9300 *
9301 * parse an XML external entity out of context and build a tree.
9302 *
9303 * [78] extParsedEnt ::= TextDecl? content
9304 *
9305 * This correspond to a "Well Balanced" chunk
9306 *
9307 * Returns the resulting document tree
9308 */
9309
9310xmlDocPtr
9311xmlParseEntity(const char *filename) {
9312 return(xmlSAXParseEntity(NULL, filename));
9313}
9314
9315/**
9316 * xmlCreateEntityParserCtxt:
9317 * @URL: the entity URL
9318 * @ID: the entity PUBLIC ID
9319 * @base: a posible base for the target URI
9320 *
9321 * Create a parser context for an external entity
9322 * Automatic support for ZLIB/Compress compressed document is provided
9323 * by default if found at compile-time.
9324 *
9325 * Returns the new parser context or NULL
9326 */
9327xmlParserCtxtPtr
9328xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9329 const xmlChar *base) {
9330 xmlParserCtxtPtr ctxt;
9331 xmlParserInputPtr inputStream;
9332 char *directory = NULL;
9333 xmlChar *uri;
9334
9335 ctxt = xmlNewParserCtxt();
9336 if (ctxt == NULL) {
9337 return(NULL);
9338 }
9339
9340 uri = xmlBuildURI(URL, base);
9341
9342 if (uri == NULL) {
9343 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9344 if (inputStream == NULL) {
9345 xmlFreeParserCtxt(ctxt);
9346 return(NULL);
9347 }
9348
9349 inputPush(ctxt, inputStream);
9350
9351 if ((ctxt->directory == NULL) && (directory == NULL))
9352 directory = xmlParserGetDirectory((char *)URL);
9353 if ((ctxt->directory == NULL) && (directory != NULL))
9354 ctxt->directory = directory;
9355 } else {
9356 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9357 if (inputStream == NULL) {
9358 xmlFree(uri);
9359 xmlFreeParserCtxt(ctxt);
9360 return(NULL);
9361 }
9362
9363 inputPush(ctxt, inputStream);
9364
9365 if ((ctxt->directory == NULL) && (directory == NULL))
9366 directory = xmlParserGetDirectory((char *)uri);
9367 if ((ctxt->directory == NULL) && (directory != NULL))
9368 ctxt->directory = directory;
9369 xmlFree(uri);
9370 }
9371
9372 return(ctxt);
9373}
9374
9375/************************************************************************
9376 * *
9377 * Front ends when parsing from a file *
9378 * *
9379 ************************************************************************/
9380
9381/**
9382 * xmlCreateFileParserCtxt:
9383 * @filename: the filename
9384 *
9385 * Create a parser context for a file content.
9386 * Automatic support for ZLIB/Compress compressed document is provided
9387 * by default if found at compile-time.
9388 *
9389 * Returns the new parser context or NULL
9390 */
9391xmlParserCtxtPtr
9392xmlCreateFileParserCtxt(const char *filename)
9393{
9394 xmlParserCtxtPtr ctxt;
9395 xmlParserInputPtr inputStream;
9396 xmlParserInputBufferPtr buf;
9397 char *directory = NULL;
9398
9399 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9400 if (buf == NULL) {
9401 return(NULL);
9402 }
9403
9404 ctxt = xmlNewParserCtxt();
9405 if (ctxt == NULL) {
9406 if (xmlDefaultSAXHandler.error != NULL) {
9407 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9408 }
9409 return(NULL);
9410 }
9411
9412 inputStream = xmlNewInputStream(ctxt);
9413 if (inputStream == NULL) {
9414 xmlFreeParserCtxt(ctxt);
9415 return(NULL);
9416 }
9417
9418 inputStream->filename = xmlMemStrdup(filename);
9419 inputStream->buf = buf;
9420 inputStream->base = inputStream->buf->buffer->content;
9421 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009422 inputStream->end =
9423 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009424
9425 inputPush(ctxt, inputStream);
9426 if ((ctxt->directory == NULL) && (directory == NULL))
9427 directory = xmlParserGetDirectory(filename);
9428 if ((ctxt->directory == NULL) && (directory != NULL))
9429 ctxt->directory = directory;
9430
9431 return(ctxt);
9432}
9433
9434/**
9435 * xmlSAXParseFile:
9436 * @sax: the SAX handler block
9437 * @filename: the filename
9438 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9439 * documents
9440 *
9441 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9442 * compressed document is provided by default if found at compile-time.
9443 * It use the given SAX function block to handle the parsing callback.
9444 * If sax is NULL, fallback to the default DOM tree building routines.
9445 *
9446 * Returns the resulting document tree
9447 */
9448
9449xmlDocPtr
9450xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9451 int recovery) {
9452 xmlDocPtr ret;
9453 xmlParserCtxtPtr ctxt;
9454 char *directory = NULL;
9455
9456 ctxt = xmlCreateFileParserCtxt(filename);
9457 if (ctxt == NULL) {
9458 return(NULL);
9459 }
9460 if (sax != NULL) {
9461 if (ctxt->sax != NULL)
9462 xmlFree(ctxt->sax);
9463 ctxt->sax = sax;
9464 ctxt->userData = NULL;
9465 }
9466
9467 if ((ctxt->directory == NULL) && (directory == NULL))
9468 directory = xmlParserGetDirectory(filename);
9469 if ((ctxt->directory == NULL) && (directory != NULL))
9470 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9471
9472 xmlParseDocument(ctxt);
9473
9474 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9475 else {
9476 ret = NULL;
9477 xmlFreeDoc(ctxt->myDoc);
9478 ctxt->myDoc = NULL;
9479 }
9480 if (sax != NULL)
9481 ctxt->sax = NULL;
9482 xmlFreeParserCtxt(ctxt);
9483
9484 return(ret);
9485}
9486
9487/**
9488 * xmlRecoverDoc:
9489 * @cur: a pointer to an array of xmlChar
9490 *
9491 * parse an XML in-memory document and build a tree.
9492 * In the case the document is not Well Formed, a tree is built anyway
9493 *
9494 * Returns the resulting document tree
9495 */
9496
9497xmlDocPtr
9498xmlRecoverDoc(xmlChar *cur) {
9499 return(xmlSAXParseDoc(NULL, cur, 1));
9500}
9501
9502/**
9503 * xmlParseFile:
9504 * @filename: the filename
9505 *
9506 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9507 * compressed document is provided by default if found at compile-time.
9508 *
9509 * Returns the resulting document tree
9510 */
9511
9512xmlDocPtr
9513xmlParseFile(const char *filename) {
9514 return(xmlSAXParseFile(NULL, filename, 0));
9515}
9516
9517/**
9518 * xmlRecoverFile:
9519 * @filename: the filename
9520 *
9521 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9522 * compressed document is provided by default if found at compile-time.
9523 * In the case the document is not Well Formed, a tree is built anyway
9524 *
9525 * Returns the resulting document tree
9526 */
9527
9528xmlDocPtr
9529xmlRecoverFile(const char *filename) {
9530 return(xmlSAXParseFile(NULL, filename, 1));
9531}
9532
9533
9534/**
9535 * xmlSetupParserForBuffer:
9536 * @ctxt: an XML parser context
9537 * @buffer: a xmlChar * buffer
9538 * @filename: a file name
9539 *
9540 * Setup the parser context to parse a new buffer; Clears any prior
9541 * contents from the parser context. The buffer parameter must not be
9542 * NULL, but the filename parameter can be
9543 */
9544void
9545xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9546 const char* filename)
9547{
9548 xmlParserInputPtr input;
9549
9550 input = xmlNewInputStream(ctxt);
9551 if (input == NULL) {
9552 perror("malloc");
9553 xmlFree(ctxt);
9554 return;
9555 }
9556
9557 xmlClearParserCtxt(ctxt);
9558 if (filename != NULL)
9559 input->filename = xmlMemStrdup(filename);
9560 input->base = buffer;
9561 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009562 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009563 inputPush(ctxt, input);
9564}
9565
9566/**
9567 * xmlSAXUserParseFile:
9568 * @sax: a SAX handler
9569 * @user_data: The user data returned on SAX callbacks
9570 * @filename: a file name
9571 *
9572 * parse an XML file and call the given SAX handler routines.
9573 * Automatic support for ZLIB/Compress compressed document is provided
9574 *
9575 * Returns 0 in case of success or a error number otherwise
9576 */
9577int
9578xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9579 const char *filename) {
9580 int ret = 0;
9581 xmlParserCtxtPtr ctxt;
9582
9583 ctxt = xmlCreateFileParserCtxt(filename);
9584 if (ctxt == NULL) return -1;
9585 if (ctxt->sax != &xmlDefaultSAXHandler)
9586 xmlFree(ctxt->sax);
9587 ctxt->sax = sax;
9588 if (user_data != NULL)
9589 ctxt->userData = user_data;
9590
9591 xmlParseDocument(ctxt);
9592
9593 if (ctxt->wellFormed)
9594 ret = 0;
9595 else {
9596 if (ctxt->errNo != 0)
9597 ret = ctxt->errNo;
9598 else
9599 ret = -1;
9600 }
9601 if (sax != NULL)
9602 ctxt->sax = NULL;
9603 xmlFreeParserCtxt(ctxt);
9604
9605 return ret;
9606}
9607
9608/************************************************************************
9609 * *
9610 * Front ends when parsing from memory *
9611 * *
9612 ************************************************************************/
9613
9614/**
9615 * xmlCreateMemoryParserCtxt:
9616 * @buffer: a pointer to a char array
9617 * @size: the size of the array
9618 *
9619 * Create a parser context for an XML in-memory document.
9620 *
9621 * Returns the new parser context or NULL
9622 */
9623xmlParserCtxtPtr
9624xmlCreateMemoryParserCtxt(char *buffer, int size) {
9625 xmlParserCtxtPtr ctxt;
9626 xmlParserInputPtr input;
9627 xmlParserInputBufferPtr buf;
9628
9629 if (buffer == NULL)
9630 return(NULL);
9631 if (size <= 0)
9632 return(NULL);
9633
9634 ctxt = xmlNewParserCtxt();
9635 if (ctxt == NULL)
9636 return(NULL);
9637
9638 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9639 if (buf == NULL) return(NULL);
9640
9641 input = xmlNewInputStream(ctxt);
9642 if (input == NULL) {
9643 xmlFreeParserCtxt(ctxt);
9644 return(NULL);
9645 }
9646
9647 input->filename = NULL;
9648 input->buf = buf;
9649 input->base = input->buf->buffer->content;
9650 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009651 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009652
9653 inputPush(ctxt, input);
9654 return(ctxt);
9655}
9656
9657/**
9658 * xmlSAXParseMemory:
9659 * @sax: the SAX handler block
9660 * @buffer: an pointer to a char array
9661 * @size: the size of the array
9662 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9663 * documents
9664 *
9665 * parse an XML in-memory block and use the given SAX function block
9666 * to handle the parsing callback. If sax is NULL, fallback to the default
9667 * DOM tree building routines.
9668 *
9669 * Returns the resulting document tree
9670 */
9671xmlDocPtr
9672xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9673 xmlDocPtr ret;
9674 xmlParserCtxtPtr ctxt;
9675
9676 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9677 if (ctxt == NULL) return(NULL);
9678 if (sax != NULL) {
9679 ctxt->sax = sax;
9680 ctxt->userData = NULL;
9681 }
9682
9683 xmlParseDocument(ctxt);
9684
9685 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9686 else {
9687 ret = NULL;
9688 xmlFreeDoc(ctxt->myDoc);
9689 ctxt->myDoc = NULL;
9690 }
9691 if (sax != NULL)
9692 ctxt->sax = NULL;
9693 xmlFreeParserCtxt(ctxt);
9694
9695 return(ret);
9696}
9697
9698/**
9699 * xmlParseMemory:
9700 * @buffer: an pointer to a char array
9701 * @size: the size of the array
9702 *
9703 * parse an XML in-memory block and build a tree.
9704 *
9705 * Returns the resulting document tree
9706 */
9707
9708xmlDocPtr xmlParseMemory(char *buffer, int size) {
9709 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9710}
9711
9712/**
9713 * xmlRecoverMemory:
9714 * @buffer: an pointer to a char array
9715 * @size: the size of the array
9716 *
9717 * parse an XML in-memory block and build a tree.
9718 * In the case the document is not Well Formed, a tree is built anyway
9719 *
9720 * Returns the resulting document tree
9721 */
9722
9723xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9724 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9725}
9726
9727/**
9728 * xmlSAXUserParseMemory:
9729 * @sax: a SAX handler
9730 * @user_data: The user data returned on SAX callbacks
9731 * @buffer: an in-memory XML document input
9732 * @size: the length of the XML document in bytes
9733 *
9734 * A better SAX parsing routine.
9735 * parse an XML in-memory buffer and call the given SAX handler routines.
9736 *
9737 * Returns 0 in case of success or a error number otherwise
9738 */
9739int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9740 char *buffer, int size) {
9741 int ret = 0;
9742 xmlParserCtxtPtr ctxt;
9743 xmlSAXHandlerPtr oldsax = NULL;
9744
9745 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9746 if (ctxt == NULL) return -1;
9747 if (sax != NULL) {
9748 oldsax = ctxt->sax;
9749 ctxt->sax = sax;
9750 }
9751 ctxt->userData = user_data;
9752
9753 xmlParseDocument(ctxt);
9754
9755 if (ctxt->wellFormed)
9756 ret = 0;
9757 else {
9758 if (ctxt->errNo != 0)
9759 ret = ctxt->errNo;
9760 else
9761 ret = -1;
9762 }
9763 if (sax != NULL) {
9764 ctxt->sax = oldsax;
9765 }
9766 xmlFreeParserCtxt(ctxt);
9767
9768 return ret;
9769}
9770
9771/**
9772 * xmlCreateDocParserCtxt:
9773 * @cur: a pointer to an array of xmlChar
9774 *
9775 * Creates a parser context for an XML in-memory document.
9776 *
9777 * Returns the new parser context or NULL
9778 */
9779xmlParserCtxtPtr
9780xmlCreateDocParserCtxt(xmlChar *cur) {
9781 int len;
9782
9783 if (cur == NULL)
9784 return(NULL);
9785 len = xmlStrlen(cur);
9786 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9787}
9788
9789/**
9790 * xmlSAXParseDoc:
9791 * @sax: the SAX handler block
9792 * @cur: a pointer to an array of xmlChar
9793 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9794 * documents
9795 *
9796 * parse an XML in-memory document and build a tree.
9797 * It use the given SAX function block to handle the parsing callback.
9798 * If sax is NULL, fallback to the default DOM tree building routines.
9799 *
9800 * Returns the resulting document tree
9801 */
9802
9803xmlDocPtr
9804xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9805 xmlDocPtr ret;
9806 xmlParserCtxtPtr ctxt;
9807
9808 if (cur == NULL) return(NULL);
9809
9810
9811 ctxt = xmlCreateDocParserCtxt(cur);
9812 if (ctxt == NULL) return(NULL);
9813 if (sax != NULL) {
9814 ctxt->sax = sax;
9815 ctxt->userData = NULL;
9816 }
9817
9818 xmlParseDocument(ctxt);
9819 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9820 else {
9821 ret = NULL;
9822 xmlFreeDoc(ctxt->myDoc);
9823 ctxt->myDoc = NULL;
9824 }
9825 if (sax != NULL)
9826 ctxt->sax = NULL;
9827 xmlFreeParserCtxt(ctxt);
9828
9829 return(ret);
9830}
9831
9832/**
9833 * xmlParseDoc:
9834 * @cur: a pointer to an array of xmlChar
9835 *
9836 * parse an XML in-memory document and build a tree.
9837 *
9838 * Returns the resulting document tree
9839 */
9840
9841xmlDocPtr
9842xmlParseDoc(xmlChar *cur) {
9843 return(xmlSAXParseDoc(NULL, cur, 0));
9844}
9845
9846
9847/************************************************************************
9848 * *
9849 * Miscellaneous *
9850 * *
9851 ************************************************************************/
9852
9853#ifdef LIBXML_XPATH_ENABLED
9854#include <libxml/xpath.h>
9855#endif
9856
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * Repeated calls are no-ops until xmlCleanupParser() is called.
 * This is not reentrant. Call it once before any processing when the
 * library is used in a multithreaded program.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark as done only after every sub-module has been set up. */
        xmlParserInitialized = 1;
    }
}
9885
9886/**
9887 * xmlCleanupParser:
9888 *
9889 * Cleanup function for the XML parser. It tries to reclaim all
9890 * parsing related global memory allocated for the parser processing.
9891 * It doesn't deallocate any document related memory. Calling this
9892 * function should not prevent reusing the parser.
9893 */
9894
9895void
9896xmlCleanupParser(void) {
9897 xmlParserInitialized = 0;
9898 xmlCleanupCharEncodingHandlers();
9899 xmlCleanupPredefinedEntities();
9900}
9901
9902/**
9903 * xmlPedanticParserDefault:
9904 * @val: int 0 or 1
9905 *
9906 * Set and return the previous value for enabling pedantic warnings.
9907 *
9908 * Returns the last value for 0 for no substitution, 1 for substitution.
9909 */
9910
9911int
9912xmlPedanticParserDefault(int val) {
9913 int old = xmlPedanticParserDefaultValue;
9914
9915 xmlPedanticParserDefaultValue = val;
9916 return(old);
9917}
9918
9919/**
9920 * xmlSubstituteEntitiesDefault:
9921 * @val: int 0 or 1
9922 *
9923 * Set and return the previous value for default entity support.
9924 * Initially the parser always keep entity references instead of substituting
9925 * entity values in the output. This function has to be used to change the
9926 * default parser behaviour
9927 * SAX::subtituteEntities() has to be used for changing that on a file by
9928 * file basis.
9929 *
9930 * Returns the last value for 0 for no substitution, 1 for substitution.
9931 */
9932
9933int
9934xmlSubstituteEntitiesDefault(int val) {
9935 int old = xmlSubstituteEntitiesDefaultValue;
9936
9937 xmlSubstituteEntitiesDefaultValue = val;
9938 return(old);
9939}
9940
9941/**
9942 * xmlKeepBlanksDefault:
9943 * @val: int 0 or 1
9944 *
9945 * Set and return the previous value for default blanks text nodes support.
9946 * The 1.x version of the parser used an heuristic to try to detect
9947 * ignorable white spaces. As a result the SAX callback was generating
9948 * ignorableWhitespace() callbacks instead of characters() one, and when
9949 * using the DOM output text nodes containing those blanks were not generated.
9950 * The 2.x and later version will switch to the XML standard way and
9951 * ignorableWhitespace() are only generated when running the parser in
9952 * validating mode and when the current element doesn't allow CDATA or
9953 * mixed content.
9954 * This function is provided as a way to force the standard behaviour
9955 * on 1.X libs and to switch back to the old mode for compatibility when
9956 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9957 * by using xmlIsBlankNode() commodity function to detect the "empty"
9958 * nodes generated.
9959 * This value also affect autogeneration of indentation when saving code
9960 * if blanks sections are kept, indentation is not generated.
9961 *
9962 * Returns the last value for 0 for no substitution, 1 for substitution.
9963 */
9964
9965int
9966xmlKeepBlanksDefault(int val) {
9967 int old = xmlKeepBlanksDefaultValue;
9968
9969 xmlKeepBlanksDefaultValue = val;
9970 xmlIndentTreeOutput = !val;
9971 return(old);
9972}
9973