blob: c967768e9ad119f8489c83b3673aa953728bda04 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file: the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file's size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
Daniel Veillard21a0f912001-02-25 19:54:14 +000078#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000079#define XML_PARSER_BUFFER_SIZE 100
80
/*
 * Various global defaults for parsing
 *
 * NOTE(review): the names suggest these seed the matching per-context
 * parser options; the code consuming most of them is not in this part of
 * the file - confirm before relying on that.
 */
int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, the entity handling code in this file traces input
 * pushes/pops and entity references through xmlGenericError().
 */
int xmlParserDebugEntities = 0;
#ifdef VMS
/*
 * On VMS these two variables are defined under truncated names (see the
 * note in the file header) and the full names are mapped onto them.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
/*
 * Forward declarations: the macros and functions below reference these
 * before their definitions appear.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
/* Forward declaration, called by xmlStringDecodeEntities() further down. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
192/*
193 * Macros for accessing the content. Those should be used only by the parser,
194 * and not exported.
195 *
196 * Dirty macros, i.e. one often need to make assumption on the context to
197 * use them
198 *
199 * CUR_PTR return the current pointer to the xmlChar to be parsed.
200 * To be used with extreme caution since operations consuming
201 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
206 * RAW same as CUR but in the input buffer, bypass any token
207 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
210 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
211 * strings within the parser.
212 *
213 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
214 *
215 * NEXT Skip to the next character, this does the proper decoding
216 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
217 * NEXTL(l) Skip l xmlChars in the input buffer
218 * CUR_CHAR(l) returns the current unicode character (int), set l
219 * to the number of xmlChars used for the encoding [0-5].
220 * CUR_SCHAR same but operate on a string instead of the context
221 * COPY_BUF copy the current unicode char to the target buffer, increment
222 * the index
223 * GROW, SHRINK handling of input buffers
224 */
225
/*
 * All of these expect a variable named "ctxt" (the parser context) to be
 * in scope at the point of use.
 *
 * RAW evaluates to -1 while a token is pending in ctxt->token, CUR
 * evaluates to that pending token instead; otherwise both read the
 * xmlChar under the input cursor.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance the cursor by val xmlChars, hand over to the PE reference
 * handler when landing on '%', and pop the input once it is exhausted
 * and cannot be grown.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare "if"/brace block rather
 * than do { } while (0); their shape is kept as is because uses outside
 * of this part of the file may depend on it.  Do not use them as the
 * unbraced body of an if that has an else.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Single xmlChar step without line/column accounting or input popping. */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Skip l xmlChars, updating line/col from the character being left and
 * dropping any pending token.  The argument is parenthesized so that an
 * expression can be passed safely.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i: a plain byte
 * store when l is 1, xmlCopyChar() otherwise.  Arguments are
 * parenthesized; the if/else statement shape is unchanged, so keep using
 * it as "COPY_BUF(...);" inside a braced block.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v))
278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
387 int val = 0;
388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
496int
497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * Expects an xmlChar pointer variable "buffer" with a companion size
 * variable "buffer_size" (named by token pasting), and doubles both.
 * On allocation failure it makes the *enclosing function* return NULL;
 * the old buffer is released first, since that early return would
 * otherwise drop the only reference to it.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
Daniel Veillard2f362242001-03-02 17:36:21 +00001396 if (ctxt->keepBlanks)
1397 return(0);
1398
Owen Taylor3473f882001-02-23 17:55:21 +00001399 /*
1400 * Check for xml:space value.
1401 */
1402 if (*(ctxt->space) == 1)
1403 return(0);
1404
1405 /*
1406 * Check that the string is made of blanks
1407 */
1408 for (i = 0;i < len;i++)
1409 if (!(IS_BLANK(str[i]))) return(0);
1410
1411 /*
1412 * Look if the element is mixed content in the Dtd if available
1413 */
1414 if (ctxt->myDoc != NULL) {
1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1416 if (ret == 0) return(1);
1417 if (ret == 1) return(0);
1418 }
1419
1420 /*
1421 * Otherwise, heuristic :-\
1422 */
Owen Taylor3473f882001-02-23 17:55:21 +00001423 if (RAW != '<') return(0);
1424 if (ctxt->node == NULL) return(0);
1425 if ((ctxt->node->children == NULL) &&
1426 (RAW == '<') && (NXT(1) == '/')) return(0);
1427
1428 lastChild = xmlGetLastChild(ctxt->node);
1429 if (lastChild == NULL) {
1430 if (ctxt->node->content != NULL) return(0);
1431 } else if (xmlNodeIsText(lastChild))
1432 return(0);
1433 else if ((ctxt->node->children != NULL) &&
1434 (xmlNodeIsText(ctxt->node->children)))
1435 return(0);
1436 return(1);
1437}
1438
1439/*
1440 * Forward definition for recursive behaviour.
1441 */
1442void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1443void xmlParseReference(xmlParserCtxtPtr ctxt);
1444
1445/************************************************************************
1446 * *
1447 * Extra stuff for namespace support *
1448 * Relates to http://www.w3.org/TR/WD-xml-names *
1449 * *
1450 ************************************************************************/
1451
1452/**
1453 * xmlSplitQName:
1454 * @ctxt: an XML parser context
1455 * @name: an XML parser context
1456 * @prefix: a xmlChar **
1457 *
1458 * parse an UTF8 encoded XML qualified name string
1459 *
1460 * [NS 5] QName ::= (Prefix ':')? LocalPart
1461 *
1462 * [NS 6] Prefix ::= NCName
1463 *
1464 * [NS 7] LocalPart ::= NCName
1465 *
1466 * Returns the local part, and prefix is updated
1467 * to get the Prefix if any.
1468 */
1469
1470xmlChar *
1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1472 xmlChar buf[XML_MAX_NAMELEN + 5];
1473 xmlChar *buffer = NULL;
1474 int len = 0;
1475 int max = XML_MAX_NAMELEN;
1476 xmlChar *ret = NULL;
1477 const xmlChar *cur = name;
1478 int c;
1479
1480 *prefix = NULL;
1481
1482#ifndef XML_XML_NAMESPACE
1483 /* xml: prefix is not really a namespace */
1484 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1485 (cur[2] == 'l') && (cur[3] == ':'))
1486 return(xmlStrdup(name));
1487#endif
1488
1489 /* nasty but valid */
1490 if (cur[0] == ':')
1491 return(xmlStrdup(name));
1492
1493 c = *cur++;
1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1495 buf[len++] = c;
1496 c = *cur++;
1497 }
1498 if (len >= max) {
1499 /*
1500 * Okay someone managed to make a huge name, so he's ready to pay
1501 * for the processing speed.
1502 */
1503 max = len * 2;
1504
1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1506 if (buffer == NULL) {
1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt->userData,
1509 "xmlSplitQName: out of memory\n");
1510 return(NULL);
1511 }
1512 memcpy(buffer, buf, len);
1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1514 if (len + 10 > max) {
1515 max *= 2;
1516 buffer = (xmlChar *) xmlRealloc(buffer,
1517 max * sizeof(xmlChar));
1518 if (buffer == NULL) {
1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1520 ctxt->sax->error(ctxt->userData,
1521 "xmlSplitQName: out of memory\n");
1522 return(NULL);
1523 }
1524 }
1525 buffer[len++] = c;
1526 c = *cur++;
1527 }
1528 buffer[len] = 0;
1529 }
1530
1531 if (buffer == NULL)
1532 ret = xmlStrndup(buf, len);
1533 else {
1534 ret = buffer;
1535 buffer = NULL;
1536 max = XML_MAX_NAMELEN;
1537 }
1538
1539
1540 if (c == ':') {
1541 c = *cur++;
1542 if (c == 0) return(ret);
1543 *prefix = ret;
1544 len = 0;
1545
1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1547 buf[len++] = c;
1548 c = *cur++;
1549 }
1550 if (len >= max) {
1551 /*
1552 * Okay someone managed to make a huge name, so he's ready to pay
1553 * for the processing speed.
1554 */
1555 max = len * 2;
1556
1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1558 if (buffer == NULL) {
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData,
1561 "xmlSplitQName: out of memory\n");
1562 return(NULL);
1563 }
1564 memcpy(buffer, buf, len);
1565 while (c != 0) { /* tested bigname2.xml */
1566 if (len + 10 > max) {
1567 max *= 2;
1568 buffer = (xmlChar *) xmlRealloc(buffer,
1569 max * sizeof(xmlChar));
1570 if (buffer == NULL) {
1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1572 ctxt->sax->error(ctxt->userData,
1573 "xmlSplitQName: out of memory\n");
1574 return(NULL);
1575 }
1576 }
1577 buffer[len++] = c;
1578 c = *cur++;
1579 }
1580 buffer[len] = 0;
1581 }
1582
1583 if (buffer == NULL)
1584 ret = xmlStrndup(buf, len);
1585 else {
1586 ret = buffer;
1587 }
1588 }
1589
1590 return(ret);
1591}
1592
1593/************************************************************************
1594 * *
1595 * The parser itself *
1596 * Relates to http://www.w3.org/TR/REC-xml *
1597 * *
1598 ************************************************************************/
1599
Daniel Veillard21a0f912001-02-25 19:54:14 +00001600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001601/**
1602 * xmlParseName:
1603 * @ctxt: an XML parser context
1604 *
1605 * parse an XML name.
1606 *
1607 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1608 * CombiningChar | Extender
1609 *
1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1611 *
1612 * [6] Names ::= Name (S Name)*
1613 *
1614 * Returns the Name parsed or NULL
1615 */
1616
1617xmlChar *
1618xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001619 const xmlChar *in;
1620 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 int count = 0;
1622
1623 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001624
1625 /*
1626 * Accelerator for simple ASCII names
1627 */
1628 in = ctxt->input->cur;
1629 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1630 ((*in >= 0x41) && (*in <= 0x5A)) ||
1631 (*in == '_') || (*in == ':')) {
1632 in++;
1633 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1634 ((*in >= 0x41) && (*in <= 0x5A)) ||
1635 ((*in >= 0x30) && (*in <= 0x39)) ||
1636 (*in == '_') || (*in == ':'))
1637 in++;
1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1639 count = in - ctxt->input->cur;
1640 ret = xmlStrndup(ctxt->input->cur, count);
1641 ctxt->input->cur = in;
1642 return(ret);
1643 }
1644 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001645 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001646}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001647
Daniel Veillard21a0f912001-02-25 19:54:14 +00001648xmlChar *
1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1650 xmlChar buf[XML_MAX_NAMELEN + 5];
1651 int len = 0, l;
1652 int c;
1653 int count = 0;
1654
1655 /*
1656 * Handler for more complex cases
1657 */
1658 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001659 c = CUR_CHAR(l);
1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1661 (!IS_LETTER(c) && (c != '_') &&
1662 (c != ':'))) {
1663 return(NULL);
1664 }
1665
1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1668 (c == '.') || (c == '-') ||
1669 (c == '_') || (c == ':') ||
1670 (IS_COMBINING(c)) ||
1671 (IS_EXTENDER(c)))) {
1672 if (count++ > 100) {
1673 count = 0;
1674 GROW;
1675 }
1676 COPY_BUF(l,buf,len,c);
1677 NEXTL(l);
1678 c = CUR_CHAR(l);
1679 if (len >= XML_MAX_NAMELEN) {
1680 /*
1681 * Okay someone managed to make a huge name, so he's ready to pay
1682 * for the processing speed.
1683 */
1684 xmlChar *buffer;
1685 int max = len * 2;
1686
1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688 if (buffer == NULL) {
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001691 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
1694 memcpy(buffer, buf, len);
1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1696 (c == '.') || (c == '-') ||
1697 (c == '_') || (c == ':') ||
1698 (IS_COMBINING(c)) ||
1699 (IS_EXTENDER(c))) {
1700 if (count++ > 100) {
1701 count = 0;
1702 GROW;
1703 }
1704 if (len + 10 > max) {
1705 max *= 2;
1706 buffer = (xmlChar *) xmlRealloc(buffer,
1707 max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001711 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001712 return(NULL);
1713 }
1714 }
1715 COPY_BUF(l,buffer,len,c);
1716 NEXTL(l);
1717 c = CUR_CHAR(l);
1718 }
1719 buffer[len] = 0;
1720 return(buffer);
1721 }
1722 }
1723 return(xmlStrndup(buf, len));
1724}
1725
1726/**
1727 * xmlParseStringName:
1728 * @ctxt: an XML parser context
1729 * @str: a pointer to the string pointer (IN/OUT)
1730 *
1731 * parse an XML name.
1732 *
1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1734 * CombiningChar | Extender
1735 *
1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1737 *
1738 * [6] Names ::= Name (S Name)*
1739 *
1740 * Returns the Name parsed or NULL. The str pointer
1741 * is updated to the current location in the string.
1742 */
1743
1744xmlChar *
1745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 const xmlChar *cur = *str;
1748 int len = 0, l;
1749 int c;
1750
1751 c = CUR_SCHAR(cur, l);
1752 if (!IS_LETTER(c) && (c != '_') &&
1753 (c != ':')) {
1754 return(NULL);
1755 }
1756
1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c))) {
1762 COPY_BUF(l,buf,len,c);
1763 cur += l;
1764 c = CUR_SCHAR(cur, l);
1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
1777 "xmlParseStringName: out of memory\n");
1778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (len + 10 > max) {
1787 max *= 2;
1788 buffer = (xmlChar *) xmlRealloc(buffer,
1789 max * sizeof(xmlChar));
1790 if (buffer == NULL) {
1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1792 ctxt->sax->error(ctxt->userData,
1793 "xmlParseStringName: out of memory\n");
1794 return(NULL);
1795 }
1796 }
1797 COPY_BUF(l,buffer,len,c);
1798 cur += l;
1799 c = CUR_SCHAR(cur, l);
1800 }
1801 buffer[len] = 0;
1802 *str = cur;
1803 return(buffer);
1804 }
1805 }
1806 *str = cur;
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseNmtoken:
1812 * @ctxt: an XML parser context
1813 *
1814 * parse an XML Nmtoken.
1815 *
1816 * [7] Nmtoken ::= (NameChar)+
1817 *
1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1819 *
1820 * Returns the Nmtoken parsed or NULL
1821 */
1822
1823xmlChar *
1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1825 xmlChar buf[XML_MAX_NAMELEN + 5];
1826 int len = 0, l;
1827 int c;
1828 int count = 0;
1829
1830 GROW;
1831 c = CUR_CHAR(l);
1832
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (count++ > 100) {
1839 count = 0;
1840 GROW;
1841 }
1842 COPY_BUF(l,buf,len,c);
1843 NEXTL(l);
1844 c = CUR_CHAR(l);
1845 if (len >= XML_MAX_NAMELEN) {
1846 /*
1847 * Okay someone managed to make a huge token, so he's ready to pay
1848 * for the processing speed.
1849 */
1850 xmlChar *buffer;
1851 int max = len * 2;
1852
1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1854 if (buffer == NULL) {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlParseNmtoken: out of memory\n");
1858 return(NULL);
1859 }
1860 memcpy(buffer, buf, len);
1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1862 (c == '.') || (c == '-') ||
1863 (c == '_') || (c == ':') ||
1864 (IS_COMBINING(c)) ||
1865 (IS_EXTENDER(c))) {
1866 if (count++ > 100) {
1867 count = 0;
1868 GROW;
1869 }
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001877 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 NEXTL(l);
1883 c = CUR_CHAR(l);
1884 }
1885 buffer[len] = 0;
1886 return(buffer);
1887 }
1888 }
1889 if (len == 0)
1890 return(NULL);
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseEntityValue:
1896 * @ctxt: an XML parser context
1897 * @orig: if non-NULL store a copy of the original entity value
1898 *
1899 * parse a value for ENTITY declarations
1900 *
1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1902 * "'" ([^%&'] | PEReference | Reference)* "'"
1903 *
1904 * Returns the EntityValue parsed with reference substitued or NULL
1905 */
1906
1907xmlChar *
1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1909 xmlChar *buf = NULL;
1910 int len = 0;
1911 int size = XML_PARSER_BUFFER_SIZE;
1912 int c, l;
1913 xmlChar stop;
1914 xmlChar *ret = NULL;
1915 const xmlChar *cur = NULL;
1916 xmlParserInputPtr input;
1917
1918 if (RAW == '"') stop = '"';
1919 else if (RAW == '\'') stop = '\'';
1920 else {
1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1924 ctxt->wellFormed = 0;
1925 ctxt->disableSAX = 1;
1926 return(NULL);
1927 }
1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1929 if (buf == NULL) {
1930 xmlGenericError(xmlGenericErrorContext,
1931 "malloc of %d byte failed\n", size);
1932 return(NULL);
1933 }
1934
1935 /*
1936 * The content of the entity definition is copied in a buffer.
1937 */
1938
1939 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1940 input = ctxt->input;
1941 GROW;
1942 NEXT;
1943 c = CUR_CHAR(l);
1944 /*
1945 * NOTE: 4.4.5 Included in Literal
1946 * When a parameter entity reference appears in a literal entity
1947 * value, ... a single or double quote character in the replacement
1948 * text is always treated as a normal data character and will not
1949 * terminate the literal.
1950 * In practice it means we stop the loop only when back at parsing
1951 * the initial entity and the quote is found
1952 */
1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1954 (ctxt->input != input))) {
1955 if (len + 5 >= size) {
1956 size *= 2;
1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1958 if (buf == NULL) {
1959 xmlGenericError(xmlGenericErrorContext,
1960 "realloc of %d byte failed\n", size);
1961 return(NULL);
1962 }
1963 }
1964 COPY_BUF(l,buf,len,c);
1965 NEXTL(l);
1966 /*
1967 * Pop-up of finished entities.
1968 */
1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1970 xmlPopInput(ctxt);
1971
1972 GROW;
1973 c = CUR_CHAR(l);
1974 if (c == 0) {
1975 GROW;
1976 c = CUR_CHAR(l);
1977 }
1978 }
1979 buf[len] = 0;
1980
1981 /*
1982 * Raise problem w.r.t. '&' and '%' being used in non-entities
1983 * reference constructs. Note Charref will be handled in
1984 * xmlStringDecodeEntities()
1985 */
1986 cur = buf;
1987 while (*cur != 0) { /* non input consuming */
1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1989 xmlChar *name;
1990 xmlChar tmp = *cur;
1991
1992 cur++;
1993 name = xmlParseStringName(ctxt, &cur);
1994 if ((name == NULL) || (*cur != ';')) {
1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "EntityValue: '%c' forbidden except for entities references\n",
1999 tmp);
2000 ctxt->wellFormed = 0;
2001 ctxt->disableSAX = 1;
2002 }
2003 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "EntityValue: PEReferences forbidden in internal subset\n",
2008 tmp);
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 }
2012 if (name != NULL)
2013 xmlFree(name);
2014 }
2015 cur++;
2016 }
2017
2018 /*
2019 * Then PEReference entities are substituted.
2020 */
2021 if (c != stop) {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 xmlFree(buf);
2028 } else {
2029 NEXT;
2030 /*
2031 * NOTE: 4.4.7 Bypassed
2032 * When a general entity reference appears in the EntityValue in
2033 * an entity declaration, it is bypassed and left as is.
2034 * so XML_SUBSTITUTE_REF is not set here.
2035 */
2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2037 0, 0, 0);
2038 if (orig != NULL)
2039 *orig = buf;
2040 else
2041 xmlFree(buf);
2042 }
2043
2044 return(ret);
2045}
2046
2047/**
2048 * xmlParseAttValue:
2049 * @ctxt: an XML parser context
2050 *
2051 * parse a value for an attribute
2052 * Note: the parser won't do substitution of entities here, this
2053 * will be handled later in xmlStringGetNodeList
2054 *
2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2056 * "'" ([^<&'] | Reference)* "'"
2057 *
2058 * 3.3.3 Attribute-Value Normalization:
2059 * Before the value of an attribute is passed to the application or
2060 * checked for validity, the XML processor must normalize it as follows:
2061 * - a character reference is processed by appending the referenced
2062 * character to the attribute value
2063 * - an entity reference is processed by recursively processing the
2064 * replacement text of the entity
2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2066 * appending #x20 to the normalized value, except that only a single
2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2068 * parsed entity or the literal entity value of an internal parsed entity
2069 * - other characters are processed by appending them to the normalized value
2070 * If the declared value is not CDATA, then the XML processor must further
2071 * process the normalized attribute value by discarding any leading and
2072 * trailing space (#x20) characters, and by replacing sequences of space
2073 * (#x20) characters by a single space (#x20) character.
2074 * All attributes for which no declaration has been read should be treated
2075 * by a non-validating parser as if declared CDATA.
2076 *
2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2078 */
2079
2080xmlChar *
2081xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2082 xmlChar limit = 0;
2083 xmlChar *buf = NULL;
2084 int len = 0;
2085 int buf_size = 0;
2086 int c, l;
2087 xmlChar *current = NULL;
2088 xmlEntityPtr ent;
2089
2090
2091 SHRINK;
2092 if (NXT(0) == '"') {
2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2094 limit = '"';
2095 NEXT;
2096 } else if (NXT(0) == '\'') {
2097 limit = '\'';
2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2099 NEXT;
2100 } else {
2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2104 ctxt->wellFormed = 0;
2105 ctxt->disableSAX = 1;
2106 return(NULL);
2107 }
2108
2109 /*
2110 * allocate a translation buffer.
2111 */
2112 buf_size = XML_PARSER_BUFFER_SIZE;
2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2114 if (buf == NULL) {
2115 perror("xmlParseAttValue: malloc failed");
2116 return(NULL);
2117 }
2118
2119 /*
2120 * Ok loop until we reach one of the ending char or a size limit.
2121 */
2122 c = CUR_CHAR(l);
2123 while (((NXT(0) != limit) && /* checked */
2124 (c != '<')) || (ctxt->token != 0)) {
2125 if (c == 0) break;
2126 if (ctxt->token == '&') {
2127 /*
2128 * The reparsing will be done in xmlStringGetNodeList()
2129 * called by the attribute() function in SAX.c
2130 */
2131 static xmlChar buffer[6] = "&#38;";
2132
2133 if (len > buf_size - 10) {
2134 growBuffer(buf);
2135 }
2136 current = &buffer[0];
2137 while (*current != 0) { /* non input consuming */
2138 buf[len++] = *current++;
2139 }
2140 ctxt->token = 0;
2141 } else if (c == '&') {
2142 if (NXT(1) == '#') {
2143 int val = xmlParseCharRef(ctxt);
2144 if (val == '&') {
2145 /*
2146 * The reparsing will be done in xmlStringGetNodeList()
2147 * called by the attribute() function in SAX.c
2148 */
2149 static xmlChar buffer[6] = "&#38;";
2150
2151 if (len > buf_size - 10) {
2152 growBuffer(buf);
2153 }
2154 current = &buffer[0];
2155 while (*current != 0) { /* non input consuming */
2156 buf[len++] = *current++;
2157 }
2158 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002159 if (len > buf_size - 10) {
2160 growBuffer(buf);
2161 }
Owen Taylor3473f882001-02-23 17:55:21 +00002162 len += xmlCopyChar(0, &buf[len], val);
2163 }
2164 } else {
2165 ent = xmlParseEntityRef(ctxt);
2166 if ((ent != NULL) &&
2167 (ctxt->replaceEntities != 0)) {
2168 xmlChar *rep;
2169
2170 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2171 rep = xmlStringDecodeEntities(ctxt, ent->content,
2172 XML_SUBSTITUTE_REF, 0, 0, 0);
2173 if (rep != NULL) {
2174 current = rep;
2175 while (*current != 0) { /* non input consuming */
2176 buf[len++] = *current++;
2177 if (len > buf_size - 10) {
2178 growBuffer(buf);
2179 }
2180 }
2181 xmlFree(rep);
2182 }
2183 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002184 if (len > buf_size - 10) {
2185 growBuffer(buf);
2186 }
Owen Taylor3473f882001-02-23 17:55:21 +00002187 if (ent->content != NULL)
2188 buf[len++] = ent->content[0];
2189 }
2190 } else if (ent != NULL) {
2191 int i = xmlStrlen(ent->name);
2192 const xmlChar *cur = ent->name;
2193
2194 /*
2195 * This may look absurd but is needed to detect
2196 * entities problems
2197 */
2198 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2199 (ent->content != NULL)) {
2200 xmlChar *rep;
2201 rep = xmlStringDecodeEntities(ctxt, ent->content,
2202 XML_SUBSTITUTE_REF, 0, 0, 0);
2203 if (rep != NULL)
2204 xmlFree(rep);
2205 }
2206
2207 /*
2208 * Just output the reference
2209 */
2210 buf[len++] = '&';
2211 if (len > buf_size - i - 10) {
2212 growBuffer(buf);
2213 }
2214 for (;i > 0;i--)
2215 buf[len++] = *cur++;
2216 buf[len++] = ';';
2217 }
2218 }
2219 } else {
2220 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2221 COPY_BUF(l,buf,len,0x20);
2222 if (len > buf_size - 10) {
2223 growBuffer(buf);
2224 }
2225 } else {
2226 COPY_BUF(l,buf,len,c);
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 }
2231 NEXTL(l);
2232 }
2233 GROW;
2234 c = CUR_CHAR(l);
2235 }
2236 buf[len++] = 0;
2237 if (RAW == '<') {
2238 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2240 ctxt->sax->error(ctxt->userData,
2241 "Unescaped '<' not allowed in attributes values\n");
2242 ctxt->wellFormed = 0;
2243 ctxt->disableSAX = 1;
2244 } else if (RAW != limit) {
2245 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2247 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2248 ctxt->wellFormed = 0;
2249 ctxt->disableSAX = 1;
2250 } else
2251 NEXT;
2252 return(buf);
2253}
2254
2255/**
2256 * xmlParseSystemLiteral:
2257 * @ctxt: an XML parser context
2258 *
2259 * parse an XML Literal
2260 *
2261 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2262 *
2263 * Returns the SystemLiteral parsed or NULL
2264 */
2265
2266xmlChar *
2267xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2268 xmlChar *buf = NULL;
2269 int len = 0;
2270 int size = XML_PARSER_BUFFER_SIZE;
2271 int cur, l;
2272 xmlChar stop;
2273 int state = ctxt->instate;
2274 int count = 0;
2275
2276 SHRINK;
2277 if (RAW == '"') {
2278 NEXT;
2279 stop = '"';
2280 } else if (RAW == '\'') {
2281 NEXT;
2282 stop = '\'';
2283 } else {
2284 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2286 ctxt->sax->error(ctxt->userData,
2287 "SystemLiteral \" or ' expected\n");
2288 ctxt->wellFormed = 0;
2289 ctxt->disableSAX = 1;
2290 return(NULL);
2291 }
2292
2293 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2294 if (buf == NULL) {
2295 xmlGenericError(xmlGenericErrorContext,
2296 "malloc of %d byte failed\n", size);
2297 return(NULL);
2298 }
2299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2300 cur = CUR_CHAR(l);
2301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2302 if (len + 5 >= size) {
2303 size *= 2;
2304 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2305 if (buf == NULL) {
2306 xmlGenericError(xmlGenericErrorContext,
2307 "realloc of %d byte failed\n", size);
2308 ctxt->instate = (xmlParserInputState) state;
2309 return(NULL);
2310 }
2311 }
2312 count++;
2313 if (count > 50) {
2314 GROW;
2315 count = 0;
2316 }
2317 COPY_BUF(l,buf,len,cur);
2318 NEXTL(l);
2319 cur = CUR_CHAR(l);
2320 if (cur == 0) {
2321 GROW;
2322 SHRINK;
2323 cur = CUR_CHAR(l);
2324 }
2325 }
2326 buf[len] = 0;
2327 ctxt->instate = (xmlParserInputState) state;
2328 if (!IS_CHAR(cur)) {
2329 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else {
2335 NEXT;
2336 }
2337 return(buf);
2338}
2339
2340/**
2341 * xmlParsePubidLiteral:
2342 * @ctxt: an XML parser context
2343 *
2344 * parse an XML public literal
2345 *
2346 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2347 *
2348 * Returns the PubidLiteral parsed or NULL.
2349 */
2350
2351xmlChar *
2352xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2353 xmlChar *buf = NULL;
2354 int len = 0;
2355 int size = XML_PARSER_BUFFER_SIZE;
2356 xmlChar cur;
2357 xmlChar stop;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2377 if (buf == NULL) {
2378 xmlGenericError(xmlGenericErrorContext,
2379 "malloc of %d byte failed\n", size);
2380 return(NULL);
2381 }
2382 cur = CUR;
2383 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2384 if (len + 1 >= size) {
2385 size *= 2;
2386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2387 if (buf == NULL) {
2388 xmlGenericError(xmlGenericErrorContext,
2389 "realloc of %d byte failed\n", size);
2390 return(NULL);
2391 }
2392 }
2393 buf[len++] = cur;
2394 count++;
2395 if (count > 50) {
2396 GROW;
2397 count = 0;
2398 }
2399 NEXT;
2400 cur = CUR;
2401 if (cur == 0) {
2402 GROW;
2403 SHRINK;
2404 cur = CUR;
2405 }
2406 }
2407 buf[len] = 0;
2408 if (cur != stop) {
2409 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2412 ctxt->wellFormed = 0;
2413 ctxt->disableSAX = 1;
2414 } else {
2415 NEXT;
2416 }
2417 return(buf);
2418}
2419
Daniel Veillard48b2f892001-02-25 16:11:03 +00002420void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002421/**
2422 * xmlParseCharData:
2423 * @ctxt: an XML parser context
2424 * @cdata: int indicating whether we are within a CDATA section
2425 *
2426 * parse a CharData section.
2427 * if we are within a CDATA section ']]>' marks an end of section.
2428 *
2429 * The right angle bracket (>) may be represented using the string "&gt;",
2430 * and must, for compatibility, be escaped using "&gt;" or a character
2431 * reference when it appears in the string "]]>" in content, when that
2432 * string is not marking the end of a CDATA section.
2433 *
2434 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2435 */
2436
2437void
2438xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002439 const xmlChar *in;
2440 int nbchar = 0;
2441
2442 SHRINK;
2443 GROW;
2444 /*
2445 * Accelerated common case where input don't need to be
2446 * modified before passing it to the handler.
2447 */
2448 if ((ctxt->token == 0) && (!cdata)) {
2449 in = ctxt->input->cur;
2450 do {
2451 while (((*in >= 0x20) && (*in != '<') &&
2452 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2453 in++;
2454 if (*in == 0xA) {
2455 ctxt->input->line++;
2456 continue; /* while */
2457 }
2458 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002459 if (nbchar > 0) {
2460 if (IS_BLANK(*ctxt->input->cur) &&
2461 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2462 if (ctxt->sax->ignorableWhitespace != NULL)
2463 ctxt->sax->ignorableWhitespace(ctxt->userData,
2464 ctxt->input->cur, nbchar);
2465 } else {
2466 if (ctxt->sax->characters != NULL)
2467 ctxt->sax->characters(ctxt->userData,
2468 ctxt->input->cur, nbchar);
2469 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002470 }
2471 ctxt->input->cur = in;
2472 if (*in == 0xD) {
2473 in++;
2474 if (*in == 0xA) {
2475 ctxt->input->cur = in;
2476 in++;
2477 ctxt->input->line++;
2478 continue; /* while */
2479 }
2480 in--;
2481 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002482 if (*in == '<') {
2483 return;
2484 }
2485 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002486 return;
2487 }
2488 SHRINK;
2489 GROW;
2490 in = ctxt->input->cur;
2491 } while ((*in >= 0x20) && (*in <= 0x7F));
2492 nbchar = 0;
2493 }
2494 xmlParseCharDataComplex(ctxt, cdata);
2495}
2496
2497void
2498xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002499 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2500 int nbchar = 0;
2501 int cur, l;
2502 int count = 0;
2503
2504 SHRINK;
2505 GROW;
2506 cur = CUR_CHAR(l);
2507 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2508 ((cur != '&') || (ctxt->token == '&')) &&
2509 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2510 if ((cur == ']') && (NXT(1) == ']') &&
2511 (NXT(2) == '>')) {
2512 if (cdata) break;
2513 else {
2514 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2516 ctxt->sax->error(ctxt->userData,
2517 "Sequence ']]>' not allowed in content\n");
2518 /* Should this be relaxed ??? I see a "must here */
2519 ctxt->wellFormed = 0;
2520 ctxt->disableSAX = 1;
2521 }
2522 }
2523 COPY_BUF(l,buf,nbchar,cur);
2524 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2525 /*
2526 * Ok the segment is to be consumed as chars.
2527 */
2528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2529 if (areBlanks(ctxt, buf, nbchar)) {
2530 if (ctxt->sax->ignorableWhitespace != NULL)
2531 ctxt->sax->ignorableWhitespace(ctxt->userData,
2532 buf, nbchar);
2533 } else {
2534 if (ctxt->sax->characters != NULL)
2535 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2536 }
2537 }
2538 nbchar = 0;
2539 }
2540 count++;
2541 if (count > 50) {
2542 GROW;
2543 count = 0;
2544 }
2545 NEXTL(l);
2546 cur = CUR_CHAR(l);
2547 }
2548 if (nbchar != 0) {
2549 /*
2550 * Ok the segment is to be consumed as chars.
2551 */
2552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2553 if (areBlanks(ctxt, buf, nbchar)) {
2554 if (ctxt->sax->ignorableWhitespace != NULL)
2555 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2556 } else {
2557 if (ctxt->sax->characters != NULL)
2558 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2559 }
2560 }
2561 }
2562}
2563
2564/**
2565 * xmlParseExternalID:
2566 * @ctxt: an XML parser context
2567 * @publicID: a xmlChar** receiving PubidLiteral
2568 * @strict: indicate whether we should restrict parsing to only
2569 * production [75], see NOTE below
2570 *
2571 * Parse an External ID or a Public ID
2572 *
2573 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2574 * 'PUBLIC' S PubidLiteral S SystemLiteral
2575 *
2576 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2577 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2578 *
2579 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2580 *
2581 * Returns the function returns SystemLiteral and in the second
2582 * case publicID receives PubidLiteral, is strict is off
2583 * it is possible to return NULL and have publicID set.
2584 */
2585
2586xmlChar *
2587xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2588 xmlChar *URI = NULL;
2589
2590 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002591
2592 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002593 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2594 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2595 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2596 SKIP(6);
2597 if (!IS_BLANK(CUR)) {
2598 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2600 ctxt->sax->error(ctxt->userData,
2601 "Space required after 'SYSTEM'\n");
2602 ctxt->wellFormed = 0;
2603 ctxt->disableSAX = 1;
2604 }
2605 SKIP_BLANKS;
2606 URI = xmlParseSystemLiteral(ctxt);
2607 if (URI == NULL) {
2608 ctxt->errNo = XML_ERR_URI_REQUIRED;
2609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2610 ctxt->sax->error(ctxt->userData,
2611 "xmlParseExternalID: SYSTEM, no URI\n");
2612 ctxt->wellFormed = 0;
2613 ctxt->disableSAX = 1;
2614 }
2615 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2616 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2617 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2618 SKIP(6);
2619 if (!IS_BLANK(CUR)) {
2620 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2622 ctxt->sax->error(ctxt->userData,
2623 "Space required after 'PUBLIC'\n");
2624 ctxt->wellFormed = 0;
2625 ctxt->disableSAX = 1;
2626 }
2627 SKIP_BLANKS;
2628 *publicID = xmlParsePubidLiteral(ctxt);
2629 if (*publicID == NULL) {
2630 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2632 ctxt->sax->error(ctxt->userData,
2633 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2634 ctxt->wellFormed = 0;
2635 ctxt->disableSAX = 1;
2636 }
2637 if (strict) {
2638 /*
2639 * We don't handle [83] so "S SystemLiteral" is required.
2640 */
2641 if (!IS_BLANK(CUR)) {
2642 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2644 ctxt->sax->error(ctxt->userData,
2645 "Space required after the Public Identifier\n");
2646 ctxt->wellFormed = 0;
2647 ctxt->disableSAX = 1;
2648 }
2649 } else {
2650 /*
2651 * We handle [83] so we return immediately, if
2652 * "S SystemLiteral" is not detected. From a purely parsing
2653 * point of view that's a nice mess.
2654 */
2655 const xmlChar *ptr;
2656 GROW;
2657
2658 ptr = CUR_PTR;
2659 if (!IS_BLANK(*ptr)) return(NULL);
2660
2661 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2662 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2663 }
2664 SKIP_BLANKS;
2665 URI = xmlParseSystemLiteral(ctxt);
2666 if (URI == NULL) {
2667 ctxt->errNo = XML_ERR_URI_REQUIRED;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData,
2670 "xmlParseExternalID: PUBLIC, no URI\n");
2671 ctxt->wellFormed = 0;
2672 ctxt->disableSAX = 1;
2673 }
2674 }
2675 return(URI);
2676}
2677
2678/**
2679 * xmlParseComment:
2680 * @ctxt: an XML parser context
2681 *
2682 * Skip an XML (SGML) comment <!-- .... -->
2683 * The spec says that "For compatibility, the string "--" (double-hyphen)
2684 * must not occur within comments. "
2685 *
2686 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2687 */
2688void
2689xmlParseComment(xmlParserCtxtPtr ctxt) {
2690 xmlChar *buf = NULL;
2691 int len;
2692 int size = XML_PARSER_BUFFER_SIZE;
2693 int q, ql;
2694 int r, rl;
2695 int cur, l;
2696 xmlParserInputState state;
2697 xmlParserInputPtr input = ctxt->input;
2698 int count = 0;
2699
2700 /*
2701 * Check that there is a comment right here.
2702 */
2703 if ((RAW != '<') || (NXT(1) != '!') ||
2704 (NXT(2) != '-') || (NXT(3) != '-')) return;
2705
2706 state = ctxt->instate;
2707 ctxt->instate = XML_PARSER_COMMENT;
2708 SHRINK;
2709 SKIP(4);
2710 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2711 if (buf == NULL) {
2712 xmlGenericError(xmlGenericErrorContext,
2713 "malloc of %d byte failed\n", size);
2714 ctxt->instate = state;
2715 return;
2716 }
2717 q = CUR_CHAR(ql);
2718 NEXTL(ql);
2719 r = CUR_CHAR(rl);
2720 NEXTL(rl);
2721 cur = CUR_CHAR(l);
2722 len = 0;
2723 while (IS_CHAR(cur) && /* checked */
2724 ((cur != '>') ||
2725 (r != '-') || (q != '-'))) {
2726 if ((r == '-') && (q == '-') && (len > 1)) {
2727 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2729 ctxt->sax->error(ctxt->userData,
2730 "Comment must not contain '--' (double-hyphen)`\n");
2731 ctxt->wellFormed = 0;
2732 ctxt->disableSAX = 1;
2733 }
2734 if (len + 5 >= size) {
2735 size *= 2;
2736 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2737 if (buf == NULL) {
2738 xmlGenericError(xmlGenericErrorContext,
2739 "realloc of %d byte failed\n", size);
2740 ctxt->instate = state;
2741 return;
2742 }
2743 }
2744 COPY_BUF(ql,buf,len,q);
2745 q = r;
2746 ql = rl;
2747 r = cur;
2748 rl = l;
2749
2750 count++;
2751 if (count > 50) {
2752 GROW;
2753 count = 0;
2754 }
2755 NEXTL(l);
2756 cur = CUR_CHAR(l);
2757 if (cur == 0) {
2758 SHRINK;
2759 GROW;
2760 cur = CUR_CHAR(l);
2761 }
2762 }
2763 buf[len] = 0;
2764 if (!IS_CHAR(cur)) {
2765 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2767 ctxt->sax->error(ctxt->userData,
2768 "Comment not terminated \n<!--%.50s\n", buf);
2769 ctxt->wellFormed = 0;
2770 ctxt->disableSAX = 1;
2771 xmlFree(buf);
2772 } else {
2773 if (input != ctxt->input) {
2774 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777"Comment doesn't start and stop in the same entity\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 NEXT;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2783 (!ctxt->disableSAX))
2784 ctxt->sax->comment(ctxt->userData, buf);
2785 xmlFree(buf);
2786 }
2787 ctxt->instate = state;
2788}
2789
2790/**
2791 * xmlParsePITarget:
2792 * @ctxt: an XML parser context
2793 *
2794 * parse the name of a PI
2795 *
2796 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2797 *
2798 * Returns the PITarget name or NULL
2799 */
2800
2801xmlChar *
2802xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2803 xmlChar *name;
2804
2805 name = xmlParseName(ctxt);
2806 if ((name != NULL) &&
2807 ((name[0] == 'x') || (name[0] == 'X')) &&
2808 ((name[1] == 'm') || (name[1] == 'M')) &&
2809 ((name[2] == 'l') || (name[2] == 'L'))) {
2810 int i;
2811 if ((name[0] == 'x') && (name[1] == 'm') &&
2812 (name[2] == 'l') && (name[3] == 0)) {
2813 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2815 ctxt->sax->error(ctxt->userData,
2816 "XML declaration allowed only at the start of the document\n");
2817 ctxt->wellFormed = 0;
2818 ctxt->disableSAX = 1;
2819 return(name);
2820 } else if (name[3] == 0) {
2821 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2824 ctxt->wellFormed = 0;
2825 ctxt->disableSAX = 1;
2826 return(name);
2827 }
2828 for (i = 0;;i++) {
2829 if (xmlW3CPIs[i] == NULL) break;
2830 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2831 return(name);
2832 }
2833 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2834 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2835 ctxt->sax->warning(ctxt->userData,
2836 "xmlParsePItarget: invalid name prefix 'xml'\n");
2837 }
2838 }
2839 return(name);
2840}
2841
2842/**
2843 * xmlParsePI:
2844 * @ctxt: an XML parser context
2845 *
2846 * parse an XML Processing Instruction.
2847 *
2848 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2849 *
2850 * The processing is transfered to SAX once parsed.
2851 */
2852
2853void
2854xmlParsePI(xmlParserCtxtPtr ctxt) {
2855 xmlChar *buf = NULL;
2856 int len = 0;
2857 int size = XML_PARSER_BUFFER_SIZE;
2858 int cur, l;
2859 xmlChar *target;
2860 xmlParserInputState state;
2861 int count = 0;
2862
2863 if ((RAW == '<') && (NXT(1) == '?')) {
2864 xmlParserInputPtr input = ctxt->input;
2865 state = ctxt->instate;
2866 ctxt->instate = XML_PARSER_PI;
2867 /*
2868 * this is a Processing Instruction.
2869 */
2870 SKIP(2);
2871 SHRINK;
2872
2873 /*
2874 * Parse the target name and check for special support like
2875 * namespace.
2876 */
2877 target = xmlParsePITarget(ctxt);
2878 if (target != NULL) {
2879 if ((RAW == '?') && (NXT(1) == '>')) {
2880 if (input != ctxt->input) {
2881 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2883 ctxt->sax->error(ctxt->userData,
2884 "PI declaration doesn't start and stop in the same entity\n");
2885 ctxt->wellFormed = 0;
2886 ctxt->disableSAX = 1;
2887 }
2888 SKIP(2);
2889
2890 /*
2891 * SAX: PI detected.
2892 */
2893 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2894 (ctxt->sax->processingInstruction != NULL))
2895 ctxt->sax->processingInstruction(ctxt->userData,
2896 target, NULL);
2897 ctxt->instate = state;
2898 xmlFree(target);
2899 return;
2900 }
2901 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2902 if (buf == NULL) {
2903 xmlGenericError(xmlGenericErrorContext,
2904 "malloc of %d byte failed\n", size);
2905 ctxt->instate = state;
2906 return;
2907 }
2908 cur = CUR;
2909 if (!IS_BLANK(cur)) {
2910 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2912 ctxt->sax->error(ctxt->userData,
2913 "xmlParsePI: PI %s space expected\n", target);
2914 ctxt->wellFormed = 0;
2915 ctxt->disableSAX = 1;
2916 }
2917 SKIP_BLANKS;
2918 cur = CUR_CHAR(l);
2919 while (IS_CHAR(cur) && /* checked */
2920 ((cur != '?') || (NXT(1) != '>'))) {
2921 if (len + 5 >= size) {
2922 size *= 2;
2923 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2924 if (buf == NULL) {
2925 xmlGenericError(xmlGenericErrorContext,
2926 "realloc of %d byte failed\n", size);
2927 ctxt->instate = state;
2928 return;
2929 }
2930 }
2931 count++;
2932 if (count > 50) {
2933 GROW;
2934 count = 0;
2935 }
2936 COPY_BUF(l,buf,len,cur);
2937 NEXTL(l);
2938 cur = CUR_CHAR(l);
2939 if (cur == 0) {
2940 SHRINK;
2941 GROW;
2942 cur = CUR_CHAR(l);
2943 }
2944 }
2945 buf[len] = 0;
2946 if (cur != '?') {
2947 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2949 ctxt->sax->error(ctxt->userData,
2950 "xmlParsePI: PI %s never end ...\n", target);
2951 ctxt->wellFormed = 0;
2952 ctxt->disableSAX = 1;
2953 } else {
2954 if (input != ctxt->input) {
2955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2957 ctxt->sax->error(ctxt->userData,
2958 "PI declaration doesn't start and stop in the same entity\n");
2959 ctxt->wellFormed = 0;
2960 ctxt->disableSAX = 1;
2961 }
2962 SKIP(2);
2963
2964 /*
2965 * SAX: PI detected.
2966 */
2967 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2968 (ctxt->sax->processingInstruction != NULL))
2969 ctxt->sax->processingInstruction(ctxt->userData,
2970 target, buf);
2971 }
2972 xmlFree(buf);
2973 xmlFree(target);
2974 } else {
2975 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2977 ctxt->sax->error(ctxt->userData,
2978 "xmlParsePI : no target name\n");
2979 ctxt->wellFormed = 0;
2980 ctxt->disableSAX = 1;
2981 }
2982 ctxt->instate = state;
2983 }
2984}
2985
2986/**
2987 * xmlParseNotationDecl:
2988 * @ctxt: an XML parser context
2989 *
2990 * parse a notation declaration
2991 *
2992 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2993 *
2994 * Hence there is actually 3 choices:
2995 * 'PUBLIC' S PubidLiteral
2996 * 'PUBLIC' S PubidLiteral S SystemLiteral
2997 * and 'SYSTEM' S SystemLiteral
2998 *
2999 * See the NOTE on xmlParseExternalID().
3000 */
3001
3002void
3003xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3004 xmlChar *name;
3005 xmlChar *Pubid;
3006 xmlChar *Systemid;
3007
3008 if ((RAW == '<') && (NXT(1) == '!') &&
3009 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3010 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3011 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3012 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3013 xmlParserInputPtr input = ctxt->input;
3014 SHRINK;
3015 SKIP(10);
3016 if (!IS_BLANK(CUR)) {
3017 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3019 ctxt->sax->error(ctxt->userData,
3020 "Space required after '<!NOTATION'\n");
3021 ctxt->wellFormed = 0;
3022 ctxt->disableSAX = 1;
3023 return;
3024 }
3025 SKIP_BLANKS;
3026
Daniel Veillard29631a82001-03-05 09:49:20 +00003027 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003028 if (name == NULL) {
3029 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3031 ctxt->sax->error(ctxt->userData,
3032 "NOTATION: Name expected here\n");
3033 ctxt->wellFormed = 0;
3034 ctxt->disableSAX = 1;
3035 return;
3036 }
3037 if (!IS_BLANK(CUR)) {
3038 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3040 ctxt->sax->error(ctxt->userData,
3041 "Space required after the NOTATION name'\n");
3042 ctxt->wellFormed = 0;
3043 ctxt->disableSAX = 1;
3044 return;
3045 }
3046 SKIP_BLANKS;
3047
3048 /*
3049 * Parse the IDs.
3050 */
3051 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3052 SKIP_BLANKS;
3053
3054 if (RAW == '>') {
3055 if (input != ctxt->input) {
3056 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3058 ctxt->sax->error(ctxt->userData,
3059"Notation declaration doesn't start and stop in the same entity\n");
3060 ctxt->wellFormed = 0;
3061 ctxt->disableSAX = 1;
3062 }
3063 NEXT;
3064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3065 (ctxt->sax->notationDecl != NULL))
3066 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3067 } else {
3068 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3070 ctxt->sax->error(ctxt->userData,
3071 "'>' required to close NOTATION declaration\n");
3072 ctxt->wellFormed = 0;
3073 ctxt->disableSAX = 1;
3074 }
3075 xmlFree(name);
3076 if (Systemid != NULL) xmlFree(Systemid);
3077 if (Pubid != NULL) xmlFree(Pubid);
3078 }
3079}
3080
3081/**
3082 * xmlParseEntityDecl:
3083 * @ctxt: an XML parser context
3084 *
3085 * parse <!ENTITY declarations
3086 *
3087 * [70] EntityDecl ::= GEDecl | PEDecl
3088 *
3089 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3090 *
3091 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3092 *
3093 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3094 *
3095 * [74] PEDef ::= EntityValue | ExternalID
3096 *
3097 * [76] NDataDecl ::= S 'NDATA' S Name
3098 *
3099 * [ VC: Notation Declared ]
3100 * The Name must match the declared name of a notation.
3101 */
3102
3103void
3104xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3105 xmlChar *name = NULL;
3106 xmlChar *value = NULL;
3107 xmlChar *URI = NULL, *literal = NULL;
3108 xmlChar *ndata = NULL;
3109 int isParameter = 0;
3110 xmlChar *orig = NULL;
3111
3112 GROW;
3113 if ((RAW == '<') && (NXT(1) == '!') &&
3114 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3115 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3116 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3117 xmlParserInputPtr input = ctxt->input;
3118 ctxt->instate = XML_PARSER_ENTITY_DECL;
3119 SHRINK;
3120 SKIP(8);
3121 if (!IS_BLANK(CUR)) {
3122 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3124 ctxt->sax->error(ctxt->userData,
3125 "Space required after '<!ENTITY'\n");
3126 ctxt->wellFormed = 0;
3127 ctxt->disableSAX = 1;
3128 }
3129 SKIP_BLANKS;
3130
3131 if (RAW == '%') {
3132 NEXT;
3133 if (!IS_BLANK(CUR)) {
3134 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3136 ctxt->sax->error(ctxt->userData,
3137 "Space required after '%'\n");
3138 ctxt->wellFormed = 0;
3139 ctxt->disableSAX = 1;
3140 }
3141 SKIP_BLANKS;
3142 isParameter = 1;
3143 }
3144
Daniel Veillard29631a82001-03-05 09:49:20 +00003145 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003146 if (name == NULL) {
3147 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3149 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3150 ctxt->wellFormed = 0;
3151 ctxt->disableSAX = 1;
3152 return;
3153 }
3154 if (!IS_BLANK(CUR)) {
3155 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3157 ctxt->sax->error(ctxt->userData,
3158 "Space required after the entity name\n");
3159 ctxt->wellFormed = 0;
3160 ctxt->disableSAX = 1;
3161 }
3162 SKIP_BLANKS;
3163
3164 /*
3165 * handle the various case of definitions...
3166 */
3167 if (isParameter) {
3168 if ((RAW == '"') || (RAW == '\'')) {
3169 value = xmlParseEntityValue(ctxt, &orig);
3170 if (value) {
3171 if ((ctxt->sax != NULL) &&
3172 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3173 ctxt->sax->entityDecl(ctxt->userData, name,
3174 XML_INTERNAL_PARAMETER_ENTITY,
3175 NULL, NULL, value);
3176 }
3177 } else {
3178 URI = xmlParseExternalID(ctxt, &literal, 1);
3179 if ((URI == NULL) && (literal == NULL)) {
3180 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3182 ctxt->sax->error(ctxt->userData,
3183 "Entity value required\n");
3184 ctxt->wellFormed = 0;
3185 ctxt->disableSAX = 1;
3186 }
3187 if (URI) {
3188 xmlURIPtr uri;
3189
3190 uri = xmlParseURI((const char *) URI);
3191 if (uri == NULL) {
3192 ctxt->errNo = XML_ERR_INVALID_URI;
3193 if ((ctxt->sax != NULL) &&
3194 (!ctxt->disableSAX) &&
3195 (ctxt->sax->error != NULL))
3196 ctxt->sax->error(ctxt->userData,
3197 "Invalid URI: %s\n", URI);
3198 ctxt->wellFormed = 0;
3199 } else {
3200 if (uri->fragment != NULL) {
3201 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3202 if ((ctxt->sax != NULL) &&
3203 (!ctxt->disableSAX) &&
3204 (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt->userData,
3206 "Fragment not allowed: %s\n", URI);
3207 ctxt->wellFormed = 0;
3208 } else {
3209 if ((ctxt->sax != NULL) &&
3210 (!ctxt->disableSAX) &&
3211 (ctxt->sax->entityDecl != NULL))
3212 ctxt->sax->entityDecl(ctxt->userData, name,
3213 XML_EXTERNAL_PARAMETER_ENTITY,
3214 literal, URI, NULL);
3215 }
3216 xmlFreeURI(uri);
3217 }
3218 }
3219 }
3220 } else {
3221 if ((RAW == '"') || (RAW == '\'')) {
3222 value = xmlParseEntityValue(ctxt, &orig);
3223 if ((ctxt->sax != NULL) &&
3224 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3225 ctxt->sax->entityDecl(ctxt->userData, name,
3226 XML_INTERNAL_GENERAL_ENTITY,
3227 NULL, NULL, value);
3228 } else {
3229 URI = xmlParseExternalID(ctxt, &literal, 1);
3230 if ((URI == NULL) && (literal == NULL)) {
3231 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3233 ctxt->sax->error(ctxt->userData,
3234 "Entity value required\n");
3235 ctxt->wellFormed = 0;
3236 ctxt->disableSAX = 1;
3237 }
3238 if (URI) {
3239 xmlURIPtr uri;
3240
3241 uri = xmlParseURI((const char *)URI);
3242 if (uri == NULL) {
3243 ctxt->errNo = XML_ERR_INVALID_URI;
3244 if ((ctxt->sax != NULL) &&
3245 (!ctxt->disableSAX) &&
3246 (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData,
3248 "Invalid URI: %s\n", URI);
3249 ctxt->wellFormed = 0;
3250 } else {
3251 if (uri->fragment != NULL) {
3252 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3253 if ((ctxt->sax != NULL) &&
3254 (!ctxt->disableSAX) &&
3255 (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257 "Fragment not allowed: %s\n", URI);
3258 ctxt->wellFormed = 0;
3259 }
3260 xmlFreeURI(uri);
3261 }
3262 }
3263 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required before 'NDATA'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 }
3271 SKIP_BLANKS;
3272 if ((RAW == 'N') && (NXT(1) == 'D') &&
3273 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3274 (NXT(4) == 'A')) {
3275 SKIP(5);
3276 if (!IS_BLANK(CUR)) {
3277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3279 ctxt->sax->error(ctxt->userData,
3280 "Space required after 'NDATA'\n");
3281 ctxt->wellFormed = 0;
3282 ctxt->disableSAX = 1;
3283 }
3284 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003285 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003286 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3287 (ctxt->sax->unparsedEntityDecl != NULL))
3288 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3289 literal, URI, ndata);
3290 } else {
3291 if ((ctxt->sax != NULL) &&
3292 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3293 ctxt->sax->entityDecl(ctxt->userData, name,
3294 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3295 literal, URI, NULL);
3296 }
3297 }
3298 }
3299 SKIP_BLANKS;
3300 if (RAW != '>') {
3301 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3303 ctxt->sax->error(ctxt->userData,
3304 "xmlParseEntityDecl: entity %s not terminated\n", name);
3305 ctxt->wellFormed = 0;
3306 ctxt->disableSAX = 1;
3307 } else {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312"Entity declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
3314 ctxt->disableSAX = 1;
3315 }
3316 NEXT;
3317 }
3318 if (orig != NULL) {
3319 /*
3320 * Ugly mechanism to save the raw entity value.
3321 */
3322 xmlEntityPtr cur = NULL;
3323
3324 if (isParameter) {
3325 if ((ctxt->sax != NULL) &&
3326 (ctxt->sax->getParameterEntity != NULL))
3327 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3328 } else {
3329 if ((ctxt->sax != NULL) &&
3330 (ctxt->sax->getEntity != NULL))
3331 cur = ctxt->sax->getEntity(ctxt->userData, name);
3332 }
3333 if (cur != NULL) {
3334 if (cur->orig != NULL)
3335 xmlFree(orig);
3336 else
3337 cur->orig = orig;
3338 } else
3339 xmlFree(orig);
3340 }
3341 if (name != NULL) xmlFree(name);
3342 if (value != NULL) xmlFree(value);
3343 if (URI != NULL) xmlFree(URI);
3344 if (literal != NULL) xmlFree(literal);
3345 if (ndata != NULL) xmlFree(ndata);
3346 }
3347}
3348
3349/**
3350 * xmlParseDefaultDecl:
3351 * @ctxt: an XML parser context
3352 * @value: Receive a possible fixed default value for the attribute
3353 *
3354 * Parse an attribute default declaration
3355 *
3356 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3357 *
3358 * [ VC: Required Attribute ]
3359 * if the default declaration is the keyword #REQUIRED, then the
3360 * attribute must be specified for all elements of the type in the
3361 * attribute-list declaration.
3362 *
3363 * [ VC: Attribute Default Legal ]
3364 * The declared default value must meet the lexical constraints of
3365 * the declared attribute type c.f. xmlValidateAttributeDecl()
3366 *
3367 * [ VC: Fixed Attribute Default ]
3368 * if an attribute has a default value declared with the #FIXED
3369 * keyword, instances of that attribute must match the default value.
3370 *
3371 * [ WFC: No < in Attribute Values ]
3372 * handled in xmlParseAttValue()
3373 *
3374 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3375 * or XML_ATTRIBUTE_FIXED.
3376 */
3377
3378int
3379xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3380 int val;
3381 xmlChar *ret;
3382
3383 *value = NULL;
3384 if ((RAW == '#') && (NXT(1) == 'R') &&
3385 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3386 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3387 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3388 (NXT(8) == 'D')) {
3389 SKIP(9);
3390 return(XML_ATTRIBUTE_REQUIRED);
3391 }
3392 if ((RAW == '#') && (NXT(1) == 'I') &&
3393 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3394 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3395 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3396 SKIP(8);
3397 return(XML_ATTRIBUTE_IMPLIED);
3398 }
3399 val = XML_ATTRIBUTE_NONE;
3400 if ((RAW == '#') && (NXT(1) == 'F') &&
3401 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3402 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3403 SKIP(6);
3404 val = XML_ATTRIBUTE_FIXED;
3405 if (!IS_BLANK(CUR)) {
3406 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Space required after '#FIXED'\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 SKIP_BLANKS;
3414 }
3415 ret = xmlParseAttValue(ctxt);
3416 ctxt->instate = XML_PARSER_DTD;
3417 if (ret == NULL) {
3418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3419 ctxt->sax->error(ctxt->userData,
3420 "Attribute default value declaration error\n");
3421 ctxt->wellFormed = 0;
3422 ctxt->disableSAX = 1;
3423 } else
3424 *value = ret;
3425 return(val);
3426}
3427
3428/**
3429 * xmlParseNotationType:
3430 * @ctxt: an XML parser context
3431 *
3432 * parse an Notation attribute type.
3433 *
3434 * Note: the leading 'NOTATION' S part has already being parsed...
3435 *
3436 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3437 *
3438 * [ VC: Notation Attributes ]
3439 * Values of this type must match one of the notation names included
3440 * in the declaration; all notation names in the declaration must be declared.
3441 *
3442 * Returns: the notation attribute tree built while parsing
3443 */
3444
3445xmlEnumerationPtr
3446xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3447 xmlChar *name;
3448 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3449
3450 if (RAW != '(') {
3451 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt->userData,
3454 "'(' required to start 'NOTATION'\n");
3455 ctxt->wellFormed = 0;
3456 ctxt->disableSAX = 1;
3457 return(NULL);
3458 }
3459 SHRINK;
3460 do {
3461 NEXT;
3462 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003463 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003464 if (name == NULL) {
3465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Name expected in NOTATION declaration\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 return(ret);
3472 }
3473 cur = xmlCreateEnumeration(name);
3474 xmlFree(name);
3475 if (cur == NULL) return(ret);
3476 if (last == NULL) ret = last = cur;
3477 else {
3478 last->next = cur;
3479 last = cur;
3480 }
3481 SKIP_BLANKS;
3482 } while (RAW == '|');
3483 if (RAW != ')') {
3484 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3486 ctxt->sax->error(ctxt->userData,
3487 "')' required to finish NOTATION declaration\n");
3488 ctxt->wellFormed = 0;
3489 ctxt->disableSAX = 1;
3490 if ((last != NULL) && (last != ret))
3491 xmlFreeEnumeration(last);
3492 return(ret);
3493 }
3494 NEXT;
3495 return(ret);
3496}
3497
3498/**
3499 * xmlParseEnumerationType:
3500 * @ctxt: an XML parser context
3501 *
3502 * parse an Enumeration attribute type.
3503 *
3504 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3505 *
3506 * [ VC: Enumeration ]
3507 * Values of this type must match one of the Nmtoken tokens in
3508 * the declaration
3509 *
3510 * Returns: the enumeration attribute tree built while parsing
3511 */
3512
3513xmlEnumerationPtr
3514xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3515 xmlChar *name;
3516 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3517
3518 if (RAW != '(') {
3519 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "'(' required to start ATTLIST enumeration\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 return(NULL);
3526 }
3527 SHRINK;
3528 do {
3529 NEXT;
3530 SKIP_BLANKS;
3531 name = xmlParseNmtoken(ctxt);
3532 if (name == NULL) {
3533 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3535 ctxt->sax->error(ctxt->userData,
3536 "NmToken expected in ATTLIST enumeration\n");
3537 ctxt->wellFormed = 0;
3538 ctxt->disableSAX = 1;
3539 return(ret);
3540 }
3541 cur = xmlCreateEnumeration(name);
3542 xmlFree(name);
3543 if (cur == NULL) return(ret);
3544 if (last == NULL) ret = last = cur;
3545 else {
3546 last->next = cur;
3547 last = cur;
3548 }
3549 SKIP_BLANKS;
3550 } while (RAW == '|');
3551 if (RAW != ')') {
3552 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3554 ctxt->sax->error(ctxt->userData,
3555 "')' required to finish ATTLIST enumeration\n");
3556 ctxt->wellFormed = 0;
3557 ctxt->disableSAX = 1;
3558 return(ret);
3559 }
3560 NEXT;
3561 return(ret);
3562}
3563
3564/**
3565 * xmlParseEnumeratedType:
3566 * @ctxt: an XML parser context
3567 * @tree: the enumeration tree built while parsing
3568 *
3569 * parse an Enumerated attribute type.
3570 *
3571 * [57] EnumeratedType ::= NotationType | Enumeration
3572 *
3573 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3574 *
3575 *
3576 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3577 */
3578
3579int
3580xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3581 if ((RAW == 'N') && (NXT(1) == 'O') &&
3582 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3583 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3584 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3585 SKIP(8);
3586 if (!IS_BLANK(CUR)) {
3587 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData,
3590 "Space required after 'NOTATION'\n");
3591 ctxt->wellFormed = 0;
3592 ctxt->disableSAX = 1;
3593 return(0);
3594 }
3595 SKIP_BLANKS;
3596 *tree = xmlParseNotationType(ctxt);
3597 if (*tree == NULL) return(0);
3598 return(XML_ATTRIBUTE_NOTATION);
3599 }
3600 *tree = xmlParseEnumerationType(ctxt);
3601 if (*tree == NULL) return(0);
3602 return(XML_ATTRIBUTE_ENUMERATION);
3603}
3604
3605/**
3606 * xmlParseAttributeType:
3607 * @ctxt: an XML parser context
3608 * @tree: the enumeration tree built while parsing
3609 *
3610 * parse the Attribute list def for an element
3611 *
3612 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3613 *
3614 * [55] StringType ::= 'CDATA'
3615 *
3616 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3617 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3618 *
3619 * Validity constraints for attribute values syntax are checked in
3620 * xmlValidateAttributeValue()
3621 *
3622 * [ VC: ID ]
3623 * Values of type ID must match the Name production. A name must not
3624 * appear more than once in an XML document as a value of this type;
3625 * i.e., ID values must uniquely identify the elements which bear them.
3626 *
3627 * [ VC: One ID per Element Type ]
3628 * No element type may have more than one ID attribute specified.
3629 *
3630 * [ VC: ID Attribute Default ]
3631 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3632 *
3633 * [ VC: IDREF ]
3634 * Values of type IDREF must match the Name production, and values
3635 * of type IDREFS must match Names; each IDREF Name must match the value
3636 * of an ID attribute on some element in the XML document; i.e. IDREF
3637 * values must match the value of some ID attribute.
3638 *
3639 * [ VC: Entity Name ]
3640 * Values of type ENTITY must match the Name production, values
3641 * of type ENTITIES must match Names; each Entity Name must match the
3642 * name of an unparsed entity declared in the DTD.
3643 *
3644 * [ VC: Name Token ]
3645 * Values of type NMTOKEN must match the Nmtoken production; values
3646 * of type NMTOKENS must match Nmtokens.
3647 *
3648 * Returns the attribute type
3649 */
3650int
3651xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3652 SHRINK;
3653 if ((RAW == 'C') && (NXT(1) == 'D') &&
3654 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3655 (NXT(4) == 'A')) {
3656 SKIP(5);
3657 return(XML_ATTRIBUTE_CDATA);
3658 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3659 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3660 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3661 SKIP(6);
3662 return(XML_ATTRIBUTE_IDREFS);
3663 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3664 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3665 (NXT(4) == 'F')) {
3666 SKIP(5);
3667 return(XML_ATTRIBUTE_IDREF);
3668 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3669 SKIP(2);
3670 return(XML_ATTRIBUTE_ID);
3671 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3672 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3673 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3674 SKIP(6);
3675 return(XML_ATTRIBUTE_ENTITY);
3676 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3677 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3678 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3679 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3680 SKIP(8);
3681 return(XML_ATTRIBUTE_ENTITIES);
3682 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3683 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3684 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3685 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3686 SKIP(8);
3687 return(XML_ATTRIBUTE_NMTOKENS);
3688 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3689 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3690 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3691 (NXT(6) == 'N')) {
3692 SKIP(7);
3693 return(XML_ATTRIBUTE_NMTOKEN);
3694 }
3695 return(xmlParseEnumeratedType(ctxt, tree));
3696}
3697
3698/**
3699 * xmlParseAttributeListDecl:
3700 * @ctxt: an XML parser context
3701 *
3702 * : parse the Attribute list def for an element
3703 *
3704 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3705 *
3706 * [53] AttDef ::= S Name S AttType S DefaultDecl
3707 *
3708 */
3709void
3710xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3711 xmlChar *elemName;
3712 xmlChar *attrName;
3713 xmlEnumerationPtr tree;
3714
3715 if ((RAW == '<') && (NXT(1) == '!') &&
3716 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3717 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3718 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3719 (NXT(8) == 'T')) {
3720 xmlParserInputPtr input = ctxt->input;
3721
3722 SKIP(9);
3723 if (!IS_BLANK(CUR)) {
3724 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3726 ctxt->sax->error(ctxt->userData,
3727 "Space required after '<!ATTLIST'\n");
3728 ctxt->wellFormed = 0;
3729 ctxt->disableSAX = 1;
3730 }
3731 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003732 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003733 if (elemName == NULL) {
3734 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3736 ctxt->sax->error(ctxt->userData,
3737 "ATTLIST: no name for Element\n");
3738 ctxt->wellFormed = 0;
3739 ctxt->disableSAX = 1;
3740 return;
3741 }
3742 SKIP_BLANKS;
3743 GROW;
3744 while (RAW != '>') {
3745 const xmlChar *check = CUR_PTR;
3746 int type;
3747 int def;
3748 xmlChar *defaultValue = NULL;
3749
3750 GROW;
3751 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003752 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003753 if (attrName == NULL) {
3754 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3756 ctxt->sax->error(ctxt->userData,
3757 "ATTLIST: no name for Attribute\n");
3758 ctxt->wellFormed = 0;
3759 ctxt->disableSAX = 1;
3760 break;
3761 }
3762 GROW;
3763 if (!IS_BLANK(CUR)) {
3764 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766 ctxt->sax->error(ctxt->userData,
3767 "Space required after the attribute name\n");
3768 ctxt->wellFormed = 0;
3769 ctxt->disableSAX = 1;
3770 if (attrName != NULL)
3771 xmlFree(attrName);
3772 if (defaultValue != NULL)
3773 xmlFree(defaultValue);
3774 break;
3775 }
3776 SKIP_BLANKS;
3777
3778 type = xmlParseAttributeType(ctxt, &tree);
3779 if (type <= 0) {
3780 if (attrName != NULL)
3781 xmlFree(attrName);
3782 if (defaultValue != NULL)
3783 xmlFree(defaultValue);
3784 break;
3785 }
3786
3787 GROW;
3788 if (!IS_BLANK(CUR)) {
3789 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3791 ctxt->sax->error(ctxt->userData,
3792 "Space required after the attribute type\n");
3793 ctxt->wellFormed = 0;
3794 ctxt->disableSAX = 1;
3795 if (attrName != NULL)
3796 xmlFree(attrName);
3797 if (defaultValue != NULL)
3798 xmlFree(defaultValue);
3799 if (tree != NULL)
3800 xmlFreeEnumeration(tree);
3801 break;
3802 }
3803 SKIP_BLANKS;
3804
3805 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3806 if (def <= 0) {
3807 if (attrName != NULL)
3808 xmlFree(attrName);
3809 if (defaultValue != NULL)
3810 xmlFree(defaultValue);
3811 if (tree != NULL)
3812 xmlFreeEnumeration(tree);
3813 break;
3814 }
3815
3816 GROW;
3817 if (RAW != '>') {
3818 if (!IS_BLANK(CUR)) {
3819 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3821 ctxt->sax->error(ctxt->userData,
3822 "Space required after the attribute default value\n");
3823 ctxt->wellFormed = 0;
3824 ctxt->disableSAX = 1;
3825 if (attrName != NULL)
3826 xmlFree(attrName);
3827 if (defaultValue != NULL)
3828 xmlFree(defaultValue);
3829 if (tree != NULL)
3830 xmlFreeEnumeration(tree);
3831 break;
3832 }
3833 SKIP_BLANKS;
3834 }
3835 if (check == CUR_PTR) {
3836 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "xmlParseAttributeListDecl: detected internal error\n");
3840 if (attrName != NULL)
3841 xmlFree(attrName);
3842 if (defaultValue != NULL)
3843 xmlFree(defaultValue);
3844 if (tree != NULL)
3845 xmlFreeEnumeration(tree);
3846 break;
3847 }
3848 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3849 (ctxt->sax->attributeDecl != NULL))
3850 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3851 type, def, defaultValue, tree);
3852 if (attrName != NULL)
3853 xmlFree(attrName);
3854 if (defaultValue != NULL)
3855 xmlFree(defaultValue);
3856 GROW;
3857 }
3858 if (RAW == '>') {
3859 if (input != ctxt->input) {
3860 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862 ctxt->sax->error(ctxt->userData,
3863"Attribute list declaration doesn't start and stop in the same entity\n");
3864 ctxt->wellFormed = 0;
3865 ctxt->disableSAX = 1;
3866 }
3867 NEXT;
3868 }
3869
3870 xmlFree(elemName);
3871 }
3872}
3873
3874/**
3875 * xmlParseElementMixedContentDecl:
3876 * @ctxt: an XML parser context
3877 *
3878 * parse the declaration for a Mixed Element content
3879 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3880 *
3881 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3882 * '(' S? '#PCDATA' S? ')'
3883 *
3884 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3885 *
3886 * [ VC: No Duplicate Types ]
3887 * The same name must not appear more than once in a single
3888 * mixed-content declaration.
3889 *
3890 * returns: the list of the xmlElementContentPtr describing the element choices
3891 */
3892xmlElementContentPtr
3893xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3894 xmlElementContentPtr ret = NULL, cur = NULL, n;
3895 xmlChar *elem = NULL;
3896
3897 GROW;
3898 if ((RAW == '#') && (NXT(1) == 'P') &&
3899 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3900 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3901 (NXT(6) == 'A')) {
3902 SKIP(7);
3903 SKIP_BLANKS;
3904 SHRINK;
3905 if (RAW == ')') {
3906 ctxt->entity = ctxt->input;
3907 NEXT;
3908 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3909 if (RAW == '*') {
3910 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3911 NEXT;
3912 }
3913 return(ret);
3914 }
3915 if ((RAW == '(') || (RAW == '|')) {
3916 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3917 if (ret == NULL) return(NULL);
3918 }
3919 while (RAW == '|') {
3920 NEXT;
3921 if (elem == NULL) {
3922 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3923 if (ret == NULL) return(NULL);
3924 ret->c1 = cur;
3925 cur = ret;
3926 } else {
3927 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3928 if (n == NULL) return(NULL);
3929 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3930 cur->c2 = n;
3931 cur = n;
3932 xmlFree(elem);
3933 }
3934 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003935 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 if (elem == NULL) {
3937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3939 ctxt->sax->error(ctxt->userData,
3940 "xmlParseElementMixedContentDecl : Name expected\n");
3941 ctxt->wellFormed = 0;
3942 ctxt->disableSAX = 1;
3943 xmlFreeElementContent(cur);
3944 return(NULL);
3945 }
3946 SKIP_BLANKS;
3947 GROW;
3948 }
3949 if ((RAW == ')') && (NXT(1) == '*')) {
3950 if (elem != NULL) {
3951 cur->c2 = xmlNewElementContent(elem,
3952 XML_ELEMENT_CONTENT_ELEMENT);
3953 xmlFree(elem);
3954 }
3955 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3956 ctxt->entity = ctxt->input;
3957 SKIP(2);
3958 } else {
3959 if (elem != NULL) xmlFree(elem);
3960 xmlFreeElementContent(ret);
3961 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3965 ctxt->wellFormed = 0;
3966 ctxt->disableSAX = 1;
3967 return(NULL);
3968 }
3969
3970 } else {
3971 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3973 ctxt->sax->error(ctxt->userData,
3974 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3975 ctxt->wellFormed = 0;
3976 ctxt->disableSAX = 1;
3977 }
3978 return(ret);
3979}
3980
3981/**
3982 * xmlParseElementChildrenContentDecl:
3983 * @ctxt: an XML parser context
3984 *
3985 * parse the declaration for a Mixed Element content
3986 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3987 *
3988 *
3989 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3990 *
3991 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3992 *
3993 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3994 *
3995 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3996 *
3997 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3998 * TODO Parameter-entity replacement text must be properly nested
3999 * with parenthetized groups. That is to say, if either of the
4000 * opening or closing parentheses in a choice, seq, or Mixed
4001 * construct is contained in the replacement text for a parameter
4002 * entity, both must be contained in the same replacement text. For
4003 * interoperability, if a parameter-entity reference appears in a
4004 * choice, seq, or Mixed construct, its replacement text should not
4005 * be empty, and neither the first nor last non-blank character of
4006 * the replacement text should be a connector (| or ,).
4007 *
4008 * returns: the tree of xmlElementContentPtr describing the element
4009 * hierarchy.
4010 */
4011xmlElementContentPtr
4012#ifdef VMS
4013xmlParseElementChildrenContentD
4014#else
4015xmlParseElementChildrenContentDecl
4016#endif
4017(xmlParserCtxtPtr ctxt) {
4018 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4019 xmlChar *elem;
4020 xmlChar type = 0;
4021
4022 SKIP_BLANKS;
4023 GROW;
4024 if (RAW == '(') {
4025 /* Recurse on first child */
4026 NEXT;
4027 SKIP_BLANKS;
4028 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4029 SKIP_BLANKS;
4030 GROW;
4031 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004032 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (elem == NULL) {
4034 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4036 ctxt->sax->error(ctxt->userData,
4037 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4038 ctxt->wellFormed = 0;
4039 ctxt->disableSAX = 1;
4040 return(NULL);
4041 }
4042 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4043 GROW;
4044 if (RAW == '?') {
4045 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4046 NEXT;
4047 } else if (RAW == '*') {
4048 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4049 NEXT;
4050 } else if (RAW == '+') {
4051 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4052 NEXT;
4053 } else {
4054 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4055 }
4056 xmlFree(elem);
4057 GROW;
4058 }
4059 SKIP_BLANKS;
4060 SHRINK;
4061 while (RAW != ')') {
4062 /*
4063 * Each loop we parse one separator and one element.
4064 */
4065 if (RAW == ',') {
4066 if (type == 0) type = CUR;
4067
4068 /*
4069 * Detect "Name | Name , Name" error
4070 */
4071 else if (type != CUR) {
4072 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4074 ctxt->sax->error(ctxt->userData,
4075 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4076 type);
4077 ctxt->wellFormed = 0;
4078 ctxt->disableSAX = 1;
4079 if ((op != NULL) && (op != ret))
4080 xmlFreeElementContent(op);
4081 if ((last != NULL) && (last != ret) &&
4082 (last != ret->c1) && (last != ret->c2))
4083 xmlFreeElementContent(last);
4084 if (ret != NULL)
4085 xmlFreeElementContent(ret);
4086 return(NULL);
4087 }
4088 NEXT;
4089
4090 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4091 if (op == NULL) {
4092 xmlFreeElementContent(ret);
4093 return(NULL);
4094 }
4095 if (last == NULL) {
4096 op->c1 = ret;
4097 ret = cur = op;
4098 } else {
4099 cur->c2 = op;
4100 op->c1 = last;
4101 cur =op;
4102 last = NULL;
4103 }
4104 } else if (RAW == '|') {
4105 if (type == 0) type = CUR;
4106
4107 /*
4108 * Detect "Name , Name | Name" error
4109 */
4110 else if (type != CUR) {
4111 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4113 ctxt->sax->error(ctxt->userData,
4114 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4115 type);
4116 ctxt->wellFormed = 0;
4117 ctxt->disableSAX = 1;
4118 if ((op != NULL) && (op != ret) && (op != last))
4119 xmlFreeElementContent(op);
4120 if ((last != NULL) && (last != ret) &&
4121 (last != ret->c1) && (last != ret->c2))
4122 xmlFreeElementContent(last);
4123 if (ret != NULL)
4124 xmlFreeElementContent(ret);
4125 return(NULL);
4126 }
4127 NEXT;
4128
4129 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4130 if (op == NULL) {
4131 if ((op != NULL) && (op != ret))
4132 xmlFreeElementContent(op);
4133 if ((last != NULL) && (last != ret) &&
4134 (last != ret->c1) && (last != ret->c2))
4135 xmlFreeElementContent(last);
4136 if (ret != NULL)
4137 xmlFreeElementContent(ret);
4138 return(NULL);
4139 }
4140 if (last == NULL) {
4141 op->c1 = ret;
4142 ret = cur = op;
4143 } else {
4144 cur->c2 = op;
4145 op->c1 = last;
4146 cur =op;
4147 last = NULL;
4148 }
4149 } else {
4150 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4152 ctxt->sax->error(ctxt->userData,
4153 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4154 ctxt->wellFormed = 0;
4155 ctxt->disableSAX = 1;
4156 if ((op != NULL) && (op != ret))
4157 xmlFreeElementContent(op);
4158 if ((last != NULL) && (last != ret) &&
4159 (last != ret->c1) && (last != ret->c2))
4160 xmlFreeElementContent(last);
4161 if (ret != NULL)
4162 xmlFreeElementContent(ret);
4163 return(NULL);
4164 }
4165 GROW;
4166 SKIP_BLANKS;
4167 GROW;
4168 if (RAW == '(') {
4169 /* Recurse on second child */
4170 NEXT;
4171 SKIP_BLANKS;
4172 last = xmlParseElementChildrenContentDecl(ctxt);
4173 SKIP_BLANKS;
4174 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004175 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 if (elem == NULL) {
4177 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4179 ctxt->sax->error(ctxt->userData,
4180 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4181 ctxt->wellFormed = 0;
4182 ctxt->disableSAX = 1;
4183 if ((op != NULL) && (op != ret))
4184 xmlFreeElementContent(op);
4185 if ((last != NULL) && (last != ret) &&
4186 (last != ret->c1) && (last != ret->c2))
4187 xmlFreeElementContent(last);
4188 if (ret != NULL)
4189 xmlFreeElementContent(ret);
4190 return(NULL);
4191 }
4192 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4193 xmlFree(elem);
4194 if (RAW == '?') {
4195 last->ocur = XML_ELEMENT_CONTENT_OPT;
4196 NEXT;
4197 } else if (RAW == '*') {
4198 last->ocur = XML_ELEMENT_CONTENT_MULT;
4199 NEXT;
4200 } else if (RAW == '+') {
4201 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4202 NEXT;
4203 } else {
4204 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4205 }
4206 }
4207 SKIP_BLANKS;
4208 GROW;
4209 }
4210 if ((cur != NULL) && (last != NULL)) {
4211 cur->c2 = last;
4212 }
4213 ctxt->entity = ctxt->input;
4214 NEXT;
4215 if (RAW == '?') {
4216 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4217 NEXT;
4218 } else if (RAW == '*') {
4219 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4220 NEXT;
4221 } else if (RAW == '+') {
4222 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4223 NEXT;
4224 }
4225 return(ret);
4226}
4227
4228/**
4229 * xmlParseElementContentDecl:
4230 * @ctxt: an XML parser context
4231 * @name: the name of the element being defined.
4232 * @result: the Element Content pointer will be stored here if any
4233 *
4234 * parse the declaration for an Element content either Mixed or Children,
4235 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4236 *
4237 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4238 *
4239 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4240 */
4241
4242int
4243xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4244 xmlElementContentPtr *result) {
4245
4246 xmlElementContentPtr tree = NULL;
4247 xmlParserInputPtr input = ctxt->input;
4248 int res;
4249
4250 *result = NULL;
4251
4252 if (RAW != '(') {
4253 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4255 ctxt->sax->error(ctxt->userData,
4256 "xmlParseElementContentDecl : '(' expected\n");
4257 ctxt->wellFormed = 0;
4258 ctxt->disableSAX = 1;
4259 return(-1);
4260 }
4261 NEXT;
4262 GROW;
4263 SKIP_BLANKS;
4264 if ((RAW == '#') && (NXT(1) == 'P') &&
4265 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4266 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4267 (NXT(6) == 'A')) {
4268 tree = xmlParseElementMixedContentDecl(ctxt);
4269 res = XML_ELEMENT_TYPE_MIXED;
4270 } else {
4271 tree = xmlParseElementChildrenContentDecl(ctxt);
4272 res = XML_ELEMENT_TYPE_ELEMENT;
4273 }
4274 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4275 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4277 ctxt->sax->error(ctxt->userData,
4278"Element content declaration doesn't start and stop in the same entity\n");
4279 ctxt->wellFormed = 0;
4280 ctxt->disableSAX = 1;
4281 }
4282 SKIP_BLANKS;
4283 *result = tree;
4284 return(res);
4285}
4286
4287/**
4288 * xmlParseElementDecl:
4289 * @ctxt: an XML parser context
4290 *
4291 * parse an Element declaration.
4292 *
4293 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4294 *
4295 * [ VC: Unique Element Type Declaration ]
4296 * No element type may be declared more than once
4297 *
4298 * Returns the type of the element, or -1 in case of error
4299 */
4300int
4301xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4302 xmlChar *name;
4303 int ret = -1;
4304 xmlElementContentPtr content = NULL;
4305
4306 GROW;
4307 if ((RAW == '<') && (NXT(1) == '!') &&
4308 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4309 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4310 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4311 (NXT(8) == 'T')) {
4312 xmlParserInputPtr input = ctxt->input;
4313
4314 SKIP(9);
4315 if (!IS_BLANK(CUR)) {
4316 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4318 ctxt->sax->error(ctxt->userData,
4319 "Space required after 'ELEMENT'\n");
4320 ctxt->wellFormed = 0;
4321 ctxt->disableSAX = 1;
4322 }
4323 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004324 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 if (name == NULL) {
4326 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4328 ctxt->sax->error(ctxt->userData,
4329 "xmlParseElementDecl: no name for Element\n");
4330 ctxt->wellFormed = 0;
4331 ctxt->disableSAX = 1;
4332 return(-1);
4333 }
4334 while ((RAW == 0) && (ctxt->inputNr > 1))
4335 xmlPopInput(ctxt);
4336 if (!IS_BLANK(CUR)) {
4337 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4339 ctxt->sax->error(ctxt->userData,
4340 "Space required after the element name\n");
4341 ctxt->wellFormed = 0;
4342 ctxt->disableSAX = 1;
4343 }
4344 SKIP_BLANKS;
4345 if ((RAW == 'E') && (NXT(1) == 'M') &&
4346 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4347 (NXT(4) == 'Y')) {
4348 SKIP(5);
4349 /*
4350 * Element must always be empty.
4351 */
4352 ret = XML_ELEMENT_TYPE_EMPTY;
4353 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4354 (NXT(2) == 'Y')) {
4355 SKIP(3);
4356 /*
4357 * Element is a generic container.
4358 */
4359 ret = XML_ELEMENT_TYPE_ANY;
4360 } else if (RAW == '(') {
4361 ret = xmlParseElementContentDecl(ctxt, name, &content);
4362 } else {
4363 /*
4364 * [ WFC: PEs in Internal Subset ] error handling.
4365 */
4366 if ((RAW == '%') && (ctxt->external == 0) &&
4367 (ctxt->inputNr == 1)) {
4368 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4370 ctxt->sax->error(ctxt->userData,
4371 "PEReference: forbidden within markup decl in internal subset\n");
4372 } else {
4373 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4377 }
4378 ctxt->wellFormed = 0;
4379 ctxt->disableSAX = 1;
4380 if (name != NULL) xmlFree(name);
4381 return(-1);
4382 }
4383
4384 SKIP_BLANKS;
4385 /*
4386 * Pop-up of finished entities.
4387 */
4388 while ((RAW == 0) && (ctxt->inputNr > 1))
4389 xmlPopInput(ctxt);
4390 SKIP_BLANKS;
4391
4392 if (RAW != '>') {
4393 ctxt->errNo = XML_ERR_GT_REQUIRED;
4394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4395 ctxt->sax->error(ctxt->userData,
4396 "xmlParseElementDecl: expected '>' at the end\n");
4397 ctxt->wellFormed = 0;
4398 ctxt->disableSAX = 1;
4399 } else {
4400 if (input != ctxt->input) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4403 ctxt->sax->error(ctxt->userData,
4404"Element declaration doesn't start and stop in the same entity\n");
4405 ctxt->wellFormed = 0;
4406 ctxt->disableSAX = 1;
4407 }
4408
4409 NEXT;
4410 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4411 (ctxt->sax->elementDecl != NULL))
4412 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4413 content);
4414 }
4415 if (content != NULL) {
4416 xmlFreeElementContent(content);
4417 }
4418 if (name != NULL) {
4419 xmlFree(name);
4420 }
4421 }
4422 return(ret);
4423}
4424
4425/**
4426 * xmlParseMarkupDecl:
4427 * @ctxt: an XML parser context
4428 *
4429 * parse Markup declarations
4430 *
4431 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4432 * NotationDecl | PI | Comment
4433 *
4434 * [ VC: Proper Declaration/PE Nesting ]
4435 * Parameter-entity replacement text must be properly nested with
4436 * markup declarations. That is to say, if either the first character
4437 * or the last character of a markup declaration (markupdecl above) is
4438 * contained in the replacement text for a parameter-entity reference,
4439 * both must be contained in the same replacement text.
4440 *
4441 * [ WFC: PEs in Internal Subset ]
4442 * In the internal DTD subset, parameter-entity references can occur
4443 * only where markup declarations can occur, not within markup declarations.
4444 * (This does not apply to references that occur in external parameter
4445 * entities or to the external subset.)
4446 */
4447void
4448xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4449 GROW;
4450 xmlParseElementDecl(ctxt);
4451 xmlParseAttributeListDecl(ctxt);
4452 xmlParseEntityDecl(ctxt);
4453 xmlParseNotationDecl(ctxt);
4454 xmlParsePI(ctxt);
4455 xmlParseComment(ctxt);
4456 /*
4457 * This is only for internal subset. On external entities,
4458 * the replacement is done before parsing stage
4459 */
4460 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4461 xmlParsePEReference(ctxt);
4462 ctxt->instate = XML_PARSER_DTD;
4463}
4464
4465/**
4466 * xmlParseTextDecl:
4467 * @ctxt: an XML parser context
4468 *
4469 * parse an XML declaration header for external entities
4470 *
4471 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4472 *
4473 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4474 */
4475
4476void
4477xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4478 xmlChar *version;
4479
4480 /*
4481 * We know that '<?xml' is here.
4482 */
4483 if ((RAW == '<') && (NXT(1) == '?') &&
4484 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4485 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4486 SKIP(5);
4487 } else {
4488 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490 ctxt->sax->error(ctxt->userData,
4491 "Text declaration '<?xml' required\n");
4492 ctxt->wellFormed = 0;
4493 ctxt->disableSAX = 1;
4494
4495 return;
4496 }
4497
4498 if (!IS_BLANK(CUR)) {
4499 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501 ctxt->sax->error(ctxt->userData,
4502 "Space needed after '<?xml'\n");
4503 ctxt->wellFormed = 0;
4504 ctxt->disableSAX = 1;
4505 }
4506 SKIP_BLANKS;
4507
4508 /*
4509 * We may have the VersionInfo here.
4510 */
4511 version = xmlParseVersionInfo(ctxt);
4512 if (version == NULL)
4513 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4514 ctxt->input->version = version;
4515
4516 /*
4517 * We must have the encoding declaration
4518 */
4519 if (!IS_BLANK(CUR)) {
4520 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4522 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4523 ctxt->wellFormed = 0;
4524 ctxt->disableSAX = 1;
4525 }
4526 xmlParseEncodingDecl(ctxt);
4527 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4528 /*
4529 * The XML REC instructs us to stop parsing right here
4530 */
4531 return;
4532 }
4533
4534 SKIP_BLANKS;
4535 if ((RAW == '?') && (NXT(1) == '>')) {
4536 SKIP(2);
4537 } else if (RAW == '>') {
4538 /* Deprecated old WD ... */
4539 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4541 ctxt->sax->error(ctxt->userData,
4542 "XML declaration must end-up with '?>'\n");
4543 ctxt->wellFormed = 0;
4544 ctxt->disableSAX = 1;
4545 NEXT;
4546 } else {
4547 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "parsing XML declaration: '?>' expected\n");
4551 ctxt->wellFormed = 0;
4552 ctxt->disableSAX = 1;
4553 MOVETO_ENDTAG(CUR_PTR);
4554 NEXT;
4555 }
4556}
4557
4558/*
4559 * xmlParseConditionalSections
4560 * @ctxt: an XML parser context
4561 *
4562 * [61] conditionalSect ::= includeSect | ignoreSect
4563 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4564 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4565 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4566 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4567 */
4568
4569void
4570xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4571 SKIP(3);
4572 SKIP_BLANKS;
4573 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4574 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4575 (NXT(6) == 'E')) {
4576 SKIP(7);
4577 SKIP_BLANKS;
4578 if (RAW != '[') {
4579 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4581 ctxt->sax->error(ctxt->userData,
4582 "XML conditional section '[' expected\n");
4583 ctxt->wellFormed = 0;
4584 ctxt->disableSAX = 1;
4585 } else {
4586 NEXT;
4587 }
4588 if (xmlParserDebugEntities) {
4589 if ((ctxt->input != NULL) && (ctxt->input->filename))
4590 xmlGenericError(xmlGenericErrorContext,
4591 "%s(%d): ", ctxt->input->filename,
4592 ctxt->input->line);
4593 xmlGenericError(xmlGenericErrorContext,
4594 "Entering INCLUDE Conditional Section\n");
4595 }
4596
4597 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4598 (NXT(2) != '>'))) {
4599 const xmlChar *check = CUR_PTR;
4600 int cons = ctxt->input->consumed;
4601 int tok = ctxt->token;
4602
4603 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4604 xmlParseConditionalSections(ctxt);
4605 } else if (IS_BLANK(CUR)) {
4606 NEXT;
4607 } else if (RAW == '%') {
4608 xmlParsePEReference(ctxt);
4609 } else
4610 xmlParseMarkupDecl(ctxt);
4611
4612 /*
4613 * Pop-up of finished entities.
4614 */
4615 while ((RAW == 0) && (ctxt->inputNr > 1))
4616 xmlPopInput(ctxt);
4617
4618 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4619 (tok == ctxt->token)) {
4620 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4622 ctxt->sax->error(ctxt->userData,
4623 "Content error in the external subset\n");
4624 ctxt->wellFormed = 0;
4625 ctxt->disableSAX = 1;
4626 break;
4627 }
4628 }
4629 if (xmlParserDebugEntities) {
4630 if ((ctxt->input != NULL) && (ctxt->input->filename))
4631 xmlGenericError(xmlGenericErrorContext,
4632 "%s(%d): ", ctxt->input->filename,
4633 ctxt->input->line);
4634 xmlGenericError(xmlGenericErrorContext,
4635 "Leaving INCLUDE Conditional Section\n");
4636 }
4637
4638 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4639 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4640 int state;
4641 int instate;
4642 int depth = 0;
4643
4644 SKIP(6);
4645 SKIP_BLANKS;
4646 if (RAW != '[') {
4647 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4649 ctxt->sax->error(ctxt->userData,
4650 "XML conditional section '[' expected\n");
4651 ctxt->wellFormed = 0;
4652 ctxt->disableSAX = 1;
4653 } else {
4654 NEXT;
4655 }
4656 if (xmlParserDebugEntities) {
4657 if ((ctxt->input != NULL) && (ctxt->input->filename))
4658 xmlGenericError(xmlGenericErrorContext,
4659 "%s(%d): ", ctxt->input->filename,
4660 ctxt->input->line);
4661 xmlGenericError(xmlGenericErrorContext,
4662 "Entering IGNORE Conditional Section\n");
4663 }
4664
4665 /*
4666 * Parse up to the end of the conditionnal section
4667 * But disable SAX event generating DTD building in the meantime
4668 */
4669 state = ctxt->disableSAX;
4670 instate = ctxt->instate;
4671 ctxt->disableSAX = 1;
4672 ctxt->instate = XML_PARSER_IGNORE;
4673
4674 while (depth >= 0) {
4675 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4676 depth++;
4677 SKIP(3);
4678 continue;
4679 }
4680 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4681 if (--depth >= 0) SKIP(3);
4682 continue;
4683 }
4684 NEXT;
4685 continue;
4686 }
4687
4688 ctxt->disableSAX = state;
4689 ctxt->instate = instate;
4690
4691 if (xmlParserDebugEntities) {
4692 if ((ctxt->input != NULL) && (ctxt->input->filename))
4693 xmlGenericError(xmlGenericErrorContext,
4694 "%s(%d): ", ctxt->input->filename,
4695 ctxt->input->line);
4696 xmlGenericError(xmlGenericErrorContext,
4697 "Leaving IGNORE Conditional Section\n");
4698 }
4699
4700 } else {
4701 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4703 ctxt->sax->error(ctxt->userData,
4704 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4705 ctxt->wellFormed = 0;
4706 ctxt->disableSAX = 1;
4707 }
4708
4709 if (RAW == 0)
4710 SHRINK;
4711
4712 if (RAW == 0) {
4713 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4715 ctxt->sax->error(ctxt->userData,
4716 "XML conditional section not closed\n");
4717 ctxt->wellFormed = 0;
4718 ctxt->disableSAX = 1;
4719 } else {
4720 SKIP(3);
4721 }
4722}
4723
4724/**
4725 * xmlParseExternalSubset:
4726 * @ctxt: an XML parser context
4727 * @ExternalID: the external identifier
4728 * @SystemID: the system identifier (or URL)
4729 *
4730 * parse Markup declarations from an external subset
4731 *
4732 * [30] extSubset ::= textDecl? extSubsetDecl
4733 *
4734 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4735 */
4736void
4737xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4738 const xmlChar *SystemID) {
4739 GROW;
4740 if ((RAW == '<') && (NXT(1) == '?') &&
4741 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4742 (NXT(4) == 'l')) {
4743 xmlParseTextDecl(ctxt);
4744 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4745 /*
4746 * The XML REC instructs us to stop parsing right here
4747 */
4748 ctxt->instate = XML_PARSER_EOF;
4749 return;
4750 }
4751 }
4752 if (ctxt->myDoc == NULL) {
4753 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4754 }
4755 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4756 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4757
4758 ctxt->instate = XML_PARSER_DTD;
4759 ctxt->external = 1;
4760 while (((RAW == '<') && (NXT(1) == '?')) ||
4761 ((RAW == '<') && (NXT(1) == '!')) ||
4762 IS_BLANK(CUR)) {
4763 const xmlChar *check = CUR_PTR;
4764 int cons = ctxt->input->consumed;
4765 int tok = ctxt->token;
4766
4767 GROW;
4768 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4769 xmlParseConditionalSections(ctxt);
4770 } else if (IS_BLANK(CUR)) {
4771 NEXT;
4772 } else if (RAW == '%') {
4773 xmlParsePEReference(ctxt);
4774 } else
4775 xmlParseMarkupDecl(ctxt);
4776
4777 /*
4778 * Pop-up of finished entities.
4779 */
4780 while ((RAW == 0) && (ctxt->inputNr > 1))
4781 xmlPopInput(ctxt);
4782
4783 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4784 (tok == ctxt->token)) {
4785 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4787 ctxt->sax->error(ctxt->userData,
4788 "Content error in the external subset\n");
4789 ctxt->wellFormed = 0;
4790 ctxt->disableSAX = 1;
4791 break;
4792 }
4793 }
4794
4795 if (RAW != 0) {
4796 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4798 ctxt->sax->error(ctxt->userData,
4799 "Extra content at the end of the document\n");
4800 ctxt->wellFormed = 0;
4801 ctxt->disableSAX = 1;
4802 }
4803
4804}
4805
4806/**
4807 * xmlParseReference:
4808 * @ctxt: an XML parser context
4809 *
4810 * parse and handle entity references in content, depending on the SAX
4811 * interface, this may end-up in a call to character() if this is a
4812 * CharRef, a predefined entity, if there is no reference() callback.
4813 * or if the parser was asked to switch to that mode.
4814 *
4815 * [67] Reference ::= EntityRef | CharRef
4816 */
4817void
4818xmlParseReference(xmlParserCtxtPtr ctxt) {
4819 xmlEntityPtr ent;
4820 xmlChar *val;
4821 if (RAW != '&') return;
4822
4823 if (NXT(1) == '#') {
4824 int i = 0;
4825 xmlChar out[10];
4826 int hex = NXT(2);
4827 int val = xmlParseCharRef(ctxt);
4828
4829 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4830 /*
4831 * So we are using non-UTF-8 buffers
4832 * Check that the char fit on 8bits, if not
4833 * generate a CharRef.
4834 */
4835 if (val <= 0xFF) {
4836 out[0] = val;
4837 out[1] = 0;
4838 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4839 (!ctxt->disableSAX))
4840 ctxt->sax->characters(ctxt->userData, out, 1);
4841 } else {
4842 if ((hex == 'x') || (hex == 'X'))
4843 sprintf((char *)out, "#x%X", val);
4844 else
4845 sprintf((char *)out, "#%d", val);
4846 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4847 (!ctxt->disableSAX))
4848 ctxt->sax->reference(ctxt->userData, out);
4849 }
4850 } else {
4851 /*
4852 * Just encode the value in UTF-8
4853 */
4854 COPY_BUF(0 ,out, i, val);
4855 out[i] = 0;
4856 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4857 (!ctxt->disableSAX))
4858 ctxt->sax->characters(ctxt->userData, out, i);
4859 }
4860 } else {
4861 ent = xmlParseEntityRef(ctxt);
4862 if (ent == NULL) return;
4863 if ((ent->name != NULL) &&
4864 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4865 xmlNodePtr list = NULL;
4866 int ret;
4867
4868
4869 /*
4870 * The first reference to the entity trigger a parsing phase
4871 * where the ent->children is filled with the result from
4872 * the parsing.
4873 */
4874 if (ent->children == NULL) {
4875 xmlChar *value;
4876 value = ent->content;
4877
4878 /*
4879 * Check that this entity is well formed
4880 */
4881 if ((value != NULL) &&
4882 (value[1] == 0) && (value[0] == '<') &&
4883 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4884 /*
4885 * DONE: get definite answer on this !!!
4886 * Lots of entity decls are used to declare a single
4887 * char
4888 * <!ENTITY lt "<">
4889 * Which seems to be valid since
4890 * 2.4: The ampersand character (&) and the left angle
4891 * bracket (<) may appear in their literal form only
4892 * when used ... They are also legal within the literal
4893 * entity value of an internal entity declaration;i
4894 * see "4.3.2 Well-Formed Parsed Entities".
4895 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4896 * Looking at the OASIS test suite and James Clark
4897 * tests, this is broken. However the XML REC uses
4898 * it. Is the XML REC not well-formed ????
4899 * This is a hack to avoid this problem
4900 *
4901 * ANSWER: since lt gt amp .. are already defined,
4902 * this is a redefinition and hence the fact that the
4903 * contentis not well balanced is not a Wf error, this
4904 * is lousy but acceptable.
4905 */
4906 list = xmlNewDocText(ctxt->myDoc, value);
4907 if (list != NULL) {
4908 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4909 (ent->children == NULL)) {
4910 ent->children = list;
4911 ent->last = list;
4912 list->parent = (xmlNodePtr) ent;
4913 } else {
4914 xmlFreeNodeList(list);
4915 }
4916 } else if (list != NULL) {
4917 xmlFreeNodeList(list);
4918 }
4919 } else {
4920 /*
4921 * 4.3.2: An internal general parsed entity is well-formed
4922 * if its replacement text matches the production labeled
4923 * content.
4924 */
4925 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4926 ctxt->depth++;
4927 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4928 ctxt->sax, NULL, ctxt->depth,
4929 value, &list);
4930 ctxt->depth--;
4931 } else if (ent->etype ==
4932 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4933 ctxt->depth++;
4934 ret = xmlParseExternalEntity(ctxt->myDoc,
4935 ctxt->sax, NULL, ctxt->depth,
4936 ent->URI, ent->ExternalID, &list);
4937 ctxt->depth--;
4938 } else {
4939 ret = -1;
4940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4941 ctxt->sax->error(ctxt->userData,
4942 "Internal: invalid entity type\n");
4943 }
4944 if (ret == XML_ERR_ENTITY_LOOP) {
4945 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4947 ctxt->sax->error(ctxt->userData,
4948 "Detected entity reference loop\n");
4949 ctxt->wellFormed = 0;
4950 ctxt->disableSAX = 1;
4951 } else if ((ret == 0) && (list != NULL)) {
4952 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4953 (ent->children == NULL)) {
4954 ent->children = list;
4955 while (list != NULL) {
4956 list->parent = (xmlNodePtr) ent;
4957 if (list->next == NULL)
4958 ent->last = list;
4959 list = list->next;
4960 }
4961 } else {
4962 xmlFreeNodeList(list);
4963 }
4964 } else if (ret > 0) {
4965 ctxt->errNo = ret;
4966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4967 ctxt->sax->error(ctxt->userData,
4968 "Entity value required\n");
4969 ctxt->wellFormed = 0;
4970 ctxt->disableSAX = 1;
4971 } else if (list != NULL) {
4972 xmlFreeNodeList(list);
4973 }
4974 }
4975 }
4976 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4977 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4978 /*
4979 * Create a node.
4980 */
4981 ctxt->sax->reference(ctxt->userData, ent->name);
4982 return;
4983 } else if (ctxt->replaceEntities) {
4984 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4985 /*
4986 * Seems we are generating the DOM content, do
4987 * a simple tree copy
4988 */
4989 xmlNodePtr new;
4990 new = xmlCopyNodeList(ent->children);
4991
4992 xmlAddChildList(ctxt->node, new);
4993 /*
4994 * This is to avoid a nasty side effect, see
4995 * characters() in SAX.c
4996 */
4997 ctxt->nodemem = 0;
4998 ctxt->nodelen = 0;
4999 return;
5000 } else {
5001 /*
5002 * Probably running in SAX mode
5003 */
5004 xmlParserInputPtr input;
5005
5006 input = xmlNewEntityInputStream(ctxt, ent);
5007 xmlPushInput(ctxt, input);
5008 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5009 (RAW == '<') && (NXT(1) == '?') &&
5010 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5011 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5012 xmlParseTextDecl(ctxt);
5013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5014 /*
5015 * The XML REC instructs us to stop parsing right here
5016 */
5017 ctxt->instate = XML_PARSER_EOF;
5018 return;
5019 }
5020 if (input->standalone == 1) {
5021 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5023 ctxt->sax->error(ctxt->userData,
5024 "external parsed entities cannot be standalone\n");
5025 ctxt->wellFormed = 0;
5026 ctxt->disableSAX = 1;
5027 }
5028 }
5029 return;
5030 }
5031 }
5032 } else {
5033 val = ent->content;
5034 if (val == NULL) return;
5035 /*
5036 * inline the entity.
5037 */
5038 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5039 (!ctxt->disableSAX))
5040 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5041 }
5042 }
5043}
5044
5045/**
5046 * xmlParseEntityRef:
5047 * @ctxt: an XML parser context
5048 *
5049 * parse ENTITY references declarations
5050 *
5051 * [68] EntityRef ::= '&' Name ';'
5052 *
5053 * [ WFC: Entity Declared ]
5054 * In a document without any DTD, a document with only an internal DTD
5055 * subset which contains no parameter entity references, or a document
5056 * with "standalone='yes'", the Name given in the entity reference
5057 * must match that in an entity declaration, except that well-formed
5058 * documents need not declare any of the following entities: amp, lt,
5059 * gt, apos, quot. The declaration of a parameter entity must precede
5060 * any reference to it. Similarly, the declaration of a general entity
5061 * must precede any reference to it which appears in a default value in an
5062 * attribute-list declaration. Note that if entities are declared in the
5063 * external subset or in external parameter entities, a non-validating
5064 * processor is not obligated to read and process their declarations;
5065 * for such documents, the rule that an entity must be declared is a
5066 * well-formedness constraint only if standalone='yes'.
5067 *
5068 * [ WFC: Parsed Entity ]
5069 * An entity reference must not contain the name of an unparsed entity
5070 *
5071 * Returns the xmlEntityPtr if found, or NULL otherwise.
5072 */
5073xmlEntityPtr
5074xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5075 xmlChar *name;
5076 xmlEntityPtr ent = NULL;
5077
5078 GROW;
5079
5080 if (RAW == '&') {
5081 NEXT;
5082 name = xmlParseName(ctxt);
5083 if (name == NULL) {
5084 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5086 ctxt->sax->error(ctxt->userData,
5087 "xmlParseEntityRef: no name\n");
5088 ctxt->wellFormed = 0;
5089 ctxt->disableSAX = 1;
5090 } else {
5091 if (RAW == ';') {
5092 NEXT;
5093 /*
5094 * Ask first SAX for entity resolution, otherwise try the
5095 * predefined set.
5096 */
5097 if (ctxt->sax != NULL) {
5098 if (ctxt->sax->getEntity != NULL)
5099 ent = ctxt->sax->getEntity(ctxt->userData, name);
5100 if (ent == NULL)
5101 ent = xmlGetPredefinedEntity(name);
5102 }
5103 /*
5104 * [ WFC: Entity Declared ]
5105 * In a document without any DTD, a document with only an
5106 * internal DTD subset which contains no parameter entity
5107 * references, or a document with "standalone='yes'", the
5108 * Name given in the entity reference must match that in an
5109 * entity declaration, except that well-formed documents
5110 * need not declare any of the following entities: amp, lt,
5111 * gt, apos, quot.
5112 * The declaration of a parameter entity must precede any
5113 * reference to it.
5114 * Similarly, the declaration of a general entity must
5115 * precede any reference to it which appears in a default
5116 * value in an attribute-list declaration. Note that if
5117 * entities are declared in the external subset or in
5118 * external parameter entities, a non-validating processor
5119 * is not obligated to read and process their declarations;
5120 * for such documents, the rule that an entity must be
5121 * declared is a well-formedness constraint only if
5122 * standalone='yes'.
5123 */
5124 if (ent == NULL) {
5125 if ((ctxt->standalone == 1) ||
5126 ((ctxt->hasExternalSubset == 0) &&
5127 (ctxt->hasPErefs == 0))) {
5128 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5130 ctxt->sax->error(ctxt->userData,
5131 "Entity '%s' not defined\n", name);
5132 ctxt->wellFormed = 0;
5133 ctxt->disableSAX = 1;
5134 } else {
5135 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5136 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5137 ctxt->sax->warning(ctxt->userData,
5138 "Entity '%s' not defined\n", name);
5139 }
5140 }
5141
5142 /*
5143 * [ WFC: Parsed Entity ]
5144 * An entity reference must not contain the name of an
5145 * unparsed entity
5146 */
5147 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5148 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5150 ctxt->sax->error(ctxt->userData,
5151 "Entity reference to unparsed entity %s\n", name);
5152 ctxt->wellFormed = 0;
5153 ctxt->disableSAX = 1;
5154 }
5155
5156 /*
5157 * [ WFC: No External Entity References ]
5158 * Attribute values cannot contain direct or indirect
5159 * entity references to external entities.
5160 */
5161 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5162 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5163 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5165 ctxt->sax->error(ctxt->userData,
5166 "Attribute references external entity '%s'\n", name);
5167 ctxt->wellFormed = 0;
5168 ctxt->disableSAX = 1;
5169 }
5170 /*
5171 * [ WFC: No < in Attribute Values ]
5172 * The replacement text of any entity referred to directly or
5173 * indirectly in an attribute value (other than "&lt;") must
5174 * not contain a <.
5175 */
5176 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5177 (ent != NULL) &&
5178 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5179 (ent->content != NULL) &&
5180 (xmlStrchr(ent->content, '<'))) {
5181 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5183 ctxt->sax->error(ctxt->userData,
5184 "'<' in entity '%s' is not allowed in attributes values\n", name);
5185 ctxt->wellFormed = 0;
5186 ctxt->disableSAX = 1;
5187 }
5188
5189 /*
5190 * Internal check, no parameter entities here ...
5191 */
5192 else {
5193 switch (ent->etype) {
5194 case XML_INTERNAL_PARAMETER_ENTITY:
5195 case XML_EXTERNAL_PARAMETER_ENTITY:
5196 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5198 ctxt->sax->error(ctxt->userData,
5199 "Attempt to reference the parameter entity '%s'\n", name);
5200 ctxt->wellFormed = 0;
5201 ctxt->disableSAX = 1;
5202 break;
5203 default:
5204 break;
5205 }
5206 }
5207
5208 /*
5209 * [ WFC: No Recursion ]
5210 * A parsed entity must not contain a recursive reference
5211 * to itself, either directly or indirectly.
5212 * Done somewhere else
5213 */
5214
5215 } else {
5216 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5218 ctxt->sax->error(ctxt->userData,
5219 "xmlParseEntityRef: expecting ';'\n");
5220 ctxt->wellFormed = 0;
5221 ctxt->disableSAX = 1;
5222 }
5223 xmlFree(name);
5224 }
5225 }
5226 return(ent);
5227}
5228
5229/**
5230 * xmlParseStringEntityRef:
5231 * @ctxt: an XML parser context
5232 * @str: a pointer to an index in the string
5233 *
5234 * parse ENTITY references declarations, but this version parses it from
5235 * a string value.
5236 *
5237 * [68] EntityRef ::= '&' Name ';'
5238 *
5239 * [ WFC: Entity Declared ]
5240 * In a document without any DTD, a document with only an internal DTD
5241 * subset which contains no parameter entity references, or a document
5242 * with "standalone='yes'", the Name given in the entity reference
5243 * must match that in an entity declaration, except that well-formed
5244 * documents need not declare any of the following entities: amp, lt,
5245 * gt, apos, quot. The declaration of a parameter entity must precede
5246 * any reference to it. Similarly, the declaration of a general entity
5247 * must precede any reference to it which appears in a default value in an
5248 * attribute-list declaration. Note that if entities are declared in the
5249 * external subset or in external parameter entities, a non-validating
5250 * processor is not obligated to read and process their declarations;
5251 * for such documents, the rule that an entity must be declared is a
5252 * well-formedness constraint only if standalone='yes'.
5253 *
5254 * [ WFC: Parsed Entity ]
5255 * An entity reference must not contain the name of an unparsed entity
5256 *
5257 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5258 * is updated to the current location in the string.
5259 */
5260xmlEntityPtr
5261xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5262 xmlChar *name;
5263 const xmlChar *ptr;
5264 xmlChar cur;
5265 xmlEntityPtr ent = NULL;
5266
5267 if ((str == NULL) || (*str == NULL))
5268 return(NULL);
5269 ptr = *str;
5270 cur = *ptr;
5271 if (cur == '&') {
5272 ptr++;
5273 cur = *ptr;
5274 name = xmlParseStringName(ctxt, &ptr);
5275 if (name == NULL) {
5276 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5278 ctxt->sax->error(ctxt->userData,
5279 "xmlParseEntityRef: no name\n");
5280 ctxt->wellFormed = 0;
5281 ctxt->disableSAX = 1;
5282 } else {
5283 if (*ptr == ';') {
5284 ptr++;
5285 /*
5286 * Ask first SAX for entity resolution, otherwise try the
5287 * predefined set.
5288 */
5289 if (ctxt->sax != NULL) {
5290 if (ctxt->sax->getEntity != NULL)
5291 ent = ctxt->sax->getEntity(ctxt->userData, name);
5292 if (ent == NULL)
5293 ent = xmlGetPredefinedEntity(name);
5294 }
5295 /*
5296 * [ WFC: Entity Declared ]
5297 * In a document without any DTD, a document with only an
5298 * internal DTD subset which contains no parameter entity
5299 * references, or a document with "standalone='yes'", the
5300 * Name given in the entity reference must match that in an
5301 * entity declaration, except that well-formed documents
5302 * need not declare any of the following entities: amp, lt,
5303 * gt, apos, quot.
5304 * The declaration of a parameter entity must precede any
5305 * reference to it.
5306 * Similarly, the declaration of a general entity must
5307 * precede any reference to it which appears in a default
5308 * value in an attribute-list declaration. Note that if
5309 * entities are declared in the external subset or in
5310 * external parameter entities, a non-validating processor
5311 * is not obligated to read and process their declarations;
5312 * for such documents, the rule that an entity must be
5313 * declared is a well-formedness constraint only if
5314 * standalone='yes'.
5315 */
5316 if (ent == NULL) {
5317 if ((ctxt->standalone == 1) ||
5318 ((ctxt->hasExternalSubset == 0) &&
5319 (ctxt->hasPErefs == 0))) {
5320 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5322 ctxt->sax->error(ctxt->userData,
5323 "Entity '%s' not defined\n", name);
5324 ctxt->wellFormed = 0;
5325 ctxt->disableSAX = 1;
5326 } else {
5327 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5328 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5329 ctxt->sax->warning(ctxt->userData,
5330 "Entity '%s' not defined\n", name);
5331 }
5332 }
5333
5334 /*
5335 * [ WFC: Parsed Entity ]
5336 * An entity reference must not contain the name of an
5337 * unparsed entity
5338 */
5339 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5340 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5342 ctxt->sax->error(ctxt->userData,
5343 "Entity reference to unparsed entity %s\n", name);
5344 ctxt->wellFormed = 0;
5345 ctxt->disableSAX = 1;
5346 }
5347
5348 /*
5349 * [ WFC: No External Entity References ]
5350 * Attribute values cannot contain direct or indirect
5351 * entity references to external entities.
5352 */
5353 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5354 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5355 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5357 ctxt->sax->error(ctxt->userData,
5358 "Attribute references external entity '%s'\n", name);
5359 ctxt->wellFormed = 0;
5360 ctxt->disableSAX = 1;
5361 }
5362 /*
5363 * [ WFC: No < in Attribute Values ]
5364 * The replacement text of any entity referred to directly or
5365 * indirectly in an attribute value (other than "&lt;") must
5366 * not contain a <.
5367 */
5368 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5369 (ent != NULL) &&
5370 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5371 (ent->content != NULL) &&
5372 (xmlStrchr(ent->content, '<'))) {
5373 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5375 ctxt->sax->error(ctxt->userData,
5376 "'<' in entity '%s' is not allowed in attributes values\n", name);
5377 ctxt->wellFormed = 0;
5378 ctxt->disableSAX = 1;
5379 }
5380
5381 /*
5382 * Internal check, no parameter entities here ...
5383 */
5384 else {
5385 switch (ent->etype) {
5386 case XML_INTERNAL_PARAMETER_ENTITY:
5387 case XML_EXTERNAL_PARAMETER_ENTITY:
5388 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5390 ctxt->sax->error(ctxt->userData,
5391 "Attempt to reference the parameter entity '%s'\n", name);
5392 ctxt->wellFormed = 0;
5393 ctxt->disableSAX = 1;
5394 break;
5395 default:
5396 break;
5397 }
5398 }
5399
5400 /*
5401 * [ WFC: No Recursion ]
5402 * A parsed entity must not contain a recursive reference
5403 * to itself, either directly or indirectly.
5404 * Done somewhwere else
5405 */
5406
5407 } else {
5408 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5410 ctxt->sax->error(ctxt->userData,
5411 "xmlParseEntityRef: expecting ';'\n");
5412 ctxt->wellFormed = 0;
5413 ctxt->disableSAX = 1;
5414 }
5415 xmlFree(name);
5416 }
5417 }
5418 *str = ptr;
5419 return(ent);
5420}
5421
5422/**
5423 * xmlParsePEReference:
5424 * @ctxt: an XML parser context
5425 *
5426 * parse PEReference declarations
5427 * The entity content is handled directly by pushing it's content as
5428 * a new input stream.
5429 *
5430 * [69] PEReference ::= '%' Name ';'
5431 *
5432 * [ WFC: No Recursion ]
5433 * A parsed entity must not contain a recursive
5434 * reference to itself, either directly or indirectly.
5435 *
5436 * [ WFC: Entity Declared ]
5437 * In a document without any DTD, a document with only an internal DTD
5438 * subset which contains no parameter entity references, or a document
5439 * with "standalone='yes'", ... ... The declaration of a parameter
5440 * entity must precede any reference to it...
5441 *
5442 * [ VC: Entity Declared ]
5443 * In a document with an external subset or external parameter entities
5444 * with "standalone='no'", ... ... The declaration of a parameter entity
5445 * must precede any reference to it...
5446 *
5447 * [ WFC: In DTD ]
5448 * Parameter-entity references may only appear in the DTD.
5449 * NOTE: misleading but this is handled.
5450 */
5451void
5452xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5453 xmlChar *name;
5454 xmlEntityPtr entity = NULL;
5455 xmlParserInputPtr input;
5456
5457 if (RAW == '%') {
5458 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005459 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if (name == NULL) {
5461 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5463 ctxt->sax->error(ctxt->userData,
5464 "xmlParsePEReference: no name\n");
5465 ctxt->wellFormed = 0;
5466 ctxt->disableSAX = 1;
5467 } else {
5468 if (RAW == ';') {
5469 NEXT;
5470 if ((ctxt->sax != NULL) &&
5471 (ctxt->sax->getParameterEntity != NULL))
5472 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5473 name);
5474 if (entity == NULL) {
5475 /*
5476 * [ WFC: Entity Declared ]
5477 * In a document without any DTD, a document with only an
5478 * internal DTD subset which contains no parameter entity
5479 * references, or a document with "standalone='yes'", ...
5480 * ... The declaration of a parameter entity must precede
5481 * any reference to it...
5482 */
5483 if ((ctxt->standalone == 1) ||
5484 ((ctxt->hasExternalSubset == 0) &&
5485 (ctxt->hasPErefs == 0))) {
5486 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5487 if ((!ctxt->disableSAX) &&
5488 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5489 ctxt->sax->error(ctxt->userData,
5490 "PEReference: %%%s; not found\n", name);
5491 ctxt->wellFormed = 0;
5492 ctxt->disableSAX = 1;
5493 } else {
5494 /*
5495 * [ VC: Entity Declared ]
5496 * In a document with an external subset or external
5497 * parameter entities with "standalone='no'", ...
5498 * ... The declaration of a parameter entity must precede
5499 * any reference to it...
5500 */
5501 if ((!ctxt->disableSAX) &&
5502 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5503 ctxt->sax->warning(ctxt->userData,
5504 "PEReference: %%%s; not found\n", name);
5505 ctxt->valid = 0;
5506 }
5507 } else {
5508 /*
5509 * Internal checking in case the entity quest barfed
5510 */
5511 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5512 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5513 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5514 ctxt->sax->warning(ctxt->userData,
5515 "Internal: %%%s; is not a parameter entity\n", name);
5516 } else {
5517 /*
5518 * TODO !!!
5519 * handle the extra spaces added before and after
5520 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5521 */
5522 input = xmlNewEntityInputStream(ctxt, entity);
5523 xmlPushInput(ctxt, input);
5524 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5525 (RAW == '<') && (NXT(1) == '?') &&
5526 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5527 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5528 xmlParseTextDecl(ctxt);
5529 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5530 /*
5531 * The XML REC instructs us to stop parsing
5532 * right here
5533 */
5534 ctxt->instate = XML_PARSER_EOF;
5535 xmlFree(name);
5536 return;
5537 }
5538 }
5539 if (ctxt->token == 0)
5540 ctxt->token = ' ';
5541 }
5542 }
5543 ctxt->hasPErefs = 1;
5544 } else {
5545 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5547 ctxt->sax->error(ctxt->userData,
5548 "xmlParsePEReference: expecting ';'\n");
5549 ctxt->wellFormed = 0;
5550 ctxt->disableSAX = 1;
5551 }
5552 xmlFree(name);
5553 }
5554 }
5555}
5556
5557/**
5558 * xmlParseStringPEReference:
5559 * @ctxt: an XML parser context
5560 * @str: a pointer to an index in the string
5561 *
5562 * parse PEReference declarations
5563 *
5564 * [69] PEReference ::= '%' Name ';'
5565 *
5566 * [ WFC: No Recursion ]
5567 * A parsed entity must not contain a recursive
5568 * reference to itself, either directly or indirectly.
5569 *
5570 * [ WFC: Entity Declared ]
5571 * In a document without any DTD, a document with only an internal DTD
5572 * subset which contains no parameter entity references, or a document
5573 * with "standalone='yes'", ... ... The declaration of a parameter
5574 * entity must precede any reference to it...
5575 *
5576 * [ VC: Entity Declared ]
5577 * In a document with an external subset or external parameter entities
5578 * with "standalone='no'", ... ... The declaration of a parameter entity
5579 * must precede any reference to it...
5580 *
5581 * [ WFC: In DTD ]
5582 * Parameter-entity references may only appear in the DTD.
5583 * NOTE: misleading but this is handled.
5584 *
5585 * Returns the string of the entity content.
5586 * str is updated to the current value of the index
5587 */
5588xmlEntityPtr
5589xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5590 const xmlChar *ptr;
5591 xmlChar cur;
5592 xmlChar *name;
5593 xmlEntityPtr entity = NULL;
5594
5595 if ((str == NULL) || (*str == NULL)) return(NULL);
5596 ptr = *str;
5597 cur = *ptr;
5598 if (cur == '%') {
5599 ptr++;
5600 cur = *ptr;
5601 name = xmlParseStringName(ctxt, &ptr);
5602 if (name == NULL) {
5603 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
5606 "xmlParseStringPEReference: no name\n");
5607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 } else {
5610 cur = *ptr;
5611 if (cur == ';') {
5612 ptr++;
5613 cur = *ptr;
5614 if ((ctxt->sax != NULL) &&
5615 (ctxt->sax->getParameterEntity != NULL))
5616 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5617 name);
5618 if (entity == NULL) {
5619 /*
5620 * [ WFC: Entity Declared ]
5621 * In a document without any DTD, a document with only an
5622 * internal DTD subset which contains no parameter entity
5623 * references, or a document with "standalone='yes'", ...
5624 * ... The declaration of a parameter entity must precede
5625 * any reference to it...
5626 */
5627 if ((ctxt->standalone == 1) ||
5628 ((ctxt->hasExternalSubset == 0) &&
5629 (ctxt->hasPErefs == 0))) {
5630 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5632 ctxt->sax->error(ctxt->userData,
5633 "PEReference: %%%s; not found\n", name);
5634 ctxt->wellFormed = 0;
5635 ctxt->disableSAX = 1;
5636 } else {
5637 /*
5638 * [ VC: Entity Declared ]
5639 * In a document with an external subset or external
5640 * parameter entities with "standalone='no'", ...
5641 * ... The declaration of a parameter entity must
5642 * precede any reference to it...
5643 */
5644 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5645 ctxt->sax->warning(ctxt->userData,
5646 "PEReference: %%%s; not found\n", name);
5647 ctxt->valid = 0;
5648 }
5649 } else {
5650 /*
5651 * Internal checking in case the entity quest barfed
5652 */
5653 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5654 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5655 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5656 ctxt->sax->warning(ctxt->userData,
5657 "Internal: %%%s; is not a parameter entity\n", name);
5658 }
5659 }
5660 ctxt->hasPErefs = 1;
5661 } else {
5662 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5664 ctxt->sax->error(ctxt->userData,
5665 "xmlParseStringPEReference: expecting ';'\n");
5666 ctxt->wellFormed = 0;
5667 ctxt->disableSAX = 1;
5668 }
5669 xmlFree(name);
5670 }
5671 }
5672 *str = ptr;
5673 return(entity);
5674}
5675
5676/**
5677 * xmlParseDocTypeDecl:
5678 * @ctxt: an XML parser context
5679 *
5680 * parse a DOCTYPE declaration
5681 *
5682 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5683 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5684 *
5685 * [ VC: Root Element Type ]
5686 * The Name in the document type declaration must match the element
5687 * type of the root element.
5688 */
5689
5690void
5691xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5692 xmlChar *name = NULL;
5693 xmlChar *ExternalID = NULL;
5694 xmlChar *URI = NULL;
5695
5696 /*
5697 * We know that '<!DOCTYPE' has been detected.
5698 */
5699 SKIP(9);
5700
5701 SKIP_BLANKS;
5702
5703 /*
5704 * Parse the DOCTYPE name.
5705 */
5706 name = xmlParseName(ctxt);
5707 if (name == NULL) {
5708 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5710 ctxt->sax->error(ctxt->userData,
5711 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5712 ctxt->wellFormed = 0;
5713 ctxt->disableSAX = 1;
5714 }
5715 ctxt->intSubName = name;
5716
5717 SKIP_BLANKS;
5718
5719 /*
5720 * Check for SystemID and ExternalID
5721 */
5722 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5723
5724 if ((URI != NULL) || (ExternalID != NULL)) {
5725 ctxt->hasExternalSubset = 1;
5726 }
5727 ctxt->extSubURI = URI;
5728 ctxt->extSubSystem = ExternalID;
5729
5730 SKIP_BLANKS;
5731
5732 /*
5733 * Create and update the internal subset.
5734 */
5735 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5736 (!ctxt->disableSAX))
5737 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5738
5739 /*
5740 * Is there any internal subset declarations ?
5741 * they are handled separately in xmlParseInternalSubset()
5742 */
5743 if (RAW == '[')
5744 return;
5745
5746 /*
5747 * We should be at the end of the DOCTYPE declaration.
5748 */
5749 if (RAW != '>') {
5750 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5752 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5753 ctxt->wellFormed = 0;
5754 ctxt->disableSAX = 1;
5755 }
5756 NEXT;
5757}
5758
5759/**
5760 * xmlParseInternalsubset:
5761 * @ctxt: an XML parser context
5762 *
5763 * parse the internal subset declaration
5764 *
5765 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5766 */
5767
5768void
5769xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5770 /*
5771 * Is there any DTD definition ?
5772 */
5773 if (RAW == '[') {
5774 ctxt->instate = XML_PARSER_DTD;
5775 NEXT;
5776 /*
5777 * Parse the succession of Markup declarations and
5778 * PEReferences.
5779 * Subsequence (markupdecl | PEReference | S)*
5780 */
5781 while (RAW != ']') {
5782 const xmlChar *check = CUR_PTR;
5783 int cons = ctxt->input->consumed;
5784
5785 SKIP_BLANKS;
5786 xmlParseMarkupDecl(ctxt);
5787 xmlParsePEReference(ctxt);
5788
5789 /*
5790 * Pop-up of finished entities.
5791 */
5792 while ((RAW == 0) && (ctxt->inputNr > 1))
5793 xmlPopInput(ctxt);
5794
5795 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5796 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5798 ctxt->sax->error(ctxt->userData,
5799 "xmlParseInternalSubset: error detected in Markup declaration\n");
5800 ctxt->wellFormed = 0;
5801 ctxt->disableSAX = 1;
5802 break;
5803 }
5804 }
5805 if (RAW == ']') {
5806 NEXT;
5807 SKIP_BLANKS;
5808 }
5809 }
5810
5811 /*
5812 * We should be at the end of the DOCTYPE declaration.
5813 */
5814 if (RAW != '>') {
5815 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5818 ctxt->wellFormed = 0;
5819 ctxt->disableSAX = 1;
5820 }
5821 NEXT;
5822}
5823
5824/**
5825 * xmlParseAttribute:
5826 * @ctxt: an XML parser context
5827 * @value: a xmlChar ** used to store the value of the attribute
5828 *
5829 * parse an attribute
5830 *
5831 * [41] Attribute ::= Name Eq AttValue
5832 *
5833 * [ WFC: No External Entity References ]
5834 * Attribute values cannot contain direct or indirect entity references
5835 * to external entities.
5836 *
5837 * [ WFC: No < in Attribute Values ]
5838 * The replacement text of any entity referred to directly or indirectly in
5839 * an attribute value (other than "&lt;") must not contain a <.
5840 *
5841 * [ VC: Attribute Value Type ]
5842 * The attribute must have been declared; the value must be of the type
5843 * declared for it.
5844 *
5845 * [25] Eq ::= S? '=' S?
5846 *
5847 * With namespace:
5848 *
5849 * [NS 11] Attribute ::= QName Eq AttValue
5850 *
5851 * Also the case QName == xmlns:??? is handled independently as a namespace
5852 * definition.
5853 *
5854 * Returns the attribute name, and the value in *value.
5855 */
5856
5857xmlChar *
5858xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5859 xmlChar *name, *val;
5860
5861 *value = NULL;
5862 name = xmlParseName(ctxt);
5863 if (name == NULL) {
5864 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5866 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5867 ctxt->wellFormed = 0;
5868 ctxt->disableSAX = 1;
5869 return(NULL);
5870 }
5871
5872 /*
5873 * read the value
5874 */
5875 SKIP_BLANKS;
5876 if (RAW == '=') {
5877 NEXT;
5878 SKIP_BLANKS;
5879 val = xmlParseAttValue(ctxt);
5880 ctxt->instate = XML_PARSER_CONTENT;
5881 } else {
5882 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5884 ctxt->sax->error(ctxt->userData,
5885 "Specification mandate value for attribute %s\n", name);
5886 ctxt->wellFormed = 0;
5887 ctxt->disableSAX = 1;
5888 xmlFree(name);
5889 return(NULL);
5890 }
5891
5892 /*
5893 * Check that xml:lang conforms to the specification
5894 * No more registered as an error, just generate a warning now
5895 * since this was deprecated in XML second edition
5896 */
5897 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5898 if (!xmlCheckLanguageID(val)) {
5899 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5900 ctxt->sax->warning(ctxt->userData,
5901 "Malformed value for xml:lang : %s\n", val);
5902 }
5903 }
5904
5905 /*
5906 * Check that xml:space conforms to the specification
5907 */
5908 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5909 if (xmlStrEqual(val, BAD_CAST "default"))
5910 *(ctxt->space) = 0;
5911 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5912 *(ctxt->space) = 1;
5913 else {
5914 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5916 ctxt->sax->error(ctxt->userData,
5917"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5918 val);
5919 ctxt->wellFormed = 0;
5920 ctxt->disableSAX = 1;
5921 }
5922 }
5923
5924 *value = val;
5925 return(name);
5926}
5927
5928/**
5929 * xmlParseStartTag:
5930 * @ctxt: an XML parser context
5931 *
5932 * parse a start of tag either for rule element or
5933 * EmptyElement. In both case we don't parse the tag closing chars.
5934 *
5935 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5936 *
5937 * [ WFC: Unique Att Spec ]
5938 * No attribute name may appear more than once in the same start-tag or
5939 * empty-element tag.
5940 *
5941 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5942 *
5943 * [ WFC: Unique Att Spec ]
5944 * No attribute name may appear more than once in the same start-tag or
5945 * empty-element tag.
5946 *
5947 * With namespace:
5948 *
5949 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5950 *
5951 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5952 *
5953 * Returns the element name parsed
5954 */
5955
5956xmlChar *
5957xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5958 xmlChar *name;
5959 xmlChar *attname;
5960 xmlChar *attvalue;
5961 const xmlChar **atts = NULL;
5962 int nbatts = 0;
5963 int maxatts = 0;
5964 int i;
5965
5966 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005967 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005968
5969 name = xmlParseName(ctxt);
5970 if (name == NULL) {
5971 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5973 ctxt->sax->error(ctxt->userData,
5974 "xmlParseStartTag: invalid element name\n");
5975 ctxt->wellFormed = 0;
5976 ctxt->disableSAX = 1;
5977 return(NULL);
5978 }
5979
5980 /*
5981 * Now parse the attributes, it ends up with the ending
5982 *
5983 * (S Attribute)* S?
5984 */
5985 SKIP_BLANKS;
5986 GROW;
5987
Daniel Veillard21a0f912001-02-25 19:54:14 +00005988 while ((RAW != '>') &&
5989 ((RAW != '/') || (NXT(1) != '>')) &&
5990 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005991 const xmlChar *q = CUR_PTR;
5992 int cons = ctxt->input->consumed;
5993
5994 attname = xmlParseAttribute(ctxt, &attvalue);
5995 if ((attname != NULL) && (attvalue != NULL)) {
5996 /*
5997 * [ WFC: Unique Att Spec ]
5998 * No attribute name may appear more than once in the same
5999 * start-tag or empty-element tag.
6000 */
6001 for (i = 0; i < nbatts;i += 2) {
6002 if (xmlStrEqual(atts[i], attname)) {
6003 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6005 ctxt->sax->error(ctxt->userData,
6006 "Attribute %s redefined\n",
6007 attname);
6008 ctxt->wellFormed = 0;
6009 ctxt->disableSAX = 1;
6010 xmlFree(attname);
6011 xmlFree(attvalue);
6012 goto failed;
6013 }
6014 }
6015
6016 /*
6017 * Add the pair to atts
6018 */
6019 if (atts == NULL) {
6020 maxatts = 10;
6021 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6022 if (atts == NULL) {
6023 xmlGenericError(xmlGenericErrorContext,
6024 "malloc of %ld byte failed\n",
6025 maxatts * (long)sizeof(xmlChar *));
6026 return(NULL);
6027 }
6028 } else if (nbatts + 4 > maxatts) {
6029 maxatts *= 2;
6030 atts = (const xmlChar **) xmlRealloc((void *) atts,
6031 maxatts * sizeof(xmlChar *));
6032 if (atts == NULL) {
6033 xmlGenericError(xmlGenericErrorContext,
6034 "realloc of %ld byte failed\n",
6035 maxatts * (long)sizeof(xmlChar *));
6036 return(NULL);
6037 }
6038 }
6039 atts[nbatts++] = attname;
6040 atts[nbatts++] = attvalue;
6041 atts[nbatts] = NULL;
6042 atts[nbatts + 1] = NULL;
6043 } else {
6044 if (attname != NULL)
6045 xmlFree(attname);
6046 if (attvalue != NULL)
6047 xmlFree(attvalue);
6048 }
6049
6050failed:
6051
6052 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6053 break;
6054 if (!IS_BLANK(RAW)) {
6055 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057 ctxt->sax->error(ctxt->userData,
6058 "attributes construct error\n");
6059 ctxt->wellFormed = 0;
6060 ctxt->disableSAX = 1;
6061 }
6062 SKIP_BLANKS;
6063 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6064 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6066 ctxt->sax->error(ctxt->userData,
6067 "xmlParseStartTag: problem parsing attributes\n");
6068 ctxt->wellFormed = 0;
6069 ctxt->disableSAX = 1;
6070 break;
6071 }
6072 GROW;
6073 }
6074
6075 /*
6076 * SAX: Start of Element !
6077 */
6078 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6079 (!ctxt->disableSAX))
6080 ctxt->sax->startElement(ctxt->userData, name, atts);
6081
6082 if (atts != NULL) {
6083 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6084 xmlFree((void *) atts);
6085 }
6086 return(name);
6087}
6088
6089/**
6090 * xmlParseEndTag:
6091 * @ctxt: an XML parser context
6092 *
6093 * parse an end of tag
6094 *
6095 * [42] ETag ::= '</' Name S? '>'
6096 *
6097 * With namespace
6098 *
6099 * [NS 9] ETag ::= '</' QName S? '>'
6100 */
6101
6102void
6103xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6104 xmlChar *name;
6105 xmlChar *oldname;
6106
6107 GROW;
6108 if ((RAW != '<') || (NXT(1) != '/')) {
6109 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6111 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6112 ctxt->wellFormed = 0;
6113 ctxt->disableSAX = 1;
6114 return;
6115 }
6116 SKIP(2);
6117
6118 name = xmlParseName(ctxt);
6119
6120 /*
6121 * We should definitely be at the ending "S? '>'" part
6122 */
6123 GROW;
6124 SKIP_BLANKS;
6125 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6126 ctxt->errNo = XML_ERR_GT_REQUIRED;
6127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6128 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6129 ctxt->wellFormed = 0;
6130 ctxt->disableSAX = 1;
6131 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006132 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006133
6134 /*
6135 * [ WFC: Element Type Match ]
6136 * The Name in an element's end-tag must match the element type in the
6137 * start-tag.
6138 *
6139 */
6140 if ((name == NULL) || (ctxt->name == NULL) ||
6141 (!xmlStrEqual(name, ctxt->name))) {
6142 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6144 if ((name != NULL) && (ctxt->name != NULL)) {
6145 ctxt->sax->error(ctxt->userData,
6146 "Opening and ending tag mismatch: %s and %s\n",
6147 ctxt->name, name);
6148 } else if (ctxt->name != NULL) {
6149 ctxt->sax->error(ctxt->userData,
6150 "Ending tag eror for: %s\n", ctxt->name);
6151 } else {
6152 ctxt->sax->error(ctxt->userData,
6153 "Ending tag error: internal error ???\n");
6154 }
6155
6156 }
6157 ctxt->wellFormed = 0;
6158 ctxt->disableSAX = 1;
6159 }
6160
6161 /*
6162 * SAX: End of Tag
6163 */
6164 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6165 (!ctxt->disableSAX))
6166 ctxt->sax->endElement(ctxt->userData, name);
6167
6168 if (name != NULL)
6169 xmlFree(name);
6170 oldname = namePop(ctxt);
6171 spacePop(ctxt);
6172 if (oldname != NULL) {
6173#ifdef DEBUG_STACK
6174 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6175#endif
6176 xmlFree(oldname);
6177 }
6178 return;
6179}
6180
6181/**
6182 * xmlParseCDSect:
6183 * @ctxt: an XML parser context
6184 *
6185 * Parse escaped pure raw content.
6186 *
6187 * [18] CDSect ::= CDStart CData CDEnd
6188 *
6189 * [19] CDStart ::= '<![CDATA['
6190 *
6191 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6192 *
6193 * [21] CDEnd ::= ']]>'
6194 */
6195void
6196xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6197 xmlChar *buf = NULL;
6198 int len = 0;
6199 int size = XML_PARSER_BUFFER_SIZE;
6200 int r, rl;
6201 int s, sl;
6202 int cur, l;
6203 int count = 0;
6204
6205 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6206 (NXT(2) == '[') && (NXT(3) == 'C') &&
6207 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6208 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6209 (NXT(8) == '[')) {
6210 SKIP(9);
6211 } else
6212 return;
6213
6214 ctxt->instate = XML_PARSER_CDATA_SECTION;
6215 r = CUR_CHAR(rl);
6216 if (!IS_CHAR(r)) {
6217 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6219 ctxt->sax->error(ctxt->userData,
6220 "CData section not finished\n");
6221 ctxt->wellFormed = 0;
6222 ctxt->disableSAX = 1;
6223 ctxt->instate = XML_PARSER_CONTENT;
6224 return;
6225 }
6226 NEXTL(rl);
6227 s = CUR_CHAR(sl);
6228 if (!IS_CHAR(s)) {
6229 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6231 ctxt->sax->error(ctxt->userData,
6232 "CData section not finished\n");
6233 ctxt->wellFormed = 0;
6234 ctxt->disableSAX = 1;
6235 ctxt->instate = XML_PARSER_CONTENT;
6236 return;
6237 }
6238 NEXTL(sl);
6239 cur = CUR_CHAR(l);
6240 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6241 if (buf == NULL) {
6242 xmlGenericError(xmlGenericErrorContext,
6243 "malloc of %d byte failed\n", size);
6244 return;
6245 }
6246 while (IS_CHAR(cur) &&
6247 ((r != ']') || (s != ']') || (cur != '>'))) {
6248 if (len + 5 >= size) {
6249 size *= 2;
6250 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6251 if (buf == NULL) {
6252 xmlGenericError(xmlGenericErrorContext,
6253 "realloc of %d byte failed\n", size);
6254 return;
6255 }
6256 }
6257 COPY_BUF(rl,buf,len,r);
6258 r = s;
6259 rl = sl;
6260 s = cur;
6261 sl = l;
6262 count++;
6263 if (count > 50) {
6264 GROW;
6265 count = 0;
6266 }
6267 NEXTL(l);
6268 cur = CUR_CHAR(l);
6269 }
6270 buf[len] = 0;
6271 ctxt->instate = XML_PARSER_CONTENT;
6272 if (cur != '>') {
6273 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6275 ctxt->sax->error(ctxt->userData,
6276 "CData section not finished\n%.50s\n", buf);
6277 ctxt->wellFormed = 0;
6278 ctxt->disableSAX = 1;
6279 xmlFree(buf);
6280 return;
6281 }
6282 NEXTL(l);
6283
6284 /*
6285 * Ok the buffer is to be consumed as cdata.
6286 */
6287 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6288 if (ctxt->sax->cdataBlock != NULL)
6289 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6290 }
6291 xmlFree(buf);
6292}
6293
6294/**
6295 * xmlParseContent:
6296 * @ctxt: an XML parser context
6297 *
6298 * Parse a content:
6299 *
6300 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6301 */
6302
6303void
6304xmlParseContent(xmlParserCtxtPtr ctxt) {
6305 GROW;
6306 while (((RAW != 0) || (ctxt->token != 0)) &&
6307 ((RAW != '<') || (NXT(1) != '/'))) {
6308 const xmlChar *test = CUR_PTR;
6309 int cons = ctxt->input->consumed;
6310 xmlChar tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006311 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006312
6313 /*
6314 * Handle possible processed charrefs.
6315 */
6316 if (ctxt->token != 0) {
6317 xmlParseCharData(ctxt, 0);
6318 }
6319 /*
6320 * First case : a Processing Instruction.
6321 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006322 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006323 xmlParsePI(ctxt);
6324 }
6325
6326 /*
6327 * Second case : a CDSection
6328 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006329 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006330 (NXT(2) == '[') && (NXT(3) == 'C') &&
6331 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6332 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6333 (NXT(8) == '[')) {
6334 xmlParseCDSect(ctxt);
6335 }
6336
6337 /*
6338 * Third case : a comment
6339 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006340 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006341 (NXT(2) == '-') && (NXT(3) == '-')) {
6342 xmlParseComment(ctxt);
6343 ctxt->instate = XML_PARSER_CONTENT;
6344 }
6345
6346 /*
6347 * Fourth case : a sub-element.
6348 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006349 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006350 xmlParseElement(ctxt);
6351 }
6352
6353 /*
6354 * Fifth case : a reference. If if has not been resolved,
6355 * parsing returns it's Name, create the node
6356 */
6357
Daniel Veillard21a0f912001-02-25 19:54:14 +00006358 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006359 xmlParseReference(ctxt);
6360 }
6361
6362 /*
6363 * Last case, text. Note that References are handled directly.
6364 */
6365 else {
6366 xmlParseCharData(ctxt, 0);
6367 }
6368
6369 GROW;
6370 /*
6371 * Pop-up of finished entities.
6372 */
6373 while ((RAW == 0) && (ctxt->inputNr > 1))
6374 xmlPopInput(ctxt);
6375 SHRINK;
6376
6377 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6378 (tok == ctxt->token)) {
6379 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6381 ctxt->sax->error(ctxt->userData,
6382 "detected an error in element content\n");
6383 ctxt->wellFormed = 0;
6384 ctxt->disableSAX = 1;
6385 ctxt->instate = XML_PARSER_EOF;
6386 break;
6387 }
6388 }
6389}
6390
6391/**
6392 * xmlParseElement:
6393 * @ctxt: an XML parser context
6394 *
6395 * parse an XML element, this is highly recursive
6396 *
6397 * [39] element ::= EmptyElemTag | STag content ETag
6398 *
6399 * [ WFC: Element Type Match ]
6400 * The Name in an element's end-tag must match the element type in the
6401 * start-tag.
6402 *
6403 * [ VC: Element Valid ]
6404 * An element is valid if there is a declaration matching elementdecl
6405 * where the Name matches the element type and one of the following holds:
6406 * - The declaration matches EMPTY and the element has no content.
6407 * - The declaration matches children and the sequence of child elements
6408 * belongs to the language generated by the regular expression in the
6409 * content model, with optional white space (characters matching the
6410 * nonterminal S) between each pair of child elements.
6411 * - The declaration matches Mixed and the content consists of character
6412 * data and child elements whose types match names in the content model.
6413 * - The declaration matches ANY, and the types of any child elements have
6414 * been declared.
6415 */
6416
6417void
6418xmlParseElement(xmlParserCtxtPtr ctxt) {
6419 const xmlChar *openTag = CUR_PTR;
6420 xmlChar *name;
6421 xmlChar *oldname;
6422 xmlParserNodeInfo node_info;
6423 xmlNodePtr ret;
6424
6425 /* Capture start position */
6426 if (ctxt->record_info) {
6427 node_info.begin_pos = ctxt->input->consumed +
6428 (CUR_PTR - ctxt->input->base);
6429 node_info.begin_line = ctxt->input->line;
6430 }
6431
6432 if (ctxt->spaceNr == 0)
6433 spacePush(ctxt, -1);
6434 else
6435 spacePush(ctxt, *ctxt->space);
6436
6437 name = xmlParseStartTag(ctxt);
6438 if (name == NULL) {
6439 spacePop(ctxt);
6440 return;
6441 }
6442 namePush(ctxt, name);
6443 ret = ctxt->node;
6444
6445 /*
6446 * [ VC: Root Element Type ]
6447 * The Name in the document type declaration must match the element
6448 * type of the root element.
6449 */
6450 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6451 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6452 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6453
6454 /*
6455 * Check for an Empty Element.
6456 */
6457 if ((RAW == '/') && (NXT(1) == '>')) {
6458 SKIP(2);
6459 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6460 (!ctxt->disableSAX))
6461 ctxt->sax->endElement(ctxt->userData, name);
6462 oldname = namePop(ctxt);
6463 spacePop(ctxt);
6464 if (oldname != NULL) {
6465#ifdef DEBUG_STACK
6466 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6467#endif
6468 xmlFree(oldname);
6469 }
6470 if ( ret != NULL && ctxt->record_info ) {
6471 node_info.end_pos = ctxt->input->consumed +
6472 (CUR_PTR - ctxt->input->base);
6473 node_info.end_line = ctxt->input->line;
6474 node_info.node = ret;
6475 xmlParserAddNodeInfo(ctxt, &node_info);
6476 }
6477 return;
6478 }
6479 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006480 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006481 } else {
6482 ctxt->errNo = XML_ERR_GT_REQUIRED;
6483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6484 ctxt->sax->error(ctxt->userData,
6485 "Couldn't find end of Start Tag\n%.30s\n",
6486 openTag);
6487 ctxt->wellFormed = 0;
6488 ctxt->disableSAX = 1;
6489
6490 /*
6491 * end of parsing of this node.
6492 */
6493 nodePop(ctxt);
6494 oldname = namePop(ctxt);
6495 spacePop(ctxt);
6496 if (oldname != NULL) {
6497#ifdef DEBUG_STACK
6498 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6499#endif
6500 xmlFree(oldname);
6501 }
6502
6503 /*
6504 * Capture end position and add node
6505 */
6506 if ( ret != NULL && ctxt->record_info ) {
6507 node_info.end_pos = ctxt->input->consumed +
6508 (CUR_PTR - ctxt->input->base);
6509 node_info.end_line = ctxt->input->line;
6510 node_info.node = ret;
6511 xmlParserAddNodeInfo(ctxt, &node_info);
6512 }
6513 return;
6514 }
6515
6516 /*
6517 * Parse the content of the element:
6518 */
6519 xmlParseContent(ctxt);
6520 if (!IS_CHAR(RAW)) {
6521 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6523 ctxt->sax->error(ctxt->userData,
6524 "Premature end of data in tag %.30s\n", openTag);
6525 ctxt->wellFormed = 0;
6526 ctxt->disableSAX = 1;
6527
6528 /*
6529 * end of parsing of this node.
6530 */
6531 nodePop(ctxt);
6532 oldname = namePop(ctxt);
6533 spacePop(ctxt);
6534 if (oldname != NULL) {
6535#ifdef DEBUG_STACK
6536 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6537#endif
6538 xmlFree(oldname);
6539 }
6540 return;
6541 }
6542
6543 /*
6544 * parse the end of tag: '</' should be here.
6545 */
6546 xmlParseEndTag(ctxt);
6547
6548 /*
6549 * Capture end position and add node
6550 */
6551 if ( ret != NULL && ctxt->record_info ) {
6552 node_info.end_pos = ctxt->input->consumed +
6553 (CUR_PTR - ctxt->input->base);
6554 node_info.end_line = ctxt->input->line;
6555 node_info.node = ret;
6556 xmlParserAddNodeInfo(ctxt, &node_info);
6557 }
6558}
6559
6560/**
6561 * xmlParseVersionNum:
6562 * @ctxt: an XML parser context
6563 *
6564 * parse the XML version value.
6565 *
6566 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6567 *
6568 * Returns the string giving the XML version number, or NULL
6569 */
6570xmlChar *
6571xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6572 xmlChar *buf = NULL;
6573 int len = 0;
6574 int size = 10;
6575 xmlChar cur;
6576
6577 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6578 if (buf == NULL) {
6579 xmlGenericError(xmlGenericErrorContext,
6580 "malloc of %d byte failed\n", size);
6581 return(NULL);
6582 }
6583 cur = CUR;
6584 while (((cur >= 'a') && (cur <= 'z')) ||
6585 ((cur >= 'A') && (cur <= 'Z')) ||
6586 ((cur >= '0') && (cur <= '9')) ||
6587 (cur == '_') || (cur == '.') ||
6588 (cur == ':') || (cur == '-')) {
6589 if (len + 1 >= size) {
6590 size *= 2;
6591 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6592 if (buf == NULL) {
6593 xmlGenericError(xmlGenericErrorContext,
6594 "realloc of %d byte failed\n", size);
6595 return(NULL);
6596 }
6597 }
6598 buf[len++] = cur;
6599 NEXT;
6600 cur=CUR;
6601 }
6602 buf[len] = 0;
6603 return(buf);
6604}
6605
6606/**
6607 * xmlParseVersionInfo:
6608 * @ctxt: an XML parser context
6609 *
6610 * parse the XML version.
6611 *
6612 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6613 *
6614 * [25] Eq ::= S? '=' S?
6615 *
6616 * Returns the version string, e.g. "1.0"
6617 */
6618
6619xmlChar *
6620xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6621 xmlChar *version = NULL;
6622 const xmlChar *q;
6623
6624 if ((RAW == 'v') && (NXT(1) == 'e') &&
6625 (NXT(2) == 'r') && (NXT(3) == 's') &&
6626 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6627 (NXT(6) == 'n')) {
6628 SKIP(7);
6629 SKIP_BLANKS;
6630 if (RAW != '=') {
6631 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6633 ctxt->sax->error(ctxt->userData,
6634 "xmlParseVersionInfo : expected '='\n");
6635 ctxt->wellFormed = 0;
6636 ctxt->disableSAX = 1;
6637 return(NULL);
6638 }
6639 NEXT;
6640 SKIP_BLANKS;
6641 if (RAW == '"') {
6642 NEXT;
6643 q = CUR_PTR;
6644 version = xmlParseVersionNum(ctxt);
6645 if (RAW != '"') {
6646 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6648 ctxt->sax->error(ctxt->userData,
6649 "String not closed\n%.50s\n", q);
6650 ctxt->wellFormed = 0;
6651 ctxt->disableSAX = 1;
6652 } else
6653 NEXT;
6654 } else if (RAW == '\''){
6655 NEXT;
6656 q = CUR_PTR;
6657 version = xmlParseVersionNum(ctxt);
6658 if (RAW != '\'') {
6659 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6661 ctxt->sax->error(ctxt->userData,
6662 "String not closed\n%.50s\n", q);
6663 ctxt->wellFormed = 0;
6664 ctxt->disableSAX = 1;
6665 } else
6666 NEXT;
6667 } else {
6668 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6670 ctxt->sax->error(ctxt->userData,
6671 "xmlParseVersionInfo : expected ' or \"\n");
6672 ctxt->wellFormed = 0;
6673 ctxt->disableSAX = 1;
6674 }
6675 }
6676 return(version);
6677}
6678
6679/**
6680 * xmlParseEncName:
6681 * @ctxt: an XML parser context
6682 *
6683 * parse the XML encoding name
6684 *
6685 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6686 *
6687 * Returns the encoding name value or NULL
6688 */
6689xmlChar *
6690xmlParseEncName(xmlParserCtxtPtr ctxt) {
6691 xmlChar *buf = NULL;
6692 int len = 0;
6693 int size = 10;
6694 xmlChar cur;
6695
6696 cur = CUR;
6697 if (((cur >= 'a') && (cur <= 'z')) ||
6698 ((cur >= 'A') && (cur <= 'Z'))) {
6699 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6700 if (buf == NULL) {
6701 xmlGenericError(xmlGenericErrorContext,
6702 "malloc of %d byte failed\n", size);
6703 return(NULL);
6704 }
6705
6706 buf[len++] = cur;
6707 NEXT;
6708 cur = CUR;
6709 while (((cur >= 'a') && (cur <= 'z')) ||
6710 ((cur >= 'A') && (cur <= 'Z')) ||
6711 ((cur >= '0') && (cur <= '9')) ||
6712 (cur == '.') || (cur == '_') ||
6713 (cur == '-')) {
6714 if (len + 1 >= size) {
6715 size *= 2;
6716 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6717 if (buf == NULL) {
6718 xmlGenericError(xmlGenericErrorContext,
6719 "realloc of %d byte failed\n", size);
6720 return(NULL);
6721 }
6722 }
6723 buf[len++] = cur;
6724 NEXT;
6725 cur = CUR;
6726 if (cur == 0) {
6727 SHRINK;
6728 GROW;
6729 cur = CUR;
6730 }
6731 }
6732 buf[len] = 0;
6733 } else {
6734 ctxt->errNo = XML_ERR_ENCODING_NAME;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6736 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6737 ctxt->wellFormed = 0;
6738 ctxt->disableSAX = 1;
6739 }
6740 return(buf);
6741}
6742
6743/**
6744 * xmlParseEncodingDecl:
6745 * @ctxt: an XML parser context
6746 *
6747 * parse the XML encoding declaration
6748 *
6749 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6750 *
6751 * this setups the conversion filters.
6752 *
6753 * Returns the encoding value or NULL
6754 */
6755
6756xmlChar *
6757xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6758 xmlChar *encoding = NULL;
6759 const xmlChar *q;
6760
6761 SKIP_BLANKS;
6762 if ((RAW == 'e') && (NXT(1) == 'n') &&
6763 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6764 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6765 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6766 SKIP(8);
6767 SKIP_BLANKS;
6768 if (RAW != '=') {
6769 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6771 ctxt->sax->error(ctxt->userData,
6772 "xmlParseEncodingDecl : expected '='\n");
6773 ctxt->wellFormed = 0;
6774 ctxt->disableSAX = 1;
6775 return(NULL);
6776 }
6777 NEXT;
6778 SKIP_BLANKS;
6779 if (RAW == '"') {
6780 NEXT;
6781 q = CUR_PTR;
6782 encoding = xmlParseEncName(ctxt);
6783 if (RAW != '"') {
6784 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6786 ctxt->sax->error(ctxt->userData,
6787 "String not closed\n%.50s\n", q);
6788 ctxt->wellFormed = 0;
6789 ctxt->disableSAX = 1;
6790 } else
6791 NEXT;
6792 } else if (RAW == '\''){
6793 NEXT;
6794 q = CUR_PTR;
6795 encoding = xmlParseEncName(ctxt);
6796 if (RAW != '\'') {
6797 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6799 ctxt->sax->error(ctxt->userData,
6800 "String not closed\n%.50s\n", q);
6801 ctxt->wellFormed = 0;
6802 ctxt->disableSAX = 1;
6803 } else
6804 NEXT;
6805 } else if (RAW == '"'){
6806 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6808 ctxt->sax->error(ctxt->userData,
6809 "xmlParseEncodingDecl : expected ' or \"\n");
6810 ctxt->wellFormed = 0;
6811 ctxt->disableSAX = 1;
6812 }
6813 if (encoding != NULL) {
6814 xmlCharEncoding enc;
6815 xmlCharEncodingHandlerPtr handler;
6816
6817 if (ctxt->input->encoding != NULL)
6818 xmlFree((xmlChar *) ctxt->input->encoding);
6819 ctxt->input->encoding = encoding;
6820
6821 enc = xmlParseCharEncoding((const char *) encoding);
6822 /*
6823 * registered set of known encodings
6824 */
6825 if (enc != XML_CHAR_ENCODING_ERROR) {
6826 xmlSwitchEncoding(ctxt, enc);
6827 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6828 xmlFree(encoding);
6829 return(NULL);
6830 }
6831 } else {
6832 /*
6833 * fallback for unknown encodings
6834 */
6835 handler = xmlFindCharEncodingHandler((const char *) encoding);
6836 if (handler != NULL) {
6837 xmlSwitchToEncoding(ctxt, handler);
6838 } else {
6839 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6841 ctxt->sax->error(ctxt->userData,
6842 "Unsupported encoding %s\n", encoding);
6843 return(NULL);
6844 }
6845 }
6846 }
6847 }
6848 return(encoding);
6849}
6850
6851/**
6852 * xmlParseSDDecl:
6853 * @ctxt: an XML parser context
6854 *
6855 * parse the XML standalone declaration
6856 *
6857 * [32] SDDecl ::= S 'standalone' Eq
6858 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6859 *
6860 * [ VC: Standalone Document Declaration ]
6861 * TODO The standalone document declaration must have the value "no"
6862 * if any external markup declarations contain declarations of:
6863 * - attributes with default values, if elements to which these
6864 * attributes apply appear in the document without specifications
6865 * of values for these attributes, or
6866 * - entities (other than amp, lt, gt, apos, quot), if references
6867 * to those entities appear in the document, or
6868 * - attributes with values subject to normalization, where the
6869 * attribute appears in the document with a value which will change
6870 * as a result of normalization, or
6871 * - element types with element content, if white space occurs directly
6872 * within any instance of those types.
6873 *
6874 * Returns 1 if standalone, 0 otherwise
6875 */
6876
6877int
6878xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6879 int standalone = -1;
6880
6881 SKIP_BLANKS;
6882 if ((RAW == 's') && (NXT(1) == 't') &&
6883 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6884 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6885 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6886 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6887 SKIP(10);
6888 SKIP_BLANKS;
6889 if (RAW != '=') {
6890 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6892 ctxt->sax->error(ctxt->userData,
6893 "XML standalone declaration : expected '='\n");
6894 ctxt->wellFormed = 0;
6895 ctxt->disableSAX = 1;
6896 return(standalone);
6897 }
6898 NEXT;
6899 SKIP_BLANKS;
6900 if (RAW == '\''){
6901 NEXT;
6902 if ((RAW == 'n') && (NXT(1) == 'o')) {
6903 standalone = 0;
6904 SKIP(2);
6905 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6906 (NXT(2) == 's')) {
6907 standalone = 1;
6908 SKIP(3);
6909 } else {
6910 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912 ctxt->sax->error(ctxt->userData,
6913 "standalone accepts only 'yes' or 'no'\n");
6914 ctxt->wellFormed = 0;
6915 ctxt->disableSAX = 1;
6916 }
6917 if (RAW != '\'') {
6918 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6920 ctxt->sax->error(ctxt->userData, "String not closed\n");
6921 ctxt->wellFormed = 0;
6922 ctxt->disableSAX = 1;
6923 } else
6924 NEXT;
6925 } else if (RAW == '"'){
6926 NEXT;
6927 if ((RAW == 'n') && (NXT(1) == 'o')) {
6928 standalone = 0;
6929 SKIP(2);
6930 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6931 (NXT(2) == 's')) {
6932 standalone = 1;
6933 SKIP(3);
6934 } else {
6935 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6937 ctxt->sax->error(ctxt->userData,
6938 "standalone accepts only 'yes' or 'no'\n");
6939 ctxt->wellFormed = 0;
6940 ctxt->disableSAX = 1;
6941 }
6942 if (RAW != '"') {
6943 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6945 ctxt->sax->error(ctxt->userData, "String not closed\n");
6946 ctxt->wellFormed = 0;
6947 ctxt->disableSAX = 1;
6948 } else
6949 NEXT;
6950 } else {
6951 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6953 ctxt->sax->error(ctxt->userData,
6954 "Standalone value not found\n");
6955 ctxt->wellFormed = 0;
6956 ctxt->disableSAX = 1;
6957 }
6958 }
6959 return(standalone);
6960}
6961
6962/**
6963 * xmlParseXMLDecl:
6964 * @ctxt: an XML parser context
6965 *
6966 * parse an XML declaration header
6967 *
6968 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6969 */
6970
6971void
6972xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6973 xmlChar *version;
6974
6975 /*
6976 * We know that '<?xml' is here.
6977 */
6978 SKIP(5);
6979
6980 if (!IS_BLANK(RAW)) {
6981 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6983 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6984 ctxt->wellFormed = 0;
6985 ctxt->disableSAX = 1;
6986 }
6987 SKIP_BLANKS;
6988
6989 /*
6990 * We should have the VersionInfo here.
6991 */
6992 version = xmlParseVersionInfo(ctxt);
6993 if (version == NULL)
6994 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6995 ctxt->version = xmlStrdup(version);
6996 xmlFree(version);
6997
6998 /*
6999 * We may have the encoding declaration
7000 */
7001 if (!IS_BLANK(RAW)) {
7002 if ((RAW == '?') && (NXT(1) == '>')) {
7003 SKIP(2);
7004 return;
7005 }
7006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7008 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7009 ctxt->wellFormed = 0;
7010 ctxt->disableSAX = 1;
7011 }
7012 xmlParseEncodingDecl(ctxt);
7013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7014 /*
7015 * The XML REC instructs us to stop parsing right here
7016 */
7017 return;
7018 }
7019
7020 /*
7021 * We may have the standalone status.
7022 */
7023 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7024 if ((RAW == '?') && (NXT(1) == '>')) {
7025 SKIP(2);
7026 return;
7027 }
7028 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7030 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7031 ctxt->wellFormed = 0;
7032 ctxt->disableSAX = 1;
7033 }
7034 SKIP_BLANKS;
7035 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7036
7037 SKIP_BLANKS;
7038 if ((RAW == '?') && (NXT(1) == '>')) {
7039 SKIP(2);
7040 } else if (RAW == '>') {
7041 /* Deprecated old WD ... */
7042 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData,
7045 "XML declaration must end-up with '?>'\n");
7046 ctxt->wellFormed = 0;
7047 ctxt->disableSAX = 1;
7048 NEXT;
7049 } else {
7050 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7052 ctxt->sax->error(ctxt->userData,
7053 "parsing XML declaration: '?>' expected\n");
7054 ctxt->wellFormed = 0;
7055 ctxt->disableSAX = 1;
7056 MOVETO_ENDTAG(CUR_PTR);
7057 NEXT;
7058 }
7059}
7060
7061/**
7062 * xmlParseMisc:
7063 * @ctxt: an XML parser context
7064 *
7065 * parse an XML Misc* optionnal field.
7066 *
7067 * [27] Misc ::= Comment | PI | S
7068 */
7069
7070void
7071xmlParseMisc(xmlParserCtxtPtr ctxt) {
7072 while (((RAW == '<') && (NXT(1) == '?')) ||
7073 ((RAW == '<') && (NXT(1) == '!') &&
7074 (NXT(2) == '-') && (NXT(3) == '-')) ||
7075 IS_BLANK(CUR)) {
7076 if ((RAW == '<') && (NXT(1) == '?')) {
7077 xmlParsePI(ctxt);
7078 } else if (IS_BLANK(CUR)) {
7079 NEXT;
7080 } else
7081 xmlParseComment(ctxt);
7082 }
7083}
7084
7085/**
7086 * xmlParseDocument:
7087 * @ctxt: an XML parser context
7088 *
7089 * parse an XML document (and build a tree if using the standard SAX
7090 * interface).
7091 *
7092 * [1] document ::= prolog element Misc*
7093 *
7094 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7095 *
7096 * Returns 0, -1 in case of error. the parser context is augmented
7097 * as a result of the parsing.
7098 */
7099
7100int
7101xmlParseDocument(xmlParserCtxtPtr ctxt) {
7102 xmlChar start[4];
7103 xmlCharEncoding enc;
7104
7105 xmlInitParser();
7106
7107 GROW;
7108
7109 /*
7110 * SAX: beginning of the document processing.
7111 */
7112 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7113 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7114
7115 /*
7116 * Get the 4 first bytes and decode the charset
7117 * if enc != XML_CHAR_ENCODING_NONE
7118 * plug some encoding conversion routines.
7119 */
7120 start[0] = RAW;
7121 start[1] = NXT(1);
7122 start[2] = NXT(2);
7123 start[3] = NXT(3);
7124 enc = xmlDetectCharEncoding(start, 4);
7125 if (enc != XML_CHAR_ENCODING_NONE) {
7126 xmlSwitchEncoding(ctxt, enc);
7127 }
7128
7129
7130 if (CUR == 0) {
7131 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7133 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7134 ctxt->wellFormed = 0;
7135 ctxt->disableSAX = 1;
7136 }
7137
7138 /*
7139 * Check for the XMLDecl in the Prolog.
7140 */
7141 GROW;
7142 if ((RAW == '<') && (NXT(1) == '?') &&
7143 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7144 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7145
7146 /*
7147 * Note that we will switch encoding on the fly.
7148 */
7149 xmlParseXMLDecl(ctxt);
7150 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7151 /*
7152 * The XML REC instructs us to stop parsing right here
7153 */
7154 return(-1);
7155 }
7156 ctxt->standalone = ctxt->input->standalone;
7157 SKIP_BLANKS;
7158 } else {
7159 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7160 }
7161 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7162 ctxt->sax->startDocument(ctxt->userData);
7163
7164 /*
7165 * The Misc part of the Prolog
7166 */
7167 GROW;
7168 xmlParseMisc(ctxt);
7169
7170 /*
7171 * Then possibly doc type declaration(s) and more Misc
7172 * (doctypedecl Misc*)?
7173 */
7174 GROW;
7175 if ((RAW == '<') && (NXT(1) == '!') &&
7176 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7177 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7178 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7179 (NXT(8) == 'E')) {
7180
7181 ctxt->inSubset = 1;
7182 xmlParseDocTypeDecl(ctxt);
7183 if (RAW == '[') {
7184 ctxt->instate = XML_PARSER_DTD;
7185 xmlParseInternalSubset(ctxt);
7186 }
7187
7188 /*
7189 * Create and update the external subset.
7190 */
7191 ctxt->inSubset = 2;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7193 (!ctxt->disableSAX))
7194 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7195 ctxt->extSubSystem, ctxt->extSubURI);
7196 ctxt->inSubset = 0;
7197
7198
7199 ctxt->instate = XML_PARSER_PROLOG;
7200 xmlParseMisc(ctxt);
7201 }
7202
7203 /*
7204 * Time to start parsing the tree itself
7205 */
7206 GROW;
7207 if (RAW != '<') {
7208 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7210 ctxt->sax->error(ctxt->userData,
7211 "Start tag expected, '<' not found\n");
7212 ctxt->wellFormed = 0;
7213 ctxt->disableSAX = 1;
7214 ctxt->instate = XML_PARSER_EOF;
7215 } else {
7216 ctxt->instate = XML_PARSER_CONTENT;
7217 xmlParseElement(ctxt);
7218 ctxt->instate = XML_PARSER_EPILOG;
7219
7220
7221 /*
7222 * The Misc part at the end
7223 */
7224 xmlParseMisc(ctxt);
7225
7226 if (RAW != 0) {
7227 ctxt->errNo = XML_ERR_DOCUMENT_END;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "Extra content at the end of the document\n");
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 }
7234 ctxt->instate = XML_PARSER_EOF;
7235 }
7236
7237 /*
7238 * SAX: end of the document processing.
7239 */
7240 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7241 (!ctxt->disableSAX))
7242 ctxt->sax->endDocument(ctxt->userData);
7243
7244 if (! ctxt->wellFormed) return(-1);
7245 return(0);
7246}
7247
7248/**
7249 * xmlParseExtParsedEnt:
7250 * @ctxt: an XML parser context
7251 *
7252 * parse a genreral parsed entity
7253 * An external general parsed entity is well-formed if it matches the
7254 * production labeled extParsedEnt.
7255 *
7256 * [78] extParsedEnt ::= TextDecl? content
7257 *
7258 * Returns 0, -1 in case of error. the parser context is augmented
7259 * as a result of the parsing.
7260 */
7261
7262int
7263xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7264 xmlChar start[4];
7265 xmlCharEncoding enc;
7266
7267 xmlDefaultSAXHandlerInit();
7268
7269 GROW;
7270
7271 /*
7272 * SAX: beginning of the document processing.
7273 */
7274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7275 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7276
7277 /*
7278 * Get the 4 first bytes and decode the charset
7279 * if enc != XML_CHAR_ENCODING_NONE
7280 * plug some encoding conversion routines.
7281 */
7282 start[0] = RAW;
7283 start[1] = NXT(1);
7284 start[2] = NXT(2);
7285 start[3] = NXT(3);
7286 enc = xmlDetectCharEncoding(start, 4);
7287 if (enc != XML_CHAR_ENCODING_NONE) {
7288 xmlSwitchEncoding(ctxt, enc);
7289 }
7290
7291
7292 if (CUR == 0) {
7293 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7295 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7296 ctxt->wellFormed = 0;
7297 ctxt->disableSAX = 1;
7298 }
7299
7300 /*
7301 * Check for the XMLDecl in the Prolog.
7302 */
7303 GROW;
7304 if ((RAW == '<') && (NXT(1) == '?') &&
7305 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7306 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7307
7308 /*
7309 * Note that we will switch encoding on the fly.
7310 */
7311 xmlParseXMLDecl(ctxt);
7312 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7313 /*
7314 * The XML REC instructs us to stop parsing right here
7315 */
7316 return(-1);
7317 }
7318 SKIP_BLANKS;
7319 } else {
7320 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7321 }
7322 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7323 ctxt->sax->startDocument(ctxt->userData);
7324
7325 /*
7326 * Doing validity checking on chunk doesn't make sense
7327 */
7328 ctxt->instate = XML_PARSER_CONTENT;
7329 ctxt->validate = 0;
7330 ctxt->loadsubset = 0;
7331 ctxt->depth = 0;
7332
7333 xmlParseContent(ctxt);
7334
7335 if ((RAW == '<') && (NXT(1) == '/')) {
7336 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7338 ctxt->sax->error(ctxt->userData,
7339 "chunk is not well balanced\n");
7340 ctxt->wellFormed = 0;
7341 ctxt->disableSAX = 1;
7342 } else if (RAW != 0) {
7343 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7345 ctxt->sax->error(ctxt->userData,
7346 "extra content at the end of well balanced chunk\n");
7347 ctxt->wellFormed = 0;
7348 ctxt->disableSAX = 1;
7349 }
7350
7351 /*
7352 * SAX: end of the document processing.
7353 */
7354 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7355 (!ctxt->disableSAX))
7356 ctxt->sax->endDocument(ctxt->userData);
7357
7358 if (! ctxt->wellFormed) return(-1);
7359 return(0);
7360}
7361
7362/************************************************************************
7363 * *
7364 * Progressive parsing interfaces *
7365 * *
7366 ************************************************************************/
7367
7368/**
7369 * xmlParseLookupSequence:
7370 * @ctxt: an XML parser context
7371 * @first: the first char to lookup
7372 * @next: the next char to lookup or zero
7373 * @third: the next char to lookup or zero
7374 *
7375 * Try to find if a sequence (first, next, third) or just (first next) or
7376 * (first) is available in the input stream.
7377 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7378 * to avoid rescanning sequences of bytes, it DOES change the state of the
7379 * parser, do not use liberally.
7380 *
7381 * Returns the index to the current parsing point if the full sequence
7382 * is available, -1 otherwise.
7383 */
7384int
7385xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7386 xmlChar next, xmlChar third) {
7387 int base, len;
7388 xmlParserInputPtr in;
7389 const xmlChar *buf;
7390
7391 in = ctxt->input;
7392 if (in == NULL) return(-1);
7393 base = in->cur - in->base;
7394 if (base < 0) return(-1);
7395 if (ctxt->checkIndex > base)
7396 base = ctxt->checkIndex;
7397 if (in->buf == NULL) {
7398 buf = in->base;
7399 len = in->length;
7400 } else {
7401 buf = in->buf->buffer->content;
7402 len = in->buf->buffer->use;
7403 }
7404 /* take into account the sequence length */
7405 if (third) len -= 2;
7406 else if (next) len --;
7407 for (;base < len;base++) {
7408 if (buf[base] == first) {
7409 if (third != 0) {
7410 if ((buf[base + 1] != next) ||
7411 (buf[base + 2] != third)) continue;
7412 } else if (next != 0) {
7413 if (buf[base + 1] != next) continue;
7414 }
7415 ctxt->checkIndex = 0;
7416#ifdef DEBUG_PUSH
7417 if (next == 0)
7418 xmlGenericError(xmlGenericErrorContext,
7419 "PP: lookup '%c' found at %d\n",
7420 first, base);
7421 else if (third == 0)
7422 xmlGenericError(xmlGenericErrorContext,
7423 "PP: lookup '%c%c' found at %d\n",
7424 first, next, base);
7425 else
7426 xmlGenericError(xmlGenericErrorContext,
7427 "PP: lookup '%c%c%c' found at %d\n",
7428 first, next, third, base);
7429#endif
7430 return(base - (in->cur - in->base));
7431 }
7432 }
7433 ctxt->checkIndex = base;
7434#ifdef DEBUG_PUSH
7435 if (next == 0)
7436 xmlGenericError(xmlGenericErrorContext,
7437 "PP: lookup '%c' failed\n", first);
7438 else if (third == 0)
7439 xmlGenericError(xmlGenericErrorContext,
7440 "PP: lookup '%c%c' failed\n", first, next);
7441 else
7442 xmlGenericError(xmlGenericErrorContext,
7443 "PP: lookup '%c%c%c' failed\n", first, next, third);
7444#endif
7445 return(-1);
7446}
7447
7448/**
7449 * xmlParseTryOrFinish:
7450 * @ctxt: an XML parser context
7451 * @terminate: last chunk indicator
7452 *
7453 * Try to progress on parsing
7454 *
7455 * Returns zero if no parsing was possible
7456 */
7457int
7458xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7459 int ret = 0;
7460 int avail;
7461 xmlChar cur, next;
7462
7463#ifdef DEBUG_PUSH
7464 switch (ctxt->instate) {
7465 case XML_PARSER_EOF:
7466 xmlGenericError(xmlGenericErrorContext,
7467 "PP: try EOF\n"); break;
7468 case XML_PARSER_START:
7469 xmlGenericError(xmlGenericErrorContext,
7470 "PP: try START\n"); break;
7471 case XML_PARSER_MISC:
7472 xmlGenericError(xmlGenericErrorContext,
7473 "PP: try MISC\n");break;
7474 case XML_PARSER_COMMENT:
7475 xmlGenericError(xmlGenericErrorContext,
7476 "PP: try COMMENT\n");break;
7477 case XML_PARSER_PROLOG:
7478 xmlGenericError(xmlGenericErrorContext,
7479 "PP: try PROLOG\n");break;
7480 case XML_PARSER_START_TAG:
7481 xmlGenericError(xmlGenericErrorContext,
7482 "PP: try START_TAG\n");break;
7483 case XML_PARSER_CONTENT:
7484 xmlGenericError(xmlGenericErrorContext,
7485 "PP: try CONTENT\n");break;
7486 case XML_PARSER_CDATA_SECTION:
7487 xmlGenericError(xmlGenericErrorContext,
7488 "PP: try CDATA_SECTION\n");break;
7489 case XML_PARSER_END_TAG:
7490 xmlGenericError(xmlGenericErrorContext,
7491 "PP: try END_TAG\n");break;
7492 case XML_PARSER_ENTITY_DECL:
7493 xmlGenericError(xmlGenericErrorContext,
7494 "PP: try ENTITY_DECL\n");break;
7495 case XML_PARSER_ENTITY_VALUE:
7496 xmlGenericError(xmlGenericErrorContext,
7497 "PP: try ENTITY_VALUE\n");break;
7498 case XML_PARSER_ATTRIBUTE_VALUE:
7499 xmlGenericError(xmlGenericErrorContext,
7500 "PP: try ATTRIBUTE_VALUE\n");break;
7501 case XML_PARSER_DTD:
7502 xmlGenericError(xmlGenericErrorContext,
7503 "PP: try DTD\n");break;
7504 case XML_PARSER_EPILOG:
7505 xmlGenericError(xmlGenericErrorContext,
7506 "PP: try EPILOG\n");break;
7507 case XML_PARSER_PI:
7508 xmlGenericError(xmlGenericErrorContext,
7509 "PP: try PI\n");break;
7510 case XML_PARSER_IGNORE:
7511 xmlGenericError(xmlGenericErrorContext,
7512 "PP: try IGNORE\n");break;
7513 }
7514#endif
7515
7516 while (1) {
7517 /*
7518 * Pop-up of finished entities.
7519 */
7520 while ((RAW == 0) && (ctxt->inputNr > 1))
7521 xmlPopInput(ctxt);
7522
7523 if (ctxt->input ==NULL) break;
7524 if (ctxt->input->buf == NULL)
7525 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7526 else
7527 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7528 if (avail < 1)
7529 goto done;
7530 switch (ctxt->instate) {
7531 case XML_PARSER_EOF:
7532 /*
7533 * Document parsing is done !
7534 */
7535 goto done;
7536 case XML_PARSER_START:
7537 /*
7538 * Very first chars read from the document flow.
7539 */
Owen Taylor3473f882001-02-23 17:55:21 +00007540 if (avail < 2)
7541 goto done;
7542
7543 cur = ctxt->input->cur[0];
7544 next = ctxt->input->cur[1];
7545 if (cur == 0) {
7546 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7547 ctxt->sax->setDocumentLocator(ctxt->userData,
7548 &xmlDefaultSAXLocator);
7549 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7551 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7552 ctxt->wellFormed = 0;
7553 ctxt->disableSAX = 1;
7554 ctxt->instate = XML_PARSER_EOF;
7555#ifdef DEBUG_PUSH
7556 xmlGenericError(xmlGenericErrorContext,
7557 "PP: entering EOF\n");
7558#endif
7559 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7560 ctxt->sax->endDocument(ctxt->userData);
7561 goto done;
7562 }
7563 if ((cur == '<') && (next == '?')) {
7564 /* PI or XML decl */
7565 if (avail < 5) return(ret);
7566 if ((!terminate) &&
7567 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7568 return(ret);
7569 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7570 ctxt->sax->setDocumentLocator(ctxt->userData,
7571 &xmlDefaultSAXLocator);
7572 if ((ctxt->input->cur[2] == 'x') &&
7573 (ctxt->input->cur[3] == 'm') &&
7574 (ctxt->input->cur[4] == 'l') &&
7575 (IS_BLANK(ctxt->input->cur[5]))) {
7576 ret += 5;
7577#ifdef DEBUG_PUSH
7578 xmlGenericError(xmlGenericErrorContext,
7579 "PP: Parsing XML Decl\n");
7580#endif
7581 xmlParseXMLDecl(ctxt);
7582 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7583 /*
7584 * The XML REC instructs us to stop parsing right
7585 * here
7586 */
7587 ctxt->instate = XML_PARSER_EOF;
7588 return(0);
7589 }
7590 ctxt->standalone = ctxt->input->standalone;
7591 if ((ctxt->encoding == NULL) &&
7592 (ctxt->input->encoding != NULL))
7593 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7594 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7595 (!ctxt->disableSAX))
7596 ctxt->sax->startDocument(ctxt->userData);
7597 ctxt->instate = XML_PARSER_MISC;
7598#ifdef DEBUG_PUSH
7599 xmlGenericError(xmlGenericErrorContext,
7600 "PP: entering MISC\n");
7601#endif
7602 } else {
7603 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7604 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7605 (!ctxt->disableSAX))
7606 ctxt->sax->startDocument(ctxt->userData);
7607 ctxt->instate = XML_PARSER_MISC;
7608#ifdef DEBUG_PUSH
7609 xmlGenericError(xmlGenericErrorContext,
7610 "PP: entering MISC\n");
7611#endif
7612 }
7613 } else {
7614 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7615 ctxt->sax->setDocumentLocator(ctxt->userData,
7616 &xmlDefaultSAXLocator);
7617 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7618 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7619 (!ctxt->disableSAX))
7620 ctxt->sax->startDocument(ctxt->userData);
7621 ctxt->instate = XML_PARSER_MISC;
7622#ifdef DEBUG_PUSH
7623 xmlGenericError(xmlGenericErrorContext,
7624 "PP: entering MISC\n");
7625#endif
7626 }
7627 break;
7628 case XML_PARSER_MISC:
7629 SKIP_BLANKS;
7630 if (ctxt->input->buf == NULL)
7631 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7632 else
7633 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7634 if (avail < 2)
7635 goto done;
7636 cur = ctxt->input->cur[0];
7637 next = ctxt->input->cur[1];
7638 if ((cur == '<') && (next == '?')) {
7639 if ((!terminate) &&
7640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7641 goto done;
7642#ifdef DEBUG_PUSH
7643 xmlGenericError(xmlGenericErrorContext,
7644 "PP: Parsing PI\n");
7645#endif
7646 xmlParsePI(ctxt);
7647 } else if ((cur == '<') && (next == '!') &&
7648 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7649 if ((!terminate) &&
7650 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7651 goto done;
7652#ifdef DEBUG_PUSH
7653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: Parsing Comment\n");
7655#endif
7656 xmlParseComment(ctxt);
7657 ctxt->instate = XML_PARSER_MISC;
7658 } else if ((cur == '<') && (next == '!') &&
7659 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7660 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7661 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7662 (ctxt->input->cur[8] == 'E')) {
7663 if ((!terminate) &&
7664 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7665 goto done;
7666#ifdef DEBUG_PUSH
7667 xmlGenericError(xmlGenericErrorContext,
7668 "PP: Parsing internal subset\n");
7669#endif
7670 ctxt->inSubset = 1;
7671 xmlParseDocTypeDecl(ctxt);
7672 if (RAW == '[') {
7673 ctxt->instate = XML_PARSER_DTD;
7674#ifdef DEBUG_PUSH
7675 xmlGenericError(xmlGenericErrorContext,
7676 "PP: entering DTD\n");
7677#endif
7678 } else {
7679 /*
7680 * Create and update the external subset.
7681 */
7682 ctxt->inSubset = 2;
7683 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7684 (ctxt->sax->externalSubset != NULL))
7685 ctxt->sax->externalSubset(ctxt->userData,
7686 ctxt->intSubName, ctxt->extSubSystem,
7687 ctxt->extSubURI);
7688 ctxt->inSubset = 0;
7689 ctxt->instate = XML_PARSER_PROLOG;
7690#ifdef DEBUG_PUSH
7691 xmlGenericError(xmlGenericErrorContext,
7692 "PP: entering PROLOG\n");
7693#endif
7694 }
7695 } else if ((cur == '<') && (next == '!') &&
7696 (avail < 9)) {
7697 goto done;
7698 } else {
7699 ctxt->instate = XML_PARSER_START_TAG;
7700#ifdef DEBUG_PUSH
7701 xmlGenericError(xmlGenericErrorContext,
7702 "PP: entering START_TAG\n");
7703#endif
7704 }
7705 break;
7706 case XML_PARSER_IGNORE:
7707 xmlGenericError(xmlGenericErrorContext,
7708 "PP: internal error, state == IGNORE");
7709 ctxt->instate = XML_PARSER_DTD;
7710#ifdef DEBUG_PUSH
7711 xmlGenericError(xmlGenericErrorContext,
7712 "PP: entering DTD\n");
7713#endif
7714 break;
7715 case XML_PARSER_PROLOG:
7716 SKIP_BLANKS;
7717 if (ctxt->input->buf == NULL)
7718 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7719 else
7720 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7721 if (avail < 2)
7722 goto done;
7723 cur = ctxt->input->cur[0];
7724 next = ctxt->input->cur[1];
7725 if ((cur == '<') && (next == '?')) {
7726 if ((!terminate) &&
7727 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7728 goto done;
7729#ifdef DEBUG_PUSH
7730 xmlGenericError(xmlGenericErrorContext,
7731 "PP: Parsing PI\n");
7732#endif
7733 xmlParsePI(ctxt);
7734 } else if ((cur == '<') && (next == '!') &&
7735 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7736 if ((!terminate) &&
7737 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7738 goto done;
7739#ifdef DEBUG_PUSH
7740 xmlGenericError(xmlGenericErrorContext,
7741 "PP: Parsing Comment\n");
7742#endif
7743 xmlParseComment(ctxt);
7744 ctxt->instate = XML_PARSER_PROLOG;
7745 } else if ((cur == '<') && (next == '!') &&
7746 (avail < 4)) {
7747 goto done;
7748 } else {
7749 ctxt->instate = XML_PARSER_START_TAG;
7750#ifdef DEBUG_PUSH
7751 xmlGenericError(xmlGenericErrorContext,
7752 "PP: entering START_TAG\n");
7753#endif
7754 }
7755 break;
7756 case XML_PARSER_EPILOG:
7757 SKIP_BLANKS;
7758 if (ctxt->input->buf == NULL)
7759 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7760 else
7761 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7762 if (avail < 2)
7763 goto done;
7764 cur = ctxt->input->cur[0];
7765 next = ctxt->input->cur[1];
7766 if ((cur == '<') && (next == '?')) {
7767 if ((!terminate) &&
7768 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7769 goto done;
7770#ifdef DEBUG_PUSH
7771 xmlGenericError(xmlGenericErrorContext,
7772 "PP: Parsing PI\n");
7773#endif
7774 xmlParsePI(ctxt);
7775 ctxt->instate = XML_PARSER_EPILOG;
7776 } else if ((cur == '<') && (next == '!') &&
7777 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7778 if ((!terminate) &&
7779 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7780 goto done;
7781#ifdef DEBUG_PUSH
7782 xmlGenericError(xmlGenericErrorContext,
7783 "PP: Parsing Comment\n");
7784#endif
7785 xmlParseComment(ctxt);
7786 ctxt->instate = XML_PARSER_EPILOG;
7787 } else if ((cur == '<') && (next == '!') &&
7788 (avail < 4)) {
7789 goto done;
7790 } else {
7791 ctxt->errNo = XML_ERR_DOCUMENT_END;
7792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7793 ctxt->sax->error(ctxt->userData,
7794 "Extra content at the end of the document\n");
7795 ctxt->wellFormed = 0;
7796 ctxt->disableSAX = 1;
7797 ctxt->instate = XML_PARSER_EOF;
7798#ifdef DEBUG_PUSH
7799 xmlGenericError(xmlGenericErrorContext,
7800 "PP: entering EOF\n");
7801#endif
7802 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7803 (!ctxt->disableSAX))
7804 ctxt->sax->endDocument(ctxt->userData);
7805 goto done;
7806 }
7807 break;
7808 case XML_PARSER_START_TAG: {
7809 xmlChar *name, *oldname;
7810
7811 if ((avail < 2) && (ctxt->inputNr == 1))
7812 goto done;
7813 cur = ctxt->input->cur[0];
7814 if (cur != '<') {
7815 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7817 ctxt->sax->error(ctxt->userData,
7818 "Start tag expect, '<' not found\n");
7819 ctxt->wellFormed = 0;
7820 ctxt->disableSAX = 1;
7821 ctxt->instate = XML_PARSER_EOF;
7822#ifdef DEBUG_PUSH
7823 xmlGenericError(xmlGenericErrorContext,
7824 "PP: entering EOF\n");
7825#endif
7826 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7827 (!ctxt->disableSAX))
7828 ctxt->sax->endDocument(ctxt->userData);
7829 goto done;
7830 }
7831 if ((!terminate) &&
7832 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7833 goto done;
7834 if (ctxt->spaceNr == 0)
7835 spacePush(ctxt, -1);
7836 else
7837 spacePush(ctxt, *ctxt->space);
7838 name = xmlParseStartTag(ctxt);
7839 if (name == NULL) {
7840 spacePop(ctxt);
7841 ctxt->instate = XML_PARSER_EOF;
7842#ifdef DEBUG_PUSH
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: entering EOF\n");
7845#endif
7846 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7847 (!ctxt->disableSAX))
7848 ctxt->sax->endDocument(ctxt->userData);
7849 goto done;
7850 }
7851 namePush(ctxt, xmlStrdup(name));
7852
7853 /*
7854 * [ VC: Root Element Type ]
7855 * The Name in the document type declaration must match
7856 * the element type of the root element.
7857 */
7858 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7859 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7860 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7861
7862 /*
7863 * Check for an Empty Element.
7864 */
7865 if ((RAW == '/') && (NXT(1) == '>')) {
7866 SKIP(2);
7867 if ((ctxt->sax != NULL) &&
7868 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7869 ctxt->sax->endElement(ctxt->userData, name);
7870 xmlFree(name);
7871 oldname = namePop(ctxt);
7872 spacePop(ctxt);
7873 if (oldname != NULL) {
7874#ifdef DEBUG_STACK
7875 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7876#endif
7877 xmlFree(oldname);
7878 }
7879 if (ctxt->name == NULL) {
7880 ctxt->instate = XML_PARSER_EPILOG;
7881#ifdef DEBUG_PUSH
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: entering EPILOG\n");
7884#endif
7885 } else {
7886 ctxt->instate = XML_PARSER_CONTENT;
7887#ifdef DEBUG_PUSH
7888 xmlGenericError(xmlGenericErrorContext,
7889 "PP: entering CONTENT\n");
7890#endif
7891 }
7892 break;
7893 }
7894 if (RAW == '>') {
7895 NEXT;
7896 } else {
7897 ctxt->errNo = XML_ERR_GT_REQUIRED;
7898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7899 ctxt->sax->error(ctxt->userData,
7900 "Couldn't find end of Start Tag %s\n",
7901 name);
7902 ctxt->wellFormed = 0;
7903 ctxt->disableSAX = 1;
7904
7905 /*
7906 * end of parsing of this node.
7907 */
7908 nodePop(ctxt);
7909 oldname = namePop(ctxt);
7910 spacePop(ctxt);
7911 if (oldname != NULL) {
7912#ifdef DEBUG_STACK
7913 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7914#endif
7915 xmlFree(oldname);
7916 }
7917 }
7918 xmlFree(name);
7919 ctxt->instate = XML_PARSER_CONTENT;
7920#ifdef DEBUG_PUSH
7921 xmlGenericError(xmlGenericErrorContext,
7922 "PP: entering CONTENT\n");
7923#endif
7924 break;
7925 }
7926 case XML_PARSER_CONTENT: {
7927 const xmlChar *test;
7928 int cons;
7929 xmlChar tok;
7930
7931 /*
7932 * Handle preparsed entities and charRef
7933 */
7934 if (ctxt->token != 0) {
7935 xmlChar cur[2] = { 0 , 0 } ;
7936
7937 cur[0] = (xmlChar) ctxt->token;
7938 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7939 (ctxt->sax->characters != NULL))
7940 ctxt->sax->characters(ctxt->userData, cur, 1);
7941 ctxt->token = 0;
7942 }
7943 if ((avail < 2) && (ctxt->inputNr == 1))
7944 goto done;
7945 cur = ctxt->input->cur[0];
7946 next = ctxt->input->cur[1];
7947
7948 test = CUR_PTR;
7949 cons = ctxt->input->consumed;
7950 tok = ctxt->token;
7951 if ((cur == '<') && (next == '?')) {
7952 if ((!terminate) &&
7953 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7954 goto done;
7955#ifdef DEBUG_PUSH
7956 xmlGenericError(xmlGenericErrorContext,
7957 "PP: Parsing PI\n");
7958#endif
7959 xmlParsePI(ctxt);
7960 } else if ((cur == '<') && (next == '!') &&
7961 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7962 if ((!terminate) &&
7963 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7964 goto done;
7965#ifdef DEBUG_PUSH
7966 xmlGenericError(xmlGenericErrorContext,
7967 "PP: Parsing Comment\n");
7968#endif
7969 xmlParseComment(ctxt);
7970 ctxt->instate = XML_PARSER_CONTENT;
7971 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7972 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7973 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7974 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7975 (ctxt->input->cur[8] == '[')) {
7976 SKIP(9);
7977 ctxt->instate = XML_PARSER_CDATA_SECTION;
7978#ifdef DEBUG_PUSH
7979 xmlGenericError(xmlGenericErrorContext,
7980 "PP: entering CDATA_SECTION\n");
7981#endif
7982 break;
7983 } else if ((cur == '<') && (next == '!') &&
7984 (avail < 9)) {
7985 goto done;
7986 } else if ((cur == '<') && (next == '/')) {
7987 ctxt->instate = XML_PARSER_END_TAG;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: entering END_TAG\n");
7991#endif
7992 break;
7993 } else if (cur == '<') {
7994 ctxt->instate = XML_PARSER_START_TAG;
7995#ifdef DEBUG_PUSH
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: entering START_TAG\n");
7998#endif
7999 break;
8000 } else if (cur == '&') {
8001 if ((!terminate) &&
8002 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8003 goto done;
8004#ifdef DEBUG_PUSH
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: Parsing Reference\n");
8007#endif
8008 xmlParseReference(ctxt);
8009 } else {
8010 /* TODO Avoid the extra copy, handle directly !!! */
8011 /*
8012 * Goal of the following test is:
8013 * - minimize calls to the SAX 'character' callback
8014 * when they are mergeable
8015 * - handle an problem for isBlank when we only parse
8016 * a sequence of blank chars and the next one is
8017 * not available to check against '<' presence.
8018 * - tries to homogenize the differences in SAX
8019 * callbacks beween the push and pull versions
8020 * of the parser.
8021 */
8022 if ((ctxt->inputNr == 1) &&
8023 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8024 if ((!terminate) &&
8025 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8026 goto done;
8027 }
8028 ctxt->checkIndex = 0;
8029#ifdef DEBUG_PUSH
8030 xmlGenericError(xmlGenericErrorContext,
8031 "PP: Parsing char data\n");
8032#endif
8033 xmlParseCharData(ctxt, 0);
8034 }
8035 /*
8036 * Pop-up of finished entities.
8037 */
8038 while ((RAW == 0) && (ctxt->inputNr > 1))
8039 xmlPopInput(ctxt);
8040 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8041 (tok == ctxt->token)) {
8042 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8044 ctxt->sax->error(ctxt->userData,
8045 "detected an error in element content\n");
8046 ctxt->wellFormed = 0;
8047 ctxt->disableSAX = 1;
8048 ctxt->instate = XML_PARSER_EOF;
8049 break;
8050 }
8051 break;
8052 }
8053 case XML_PARSER_CDATA_SECTION: {
8054 /*
8055 * The Push mode need to have the SAX callback for
8056 * cdataBlock merge back contiguous callbacks.
8057 */
8058 int base;
8059
8060 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8061 if (base < 0) {
8062 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8064 if (ctxt->sax->cdataBlock != NULL)
8065 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8066 XML_PARSER_BIG_BUFFER_SIZE);
8067 }
8068 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8069 ctxt->checkIndex = 0;
8070 }
8071 goto done;
8072 } else {
8073 if ((ctxt->sax != NULL) && (base > 0) &&
8074 (!ctxt->disableSAX)) {
8075 if (ctxt->sax->cdataBlock != NULL)
8076 ctxt->sax->cdataBlock(ctxt->userData,
8077 ctxt->input->cur, base);
8078 }
8079 SKIP(base + 3);
8080 ctxt->checkIndex = 0;
8081 ctxt->instate = XML_PARSER_CONTENT;
8082#ifdef DEBUG_PUSH
8083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: entering CONTENT\n");
8085#endif
8086 }
8087 break;
8088 }
8089 case XML_PARSER_END_TAG:
8090 if (avail < 2)
8091 goto done;
8092 if ((!terminate) &&
8093 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8094 goto done;
8095 xmlParseEndTag(ctxt);
8096 if (ctxt->name == NULL) {
8097 ctxt->instate = XML_PARSER_EPILOG;
8098#ifdef DEBUG_PUSH
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: entering EPILOG\n");
8101#endif
8102 } else {
8103 ctxt->instate = XML_PARSER_CONTENT;
8104#ifdef DEBUG_PUSH
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: entering CONTENT\n");
8107#endif
8108 }
8109 break;
8110 case XML_PARSER_DTD: {
8111 /*
8112 * Sorry but progressive parsing of the internal subset
8113 * is not expected to be supported. We first check that
8114 * the full content of the internal subset is available and
8115 * the parsing is launched only at that point.
8116 * Internal subset ends up with "']' S? '>'" in an unescaped
8117 * section and not in a ']]>' sequence which are conditional
8118 * sections (whoever argued to keep that crap in XML deserve
8119 * a place in hell !).
8120 */
8121 int base, i;
8122 xmlChar *buf;
8123 xmlChar quote = 0;
8124
8125 base = ctxt->input->cur - ctxt->input->base;
8126 if (base < 0) return(0);
8127 if (ctxt->checkIndex > base)
8128 base = ctxt->checkIndex;
8129 buf = ctxt->input->buf->buffer->content;
8130 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8131 base++) {
8132 if (quote != 0) {
8133 if (buf[base] == quote)
8134 quote = 0;
8135 continue;
8136 }
8137 if (buf[base] == '"') {
8138 quote = '"';
8139 continue;
8140 }
8141 if (buf[base] == '\'') {
8142 quote = '\'';
8143 continue;
8144 }
8145 if (buf[base] == ']') {
8146 if ((unsigned int) base +1 >=
8147 ctxt->input->buf->buffer->use)
8148 break;
8149 if (buf[base + 1] == ']') {
8150 /* conditional crap, skip both ']' ! */
8151 base++;
8152 continue;
8153 }
8154 for (i = 0;
8155 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8156 i++) {
8157 if (buf[base + i] == '>')
8158 goto found_end_int_subset;
8159 }
8160 break;
8161 }
8162 }
8163 /*
8164 * We didn't found the end of the Internal subset
8165 */
8166 if (quote == 0)
8167 ctxt->checkIndex = base;
8168#ifdef DEBUG_PUSH
8169 if (next == 0)
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: lookup of int subset end filed\n");
8172#endif
8173 goto done;
8174
8175found_end_int_subset:
8176 xmlParseInternalSubset(ctxt);
8177 ctxt->inSubset = 2;
8178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8179 (ctxt->sax->externalSubset != NULL))
8180 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8181 ctxt->extSubSystem, ctxt->extSubURI);
8182 ctxt->inSubset = 0;
8183 ctxt->instate = XML_PARSER_PROLOG;
8184 ctxt->checkIndex = 0;
8185#ifdef DEBUG_PUSH
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: entering PROLOG\n");
8188#endif
8189 break;
8190 }
8191 case XML_PARSER_COMMENT:
8192 xmlGenericError(xmlGenericErrorContext,
8193 "PP: internal error, state == COMMENT\n");
8194 ctxt->instate = XML_PARSER_CONTENT;
8195#ifdef DEBUG_PUSH
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: entering CONTENT\n");
8198#endif
8199 break;
8200 case XML_PARSER_PI:
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: internal error, state == PI\n");
8203 ctxt->instate = XML_PARSER_CONTENT;
8204#ifdef DEBUG_PUSH
8205 xmlGenericError(xmlGenericErrorContext,
8206 "PP: entering CONTENT\n");
8207#endif
8208 break;
8209 case XML_PARSER_ENTITY_DECL:
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: internal error, state == ENTITY_DECL\n");
8212 ctxt->instate = XML_PARSER_DTD;
8213#ifdef DEBUG_PUSH
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: entering DTD\n");
8216#endif
8217 break;
8218 case XML_PARSER_ENTITY_VALUE:
8219 xmlGenericError(xmlGenericErrorContext,
8220 "PP: internal error, state == ENTITY_VALUE\n");
8221 ctxt->instate = XML_PARSER_CONTENT;
8222#ifdef DEBUG_PUSH
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: entering DTD\n");
8225#endif
8226 break;
8227 case XML_PARSER_ATTRIBUTE_VALUE:
8228 xmlGenericError(xmlGenericErrorContext,
8229 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8230 ctxt->instate = XML_PARSER_START_TAG;
8231#ifdef DEBUG_PUSH
8232 xmlGenericError(xmlGenericErrorContext,
8233 "PP: entering START_TAG\n");
8234#endif
8235 break;
8236 case XML_PARSER_SYSTEM_LITERAL:
8237 xmlGenericError(xmlGenericErrorContext,
8238 "PP: internal error, state == SYSTEM_LITERAL\n");
8239 ctxt->instate = XML_PARSER_START_TAG;
8240#ifdef DEBUG_PUSH
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: entering START_TAG\n");
8243#endif
8244 break;
8245 }
8246 }
8247done:
8248#ifdef DEBUG_PUSH
8249 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8250#endif
8251 return(ret);
8252}
8253
8254/**
8255 * xmlParseTry:
8256 * @ctxt: an XML parser context
8257 *
8258 * Try to progress on parsing
8259 *
8260 * Returns zero if no parsing was possible
8261 */
8262int
8263xmlParseTry(xmlParserCtxtPtr ctxt) {
8264 return(xmlParseTryOrFinish(ctxt, 0));
8265}
8266
8267/**
8268 * xmlParseChunk:
8269 * @ctxt: an XML parser context
8270 * @chunk: an char array
8271 * @size: the size in byte of the chunk
8272 * @terminate: last chunk indicator
8273 *
8274 * Parse a Chunk of memory
8275 *
8276 * Returns zero if no error, the xmlParserErrors otherwise.
8277 */
8278int
8279xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8280 int terminate) {
8281 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8282 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8283 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8284 int cur = ctxt->input->cur - ctxt->input->base;
8285
8286 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8287 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8288 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008289 ctxt->input->end =
8290 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008291#ifdef DEBUG_PUSH
8292 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8293#endif
8294
8295 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8296 xmlParseTryOrFinish(ctxt, terminate);
8297 } else if (ctxt->instate != XML_PARSER_EOF) {
8298 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8299 xmlParserInputBufferPtr in = ctxt->input->buf;
8300 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8301 (in->raw != NULL)) {
8302 int nbchars;
8303
8304 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8305 if (nbchars < 0) {
8306 xmlGenericError(xmlGenericErrorContext,
8307 "xmlParseChunk: encoder error\n");
8308 return(XML_ERR_INVALID_ENCODING);
8309 }
8310 }
8311 }
8312 }
8313 xmlParseTryOrFinish(ctxt, terminate);
8314 if (terminate) {
8315 /*
8316 * Check for termination
8317 */
8318 if ((ctxt->instate != XML_PARSER_EOF) &&
8319 (ctxt->instate != XML_PARSER_EPILOG)) {
8320 ctxt->errNo = XML_ERR_DOCUMENT_END;
8321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8322 ctxt->sax->error(ctxt->userData,
8323 "Extra content at the end of the document\n");
8324 ctxt->wellFormed = 0;
8325 ctxt->disableSAX = 1;
8326 }
8327 if (ctxt->instate != XML_PARSER_EOF) {
8328 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8329 (!ctxt->disableSAX))
8330 ctxt->sax->endDocument(ctxt->userData);
8331 }
8332 ctxt->instate = XML_PARSER_EOF;
8333 }
8334 return((xmlParserErrors) ctxt->errNo);
8335}
8336
8337/************************************************************************
8338 * *
8339 * I/O front end functions to the parser *
8340 * *
8341 ************************************************************************/
8342
8343/**
8344 * xmlStopParser:
8345 * @ctxt: an XML parser context
8346 *
8347 * Blocks further parser processing
8348 */
8349void
8350xmlStopParser(xmlParserCtxtPtr ctxt) {
8351 ctxt->instate = XML_PARSER_EOF;
8352 if (ctxt->input != NULL)
8353 ctxt->input->cur = BAD_CAST"";
8354}
8355
8356/**
8357 * xmlCreatePushParserCtxt:
8358 * @sax: a SAX handler
8359 * @user_data: The user data returned on SAX callbacks
8360 * @chunk: a pointer to an array of chars
8361 * @size: number of chars in the array
8362 * @filename: an optional file name or URI
8363 *
8364 * Create a parser context for using the XML parser in push mode
8365 * To allow content encoding detection, @size should be >= 4
8366 * The value of @filename is used for fetching external entities
8367 * and error/warning reports.
8368 *
8369 * Returns the new parser context or NULL
8370 */
8371xmlParserCtxtPtr
8372xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8373 const char *chunk, int size, const char *filename) {
8374 xmlParserCtxtPtr ctxt;
8375 xmlParserInputPtr inputStream;
8376 xmlParserInputBufferPtr buf;
8377 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8378
8379 /*
8380 * plug some encoding conversion routines
8381 */
8382 if ((chunk != NULL) && (size >= 4))
8383 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8384
8385 buf = xmlAllocParserInputBuffer(enc);
8386 if (buf == NULL) return(NULL);
8387
8388 ctxt = xmlNewParserCtxt();
8389 if (ctxt == NULL) {
8390 xmlFree(buf);
8391 return(NULL);
8392 }
8393 if (sax != NULL) {
8394 if (ctxt->sax != &xmlDefaultSAXHandler)
8395 xmlFree(ctxt->sax);
8396 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8397 if (ctxt->sax == NULL) {
8398 xmlFree(buf);
8399 xmlFree(ctxt);
8400 return(NULL);
8401 }
8402 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8403 if (user_data != NULL)
8404 ctxt->userData = user_data;
8405 }
8406 if (filename == NULL) {
8407 ctxt->directory = NULL;
8408 } else {
8409 ctxt->directory = xmlParserGetDirectory(filename);
8410 }
8411
8412 inputStream = xmlNewInputStream(ctxt);
8413 if (inputStream == NULL) {
8414 xmlFreeParserCtxt(ctxt);
8415 return(NULL);
8416 }
8417
8418 if (filename == NULL)
8419 inputStream->filename = NULL;
8420 else
8421 inputStream->filename = xmlMemStrdup(filename);
8422 inputStream->buf = buf;
8423 inputStream->base = inputStream->buf->buffer->content;
8424 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008425 inputStream->end =
8426 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008427 if (enc != XML_CHAR_ENCODING_NONE) {
8428 xmlSwitchEncoding(ctxt, enc);
8429 }
8430
8431 inputPush(ctxt, inputStream);
8432
8433 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8434 (ctxt->input->buf != NULL)) {
8435 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8436#ifdef DEBUG_PUSH
8437 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8438#endif
8439 }
8440
8441 return(ctxt);
8442}
8443
8444/**
8445 * xmlCreateIOParserCtxt:
8446 * @sax: a SAX handler
8447 * @user_data: The user data returned on SAX callbacks
8448 * @ioread: an I/O read function
8449 * @ioclose: an I/O close function
8450 * @ioctx: an I/O handler
8451 * @enc: the charset encoding if known
8452 *
8453 * Create a parser context for using the XML parser with an existing
8454 * I/O stream
8455 *
8456 * Returns the new parser context or NULL
8457 */
8458xmlParserCtxtPtr
8459xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8460 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8461 void *ioctx, xmlCharEncoding enc) {
8462 xmlParserCtxtPtr ctxt;
8463 xmlParserInputPtr inputStream;
8464 xmlParserInputBufferPtr buf;
8465
8466 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8467 if (buf == NULL) return(NULL);
8468
8469 ctxt = xmlNewParserCtxt();
8470 if (ctxt == NULL) {
8471 xmlFree(buf);
8472 return(NULL);
8473 }
8474 if (sax != NULL) {
8475 if (ctxt->sax != &xmlDefaultSAXHandler)
8476 xmlFree(ctxt->sax);
8477 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8478 if (ctxt->sax == NULL) {
8479 xmlFree(buf);
8480 xmlFree(ctxt);
8481 return(NULL);
8482 }
8483 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8484 if (user_data != NULL)
8485 ctxt->userData = user_data;
8486 }
8487
8488 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8489 if (inputStream == NULL) {
8490 xmlFreeParserCtxt(ctxt);
8491 return(NULL);
8492 }
8493 inputPush(ctxt, inputStream);
8494
8495 return(ctxt);
8496}
8497
8498/************************************************************************
8499 * *
8500 * Front ends when parsing a Dtd *
8501 * *
8502 ************************************************************************/
8503
8504/**
8505 * xmlIOParseDTD:
8506 * @sax: the SAX handler block or NULL
8507 * @input: an Input Buffer
8508 * @enc: the charset encoding if known
8509 *
8510 * Load and parse a DTD
8511 *
8512 * Returns the resulting xmlDtdPtr or NULL in case of error.
8513 * @input will be freed at parsing end.
8514 */
8515
8516xmlDtdPtr
8517xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8518 xmlCharEncoding enc) {
8519 xmlDtdPtr ret = NULL;
8520 xmlParserCtxtPtr ctxt;
8521 xmlParserInputPtr pinput = NULL;
8522
8523 if (input == NULL)
8524 return(NULL);
8525
8526 ctxt = xmlNewParserCtxt();
8527 if (ctxt == NULL) {
8528 return(NULL);
8529 }
8530
8531 /*
8532 * Set-up the SAX context
8533 */
8534 if (sax != NULL) {
8535 if (ctxt->sax != NULL)
8536 xmlFree(ctxt->sax);
8537 ctxt->sax = sax;
8538 ctxt->userData = NULL;
8539 }
8540
8541 /*
8542 * generate a parser input from the I/O handler
8543 */
8544
8545 pinput = xmlNewIOInputStream(ctxt, input, enc);
8546 if (pinput == NULL) {
8547 if (sax != NULL) ctxt->sax = NULL;
8548 xmlFreeParserCtxt(ctxt);
8549 return(NULL);
8550 }
8551
8552 /*
8553 * plug some encoding conversion routines here.
8554 */
8555 xmlPushInput(ctxt, pinput);
8556
8557 pinput->filename = NULL;
8558 pinput->line = 1;
8559 pinput->col = 1;
8560 pinput->base = ctxt->input->cur;
8561 pinput->cur = ctxt->input->cur;
8562 pinput->free = NULL;
8563
8564 /*
8565 * let's parse that entity knowing it's an external subset.
8566 */
8567 ctxt->inSubset = 2;
8568 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8569 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8570 BAD_CAST "none", BAD_CAST "none");
8571 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8572
8573 if (ctxt->myDoc != NULL) {
8574 if (ctxt->wellFormed) {
8575 ret = ctxt->myDoc->extSubset;
8576 ctxt->myDoc->extSubset = NULL;
8577 } else {
8578 ret = NULL;
8579 }
8580 xmlFreeDoc(ctxt->myDoc);
8581 ctxt->myDoc = NULL;
8582 }
8583 if (sax != NULL) ctxt->sax = NULL;
8584 xmlFreeParserCtxt(ctxt);
8585
8586 return(ret);
8587}
8588
8589/**
8590 * xmlSAXParseDTD:
8591 * @sax: the SAX handler block
8592 * @ExternalID: a NAME* containing the External ID of the DTD
8593 * @SystemID: a NAME* containing the URL to the DTD
8594 *
8595 * Load and parse an external subset.
8596 *
8597 * Returns the resulting xmlDtdPtr or NULL in case of error.
8598 */
8599
8600xmlDtdPtr
8601xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8602 const xmlChar *SystemID) {
8603 xmlDtdPtr ret = NULL;
8604 xmlParserCtxtPtr ctxt;
8605 xmlParserInputPtr input = NULL;
8606 xmlCharEncoding enc;
8607
8608 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8609
8610 ctxt = xmlNewParserCtxt();
8611 if (ctxt == NULL) {
8612 return(NULL);
8613 }
8614
8615 /*
8616 * Set-up the SAX context
8617 */
8618 if (sax != NULL) {
8619 if (ctxt->sax != NULL)
8620 xmlFree(ctxt->sax);
8621 ctxt->sax = sax;
8622 ctxt->userData = NULL;
8623 }
8624
8625 /*
8626 * Ask the Entity resolver to load the damn thing
8627 */
8628
8629 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8630 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8631 if (input == NULL) {
8632 if (sax != NULL) ctxt->sax = NULL;
8633 xmlFreeParserCtxt(ctxt);
8634 return(NULL);
8635 }
8636
8637 /*
8638 * plug some encoding conversion routines here.
8639 */
8640 xmlPushInput(ctxt, input);
8641 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8642 xmlSwitchEncoding(ctxt, enc);
8643
8644 if (input->filename == NULL)
8645 input->filename = (char *) xmlStrdup(SystemID);
8646 input->line = 1;
8647 input->col = 1;
8648 input->base = ctxt->input->cur;
8649 input->cur = ctxt->input->cur;
8650 input->free = NULL;
8651
8652 /*
8653 * let's parse that entity knowing it's an external subset.
8654 */
8655 ctxt->inSubset = 2;
8656 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8657 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8658 ExternalID, SystemID);
8659 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8660
8661 if (ctxt->myDoc != NULL) {
8662 if (ctxt->wellFormed) {
8663 ret = ctxt->myDoc->extSubset;
8664 ctxt->myDoc->extSubset = NULL;
8665 } else {
8666 ret = NULL;
8667 }
8668 xmlFreeDoc(ctxt->myDoc);
8669 ctxt->myDoc = NULL;
8670 }
8671 if (sax != NULL) ctxt->sax = NULL;
8672 xmlFreeParserCtxt(ctxt);
8673
8674 return(ret);
8675}
8676
8677/**
8678 * xmlParseDTD:
8679 * @ExternalID: a NAME* containing the External ID of the DTD
8680 * @SystemID: a NAME* containing the URL to the DTD
8681 *
8682 * Load and parse an external subset.
8683 *
8684 * Returns the resulting xmlDtdPtr or NULL in case of error.
8685 */
8686
8687xmlDtdPtr
8688xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8689 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8690}
8691
8692/************************************************************************
8693 * *
8694 * Front ends when parsing an Entity *
8695 * *
8696 ************************************************************************/
8697
8698/**
8699 * xmlSAXParseBalancedChunk:
8700 * @ctx: an XML parser context (possibly NULL)
8701 * @sax: the SAX handler bloc (possibly NULL)
8702 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8703 * @input: a parser input stream
8704 * @enc: the encoding
8705 *
8706 * Parse a well-balanced chunk of an XML document
8707 * The user has to provide SAX callback block whose routines will be
8708 * called by the parser
8709 * The allowed sequence for the Well Balanced Chunk is the one defined by
8710 * the content production in the XML grammar:
8711 *
8712 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8713 *
8714 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8715 * the error code otherwise
8716 */
8717
8718int
8719xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8720 void *user_data, xmlParserInputPtr input,
8721 xmlCharEncoding enc) {
8722 xmlParserCtxtPtr ctxt;
8723 int ret;
8724
8725 if (input == NULL) return(-1);
8726
8727 if (ctx != NULL)
8728 ctxt = ctx;
8729 else {
8730 ctxt = xmlNewParserCtxt();
8731 if (ctxt == NULL)
8732 return(-1);
8733 if (sax == NULL)
8734 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8735 }
8736
8737 /*
8738 * Set-up the SAX context
8739 */
8740 if (sax != NULL) {
8741 if (ctxt->sax != NULL)
8742 xmlFree(ctxt->sax);
8743 ctxt->sax = sax;
8744 ctxt->userData = user_data;
8745 }
8746
8747 /*
8748 * plug some encoding conversion routines here.
8749 */
8750 xmlPushInput(ctxt, input);
8751 if (enc != XML_CHAR_ENCODING_NONE)
8752 xmlSwitchEncoding(ctxt, enc);
8753
8754 /*
8755 * let's parse that entity knowing it's an external subset.
8756 */
8757 xmlParseContent(ctxt);
8758 ret = ctxt->errNo;
8759
8760 if (ctx == NULL) {
8761 if (sax != NULL)
8762 ctxt->sax = NULL;
8763 else
8764 xmlFreeDoc(ctxt->myDoc);
8765 xmlFreeParserCtxt(ctxt);
8766 }
8767 return(ret);
8768}
8769
8770/**
8771 * xmlParseCtxtExternalEntity:
8772 * @ctx: the existing parsing context
8773 * @URL: the URL for the entity to load
8774 * @ID: the System ID for the entity to load
8775 * @list: the return value for the set of parsed nodes
8776 *
8777 * Parse an external general entity within an existing parsing context
8778 * An external general parsed entity is well-formed if it matches the
8779 * production labeled extParsedEnt.
8780 *
8781 * [78] extParsedEnt ::= TextDecl? content
8782 *
8783 * Returns 0 if the entity is well formed, -1 in case of args problem and
8784 * the parser error code otherwise
8785 */
8786
8787int
8788xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8789 const xmlChar *ID, xmlNodePtr *list) {
8790 xmlParserCtxtPtr ctxt;
8791 xmlDocPtr newDoc;
8792 xmlSAXHandlerPtr oldsax = NULL;
8793 int ret = 0;
8794
8795 if (ctx->depth > 40) {
8796 return(XML_ERR_ENTITY_LOOP);
8797 }
8798
8799 if (list != NULL)
8800 *list = NULL;
8801 if ((URL == NULL) && (ID == NULL))
8802 return(-1);
8803 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8804 return(-1);
8805
8806
8807 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8808 if (ctxt == NULL) return(-1);
8809 ctxt->userData = ctxt;
8810 oldsax = ctxt->sax;
8811 ctxt->sax = ctx->sax;
8812 newDoc = xmlNewDoc(BAD_CAST "1.0");
8813 if (newDoc == NULL) {
8814 xmlFreeParserCtxt(ctxt);
8815 return(-1);
8816 }
8817 if (ctx->myDoc != NULL) {
8818 newDoc->intSubset = ctx->myDoc->intSubset;
8819 newDoc->extSubset = ctx->myDoc->extSubset;
8820 }
8821 if (ctx->myDoc->URL != NULL) {
8822 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8823 }
8824 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8825 if (newDoc->children == NULL) {
8826 ctxt->sax = oldsax;
8827 xmlFreeParserCtxt(ctxt);
8828 newDoc->intSubset = NULL;
8829 newDoc->extSubset = NULL;
8830 xmlFreeDoc(newDoc);
8831 return(-1);
8832 }
8833 nodePush(ctxt, newDoc->children);
8834 if (ctx->myDoc == NULL) {
8835 ctxt->myDoc = newDoc;
8836 } else {
8837 ctxt->myDoc = ctx->myDoc;
8838 newDoc->children->doc = ctx->myDoc;
8839 }
8840
8841 /*
8842 * Parse a possible text declaration first
8843 */
8844 GROW;
8845 if ((RAW == '<') && (NXT(1) == '?') &&
8846 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8847 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8848 xmlParseTextDecl(ctxt);
8849 }
8850
8851 /*
8852 * Doing validity checking on chunk doesn't make sense
8853 */
8854 ctxt->instate = XML_PARSER_CONTENT;
8855 ctxt->validate = ctx->validate;
8856 ctxt->loadsubset = ctx->loadsubset;
8857 ctxt->depth = ctx->depth + 1;
8858 ctxt->replaceEntities = ctx->replaceEntities;
8859 if (ctxt->validate) {
8860 ctxt->vctxt.error = ctx->vctxt.error;
8861 ctxt->vctxt.warning = ctx->vctxt.warning;
8862 /* Allocate the Node stack */
8863 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8864 if (ctxt->vctxt.nodeTab == NULL) {
8865 xmlGenericError(xmlGenericErrorContext,
8866 "xmlParseCtxtExternalEntity: out of memory\n");
8867 ctxt->validate = 0;
8868 ctxt->vctxt.error = NULL;
8869 ctxt->vctxt.warning = NULL;
8870 } else {
8871 ctxt->vctxt.nodeNr = 0;
8872 ctxt->vctxt.nodeMax = 4;
8873 ctxt->vctxt.node = NULL;
8874 }
8875 } else {
8876 ctxt->vctxt.error = NULL;
8877 ctxt->vctxt.warning = NULL;
8878 }
8879
8880 xmlParseContent(ctxt);
8881
8882 if ((RAW == '<') && (NXT(1) == '/')) {
8883 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8885 ctxt->sax->error(ctxt->userData,
8886 "chunk is not well balanced\n");
8887 ctxt->wellFormed = 0;
8888 ctxt->disableSAX = 1;
8889 } else if (RAW != 0) {
8890 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8892 ctxt->sax->error(ctxt->userData,
8893 "extra content at the end of well balanced chunk\n");
8894 ctxt->wellFormed = 0;
8895 ctxt->disableSAX = 1;
8896 }
8897 if (ctxt->node != newDoc->children) {
8898 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8900 ctxt->sax->error(ctxt->userData,
8901 "chunk is not well balanced\n");
8902 ctxt->wellFormed = 0;
8903 ctxt->disableSAX = 1;
8904 }
8905
8906 if (!ctxt->wellFormed) {
8907 if (ctxt->errNo == 0)
8908 ret = 1;
8909 else
8910 ret = ctxt->errNo;
8911 } else {
8912 if (list != NULL) {
8913 xmlNodePtr cur;
8914
8915 /*
8916 * Return the newly created nodeset after unlinking it from
8917 * they pseudo parent.
8918 */
8919 cur = newDoc->children->children;
8920 *list = cur;
8921 while (cur != NULL) {
8922 cur->parent = NULL;
8923 cur = cur->next;
8924 }
8925 newDoc->children->children = NULL;
8926 }
8927 ret = 0;
8928 }
8929 ctxt->sax = oldsax;
8930 xmlFreeParserCtxt(ctxt);
8931 newDoc->intSubset = NULL;
8932 newDoc->extSubset = NULL;
8933 xmlFreeDoc(newDoc);
8934
8935 return(ret);
8936}
8937
8938/**
8939 * xmlParseExternalEntity:
8940 * @doc: the document the chunk pertains to
8941 * @sax: the SAX handler bloc (possibly NULL)
8942 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8943 * @depth: Used for loop detection, use 0
8944 * @URL: the URL for the entity to load
8945 * @ID: the System ID for the entity to load
8946 * @list: the return value for the set of parsed nodes
8947 *
8948 * Parse an external general entity
8949 * An external general parsed entity is well-formed if it matches the
8950 * production labeled extParsedEnt.
8951 *
8952 * [78] extParsedEnt ::= TextDecl? content
8953 *
8954 * Returns 0 if the entity is well formed, -1 in case of args problem and
8955 * the parser error code otherwise
8956 */
8957
8958int
8959xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8960 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8961 xmlParserCtxtPtr ctxt;
8962 xmlDocPtr newDoc;
8963 xmlSAXHandlerPtr oldsax = NULL;
8964 int ret = 0;
8965
8966 if (depth > 40) {
8967 return(XML_ERR_ENTITY_LOOP);
8968 }
8969
8970
8971
8972 if (list != NULL)
8973 *list = NULL;
8974 if ((URL == NULL) && (ID == NULL))
8975 return(-1);
8976 if (doc == NULL) /* @@ relax but check for dereferences */
8977 return(-1);
8978
8979
8980 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8981 if (ctxt == NULL) return(-1);
8982 ctxt->userData = ctxt;
8983 if (sax != NULL) {
8984 oldsax = ctxt->sax;
8985 ctxt->sax = sax;
8986 if (user_data != NULL)
8987 ctxt->userData = user_data;
8988 }
8989 newDoc = xmlNewDoc(BAD_CAST "1.0");
8990 if (newDoc == NULL) {
8991 xmlFreeParserCtxt(ctxt);
8992 return(-1);
8993 }
8994 if (doc != NULL) {
8995 newDoc->intSubset = doc->intSubset;
8996 newDoc->extSubset = doc->extSubset;
8997 }
8998 if (doc->URL != NULL) {
8999 newDoc->URL = xmlStrdup(doc->URL);
9000 }
9001 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9002 if (newDoc->children == NULL) {
9003 if (sax != NULL)
9004 ctxt->sax = oldsax;
9005 xmlFreeParserCtxt(ctxt);
9006 newDoc->intSubset = NULL;
9007 newDoc->extSubset = NULL;
9008 xmlFreeDoc(newDoc);
9009 return(-1);
9010 }
9011 nodePush(ctxt, newDoc->children);
9012 if (doc == NULL) {
9013 ctxt->myDoc = newDoc;
9014 } else {
9015 ctxt->myDoc = doc;
9016 newDoc->children->doc = doc;
9017 }
9018
9019 /*
9020 * Parse a possible text declaration first
9021 */
9022 GROW;
9023 if ((RAW == '<') && (NXT(1) == '?') &&
9024 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9025 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9026 xmlParseTextDecl(ctxt);
9027 }
9028
9029 /*
9030 * Doing validity checking on chunk doesn't make sense
9031 */
9032 ctxt->instate = XML_PARSER_CONTENT;
9033 ctxt->validate = 0;
9034 ctxt->loadsubset = 0;
9035 ctxt->depth = depth;
9036
9037 xmlParseContent(ctxt);
9038
9039 if ((RAW == '<') && (NXT(1) == '/')) {
9040 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9042 ctxt->sax->error(ctxt->userData,
9043 "chunk is not well balanced\n");
9044 ctxt->wellFormed = 0;
9045 ctxt->disableSAX = 1;
9046 } else if (RAW != 0) {
9047 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9049 ctxt->sax->error(ctxt->userData,
9050 "extra content at the end of well balanced chunk\n");
9051 ctxt->wellFormed = 0;
9052 ctxt->disableSAX = 1;
9053 }
9054 if (ctxt->node != newDoc->children) {
9055 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9057 ctxt->sax->error(ctxt->userData,
9058 "chunk is not well balanced\n");
9059 ctxt->wellFormed = 0;
9060 ctxt->disableSAX = 1;
9061 }
9062
9063 if (!ctxt->wellFormed) {
9064 if (ctxt->errNo == 0)
9065 ret = 1;
9066 else
9067 ret = ctxt->errNo;
9068 } else {
9069 if (list != NULL) {
9070 xmlNodePtr cur;
9071
9072 /*
9073 * Return the newly created nodeset after unlinking it from
9074 * they pseudo parent.
9075 */
9076 cur = newDoc->children->children;
9077 *list = cur;
9078 while (cur != NULL) {
9079 cur->parent = NULL;
9080 cur = cur->next;
9081 }
9082 newDoc->children->children = NULL;
9083 }
9084 ret = 0;
9085 }
9086 if (sax != NULL)
9087 ctxt->sax = oldsax;
9088 xmlFreeParserCtxt(ctxt);
9089 newDoc->intSubset = NULL;
9090 newDoc->extSubset = NULL;
9091 xmlFreeDoc(newDoc);
9092
9093 return(ret);
9094}
9095
9096/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009097 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009098 * @doc: the document the chunk pertains to
9099 * @sax: the SAX handler bloc (possibly NULL)
9100 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9101 * @depth: Used for loop detection, use 0
9102 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9103 * @list: the return value for the set of parsed nodes
9104 *
9105 * Parse a well-balanced chunk of an XML document
9106 * called by the parser
9107 * The allowed sequence for the Well Balanced Chunk is the one defined by
9108 * the content production in the XML grammar:
9109 *
9110 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9111 *
9112 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9113 * the parser error code otherwise
9114 */
9115
9116int
9117xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9118 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9119 xmlParserCtxtPtr ctxt;
9120 xmlDocPtr newDoc;
9121 xmlSAXHandlerPtr oldsax = NULL;
9122 int size;
9123 int ret = 0;
9124
9125 if (depth > 40) {
9126 return(XML_ERR_ENTITY_LOOP);
9127 }
9128
9129
9130 if (list != NULL)
9131 *list = NULL;
9132 if (string == NULL)
9133 return(-1);
9134
9135 size = xmlStrlen(string);
9136
9137 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9138 if (ctxt == NULL) return(-1);
9139 ctxt->userData = ctxt;
9140 if (sax != NULL) {
9141 oldsax = ctxt->sax;
9142 ctxt->sax = sax;
9143 if (user_data != NULL)
9144 ctxt->userData = user_data;
9145 }
9146 newDoc = xmlNewDoc(BAD_CAST "1.0");
9147 if (newDoc == NULL) {
9148 xmlFreeParserCtxt(ctxt);
9149 return(-1);
9150 }
9151 if (doc != NULL) {
9152 newDoc->intSubset = doc->intSubset;
9153 newDoc->extSubset = doc->extSubset;
9154 }
9155 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9156 if (newDoc->children == NULL) {
9157 if (sax != NULL)
9158 ctxt->sax = oldsax;
9159 xmlFreeParserCtxt(ctxt);
9160 newDoc->intSubset = NULL;
9161 newDoc->extSubset = NULL;
9162 xmlFreeDoc(newDoc);
9163 return(-1);
9164 }
9165 nodePush(ctxt, newDoc->children);
9166 if (doc == NULL) {
9167 ctxt->myDoc = newDoc;
9168 } else {
9169 ctxt->myDoc = doc;
9170 newDoc->children->doc = doc;
9171 }
9172 ctxt->instate = XML_PARSER_CONTENT;
9173 ctxt->depth = depth;
9174
9175 /*
9176 * Doing validity checking on chunk doesn't make sense
9177 */
9178 ctxt->validate = 0;
9179 ctxt->loadsubset = 0;
9180
9181 xmlParseContent(ctxt);
9182
9183 if ((RAW == '<') && (NXT(1) == '/')) {
9184 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9186 ctxt->sax->error(ctxt->userData,
9187 "chunk is not well balanced\n");
9188 ctxt->wellFormed = 0;
9189 ctxt->disableSAX = 1;
9190 } else if (RAW != 0) {
9191 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9193 ctxt->sax->error(ctxt->userData,
9194 "extra content at the end of well balanced chunk\n");
9195 ctxt->wellFormed = 0;
9196 ctxt->disableSAX = 1;
9197 }
9198 if (ctxt->node != newDoc->children) {
9199 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9201 ctxt->sax->error(ctxt->userData,
9202 "chunk is not well balanced\n");
9203 ctxt->wellFormed = 0;
9204 ctxt->disableSAX = 1;
9205 }
9206
9207 if (!ctxt->wellFormed) {
9208 if (ctxt->errNo == 0)
9209 ret = 1;
9210 else
9211 ret = ctxt->errNo;
9212 } else {
9213 if (list != NULL) {
9214 xmlNodePtr cur;
9215
9216 /*
9217 * Return the newly created nodeset after unlinking it from
9218 * they pseudo parent.
9219 */
9220 cur = newDoc->children->children;
9221 *list = cur;
9222 while (cur != NULL) {
9223 cur->parent = NULL;
9224 cur = cur->next;
9225 }
9226 newDoc->children->children = NULL;
9227 }
9228 ret = 0;
9229 }
9230 if (sax != NULL)
9231 ctxt->sax = oldsax;
9232 xmlFreeParserCtxt(ctxt);
9233 newDoc->intSubset = NULL;
9234 newDoc->extSubset = NULL;
9235 xmlFreeDoc(newDoc);
9236
9237 return(ret);
9238}
9239
9240/**
9241 * xmlSAXParseEntity:
9242 * @sax: the SAX handler block
9243 * @filename: the filename
9244 *
9245 * parse an XML external entity out of context and build a tree.
9246 * It use the given SAX function block to handle the parsing callback.
9247 * If sax is NULL, fallback to the default DOM tree building routines.
9248 *
9249 * [78] extParsedEnt ::= TextDecl? content
9250 *
9251 * This correspond to a "Well Balanced" chunk
9252 *
9253 * Returns the resulting document tree
9254 */
9255
9256xmlDocPtr
9257xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9258 xmlDocPtr ret;
9259 xmlParserCtxtPtr ctxt;
9260 char *directory = NULL;
9261
9262 ctxt = xmlCreateFileParserCtxt(filename);
9263 if (ctxt == NULL) {
9264 return(NULL);
9265 }
9266 if (sax != NULL) {
9267 if (ctxt->sax != NULL)
9268 xmlFree(ctxt->sax);
9269 ctxt->sax = sax;
9270 ctxt->userData = NULL;
9271 }
9272
9273 if ((ctxt->directory == NULL) && (directory == NULL))
9274 directory = xmlParserGetDirectory(filename);
9275
9276 xmlParseExtParsedEnt(ctxt);
9277
9278 if (ctxt->wellFormed)
9279 ret = ctxt->myDoc;
9280 else {
9281 ret = NULL;
9282 xmlFreeDoc(ctxt->myDoc);
9283 ctxt->myDoc = NULL;
9284 }
9285 if (sax != NULL)
9286 ctxt->sax = NULL;
9287 xmlFreeParserCtxt(ctxt);
9288
9289 return(ret);
9290}
9291
9292/**
9293 * xmlParseEntity:
9294 * @filename: the filename
9295 *
9296 * parse an XML external entity out of context and build a tree.
9297 *
9298 * [78] extParsedEnt ::= TextDecl? content
9299 *
9300 * This correspond to a "Well Balanced" chunk
9301 *
9302 * Returns the resulting document tree
9303 */
9304
9305xmlDocPtr
9306xmlParseEntity(const char *filename) {
9307 return(xmlSAXParseEntity(NULL, filename));
9308}
9309
9310/**
9311 * xmlCreateEntityParserCtxt:
9312 * @URL: the entity URL
9313 * @ID: the entity PUBLIC ID
9314 * @base: a posible base for the target URI
9315 *
9316 * Create a parser context for an external entity
9317 * Automatic support for ZLIB/Compress compressed document is provided
9318 * by default if found at compile-time.
9319 *
9320 * Returns the new parser context or NULL
9321 */
9322xmlParserCtxtPtr
9323xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9324 const xmlChar *base) {
9325 xmlParserCtxtPtr ctxt;
9326 xmlParserInputPtr inputStream;
9327 char *directory = NULL;
9328 xmlChar *uri;
9329
9330 ctxt = xmlNewParserCtxt();
9331 if (ctxt == NULL) {
9332 return(NULL);
9333 }
9334
9335 uri = xmlBuildURI(URL, base);
9336
9337 if (uri == NULL) {
9338 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9339 if (inputStream == NULL) {
9340 xmlFreeParserCtxt(ctxt);
9341 return(NULL);
9342 }
9343
9344 inputPush(ctxt, inputStream);
9345
9346 if ((ctxt->directory == NULL) && (directory == NULL))
9347 directory = xmlParserGetDirectory((char *)URL);
9348 if ((ctxt->directory == NULL) && (directory != NULL))
9349 ctxt->directory = directory;
9350 } else {
9351 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9352 if (inputStream == NULL) {
9353 xmlFree(uri);
9354 xmlFreeParserCtxt(ctxt);
9355 return(NULL);
9356 }
9357
9358 inputPush(ctxt, inputStream);
9359
9360 if ((ctxt->directory == NULL) && (directory == NULL))
9361 directory = xmlParserGetDirectory((char *)uri);
9362 if ((ctxt->directory == NULL) && (directory != NULL))
9363 ctxt->directory = directory;
9364 xmlFree(uri);
9365 }
9366
9367 return(ctxt);
9368}
9369
9370/************************************************************************
9371 * *
9372 * Front ends when parsing from a file *
9373 * *
9374 ************************************************************************/
9375
9376/**
9377 * xmlCreateFileParserCtxt:
9378 * @filename: the filename
9379 *
9380 * Create a parser context for a file content.
9381 * Automatic support for ZLIB/Compress compressed document is provided
9382 * by default if found at compile-time.
9383 *
9384 * Returns the new parser context or NULL
9385 */
9386xmlParserCtxtPtr
9387xmlCreateFileParserCtxt(const char *filename)
9388{
9389 xmlParserCtxtPtr ctxt;
9390 xmlParserInputPtr inputStream;
9391 xmlParserInputBufferPtr buf;
9392 char *directory = NULL;
9393
9394 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9395 if (buf == NULL) {
9396 return(NULL);
9397 }
9398
9399 ctxt = xmlNewParserCtxt();
9400 if (ctxt == NULL) {
9401 if (xmlDefaultSAXHandler.error != NULL) {
9402 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9403 }
9404 return(NULL);
9405 }
9406
9407 inputStream = xmlNewInputStream(ctxt);
9408 if (inputStream == NULL) {
9409 xmlFreeParserCtxt(ctxt);
9410 return(NULL);
9411 }
9412
9413 inputStream->filename = xmlMemStrdup(filename);
9414 inputStream->buf = buf;
9415 inputStream->base = inputStream->buf->buffer->content;
9416 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009417 inputStream->end =
9418 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009419
9420 inputPush(ctxt, inputStream);
9421 if ((ctxt->directory == NULL) && (directory == NULL))
9422 directory = xmlParserGetDirectory(filename);
9423 if ((ctxt->directory == NULL) && (directory != NULL))
9424 ctxt->directory = directory;
9425
9426 return(ctxt);
9427}
9428
9429/**
9430 * xmlSAXParseFile:
9431 * @sax: the SAX handler block
9432 * @filename: the filename
9433 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9434 * documents
9435 *
9436 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9437 * compressed document is provided by default if found at compile-time.
9438 * It use the given SAX function block to handle the parsing callback.
9439 * If sax is NULL, fallback to the default DOM tree building routines.
9440 *
9441 * Returns the resulting document tree
9442 */
9443
9444xmlDocPtr
9445xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9446 int recovery) {
9447 xmlDocPtr ret;
9448 xmlParserCtxtPtr ctxt;
9449 char *directory = NULL;
9450
9451 ctxt = xmlCreateFileParserCtxt(filename);
9452 if (ctxt == NULL) {
9453 return(NULL);
9454 }
9455 if (sax != NULL) {
9456 if (ctxt->sax != NULL)
9457 xmlFree(ctxt->sax);
9458 ctxt->sax = sax;
9459 ctxt->userData = NULL;
9460 }
9461
9462 if ((ctxt->directory == NULL) && (directory == NULL))
9463 directory = xmlParserGetDirectory(filename);
9464 if ((ctxt->directory == NULL) && (directory != NULL))
9465 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9466
9467 xmlParseDocument(ctxt);
9468
9469 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9470 else {
9471 ret = NULL;
9472 xmlFreeDoc(ctxt->myDoc);
9473 ctxt->myDoc = NULL;
9474 }
9475 if (sax != NULL)
9476 ctxt->sax = NULL;
9477 xmlFreeParserCtxt(ctxt);
9478
9479 return(ret);
9480}
9481
9482/**
9483 * xmlRecoverDoc:
9484 * @cur: a pointer to an array of xmlChar
9485 *
9486 * parse an XML in-memory document and build a tree.
9487 * In the case the document is not Well Formed, a tree is built anyway
9488 *
9489 * Returns the resulting document tree
9490 */
9491
9492xmlDocPtr
9493xmlRecoverDoc(xmlChar *cur) {
9494 return(xmlSAXParseDoc(NULL, cur, 1));
9495}
9496
9497/**
9498 * xmlParseFile:
9499 * @filename: the filename
9500 *
9501 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9502 * compressed document is provided by default if found at compile-time.
9503 *
9504 * Returns the resulting document tree
9505 */
9506
9507xmlDocPtr
9508xmlParseFile(const char *filename) {
9509 return(xmlSAXParseFile(NULL, filename, 0));
9510}
9511
9512/**
9513 * xmlRecoverFile:
9514 * @filename: the filename
9515 *
9516 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9517 * compressed document is provided by default if found at compile-time.
9518 * In the case the document is not Well Formed, a tree is built anyway
9519 *
9520 * Returns the resulting document tree
9521 */
9522
9523xmlDocPtr
9524xmlRecoverFile(const char *filename) {
9525 return(xmlSAXParseFile(NULL, filename, 1));
9526}
9527
9528
9529/**
9530 * xmlSetupParserForBuffer:
9531 * @ctxt: an XML parser context
9532 * @buffer: a xmlChar * buffer
9533 * @filename: a file name
9534 *
9535 * Setup the parser context to parse a new buffer; Clears any prior
9536 * contents from the parser context. The buffer parameter must not be
9537 * NULL, but the filename parameter can be
9538 */
9539void
9540xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9541 const char* filename)
9542{
9543 xmlParserInputPtr input;
9544
9545 input = xmlNewInputStream(ctxt);
9546 if (input == NULL) {
9547 perror("malloc");
9548 xmlFree(ctxt);
9549 return;
9550 }
9551
9552 xmlClearParserCtxt(ctxt);
9553 if (filename != NULL)
9554 input->filename = xmlMemStrdup(filename);
9555 input->base = buffer;
9556 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009557 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009558 inputPush(ctxt, input);
9559}
9560
9561/**
9562 * xmlSAXUserParseFile:
9563 * @sax: a SAX handler
9564 * @user_data: The user data returned on SAX callbacks
9565 * @filename: a file name
9566 *
9567 * parse an XML file and call the given SAX handler routines.
9568 * Automatic support for ZLIB/Compress compressed document is provided
9569 *
9570 * Returns 0 in case of success or a error number otherwise
9571 */
9572int
9573xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9574 const char *filename) {
9575 int ret = 0;
9576 xmlParserCtxtPtr ctxt;
9577
9578 ctxt = xmlCreateFileParserCtxt(filename);
9579 if (ctxt == NULL) return -1;
9580 if (ctxt->sax != &xmlDefaultSAXHandler)
9581 xmlFree(ctxt->sax);
9582 ctxt->sax = sax;
9583 if (user_data != NULL)
9584 ctxt->userData = user_data;
9585
9586 xmlParseDocument(ctxt);
9587
9588 if (ctxt->wellFormed)
9589 ret = 0;
9590 else {
9591 if (ctxt->errNo != 0)
9592 ret = ctxt->errNo;
9593 else
9594 ret = -1;
9595 }
9596 if (sax != NULL)
9597 ctxt->sax = NULL;
9598 xmlFreeParserCtxt(ctxt);
9599
9600 return ret;
9601}
9602
9603/************************************************************************
9604 * *
9605 * Front ends when parsing from memory *
9606 * *
9607 ************************************************************************/
9608
9609/**
9610 * xmlCreateMemoryParserCtxt:
9611 * @buffer: a pointer to a char array
9612 * @size: the size of the array
9613 *
9614 * Create a parser context for an XML in-memory document.
9615 *
9616 * Returns the new parser context or NULL
9617 */
9618xmlParserCtxtPtr
9619xmlCreateMemoryParserCtxt(char *buffer, int size) {
9620 xmlParserCtxtPtr ctxt;
9621 xmlParserInputPtr input;
9622 xmlParserInputBufferPtr buf;
9623
9624 if (buffer == NULL)
9625 return(NULL);
9626 if (size <= 0)
9627 return(NULL);
9628
9629 ctxt = xmlNewParserCtxt();
9630 if (ctxt == NULL)
9631 return(NULL);
9632
9633 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9634 if (buf == NULL) return(NULL);
9635
9636 input = xmlNewInputStream(ctxt);
9637 if (input == NULL) {
9638 xmlFreeParserCtxt(ctxt);
9639 return(NULL);
9640 }
9641
9642 input->filename = NULL;
9643 input->buf = buf;
9644 input->base = input->buf->buffer->content;
9645 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009646 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009647
9648 inputPush(ctxt, input);
9649 return(ctxt);
9650}
9651
9652/**
9653 * xmlSAXParseMemory:
9654 * @sax: the SAX handler block
9655 * @buffer: an pointer to a char array
9656 * @size: the size of the array
9657 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9658 * documents
9659 *
9660 * parse an XML in-memory block and use the given SAX function block
9661 * to handle the parsing callback. If sax is NULL, fallback to the default
9662 * DOM tree building routines.
9663 *
9664 * Returns the resulting document tree
9665 */
9666xmlDocPtr
9667xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9668 xmlDocPtr ret;
9669 xmlParserCtxtPtr ctxt;
9670
9671 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9672 if (ctxt == NULL) return(NULL);
9673 if (sax != NULL) {
9674 ctxt->sax = sax;
9675 ctxt->userData = NULL;
9676 }
9677
9678 xmlParseDocument(ctxt);
9679
9680 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9681 else {
9682 ret = NULL;
9683 xmlFreeDoc(ctxt->myDoc);
9684 ctxt->myDoc = NULL;
9685 }
9686 if (sax != NULL)
9687 ctxt->sax = NULL;
9688 xmlFreeParserCtxt(ctxt);
9689
9690 return(ret);
9691}
9692
9693/**
9694 * xmlParseMemory:
9695 * @buffer: an pointer to a char array
9696 * @size: the size of the array
9697 *
9698 * parse an XML in-memory block and build a tree.
9699 *
9700 * Returns the resulting document tree
9701 */
9702
9703xmlDocPtr xmlParseMemory(char *buffer, int size) {
9704 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9705}
9706
9707/**
9708 * xmlRecoverMemory:
9709 * @buffer: an pointer to a char array
9710 * @size: the size of the array
9711 *
9712 * parse an XML in-memory block and build a tree.
9713 * In the case the document is not Well Formed, a tree is built anyway
9714 *
9715 * Returns the resulting document tree
9716 */
9717
9718xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9719 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9720}
9721
9722/**
9723 * xmlSAXUserParseMemory:
9724 * @sax: a SAX handler
9725 * @user_data: The user data returned on SAX callbacks
9726 * @buffer: an in-memory XML document input
9727 * @size: the length of the XML document in bytes
9728 *
9729 * A better SAX parsing routine.
9730 * parse an XML in-memory buffer and call the given SAX handler routines.
9731 *
9732 * Returns 0 in case of success or a error number otherwise
9733 */
9734int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9735 char *buffer, int size) {
9736 int ret = 0;
9737 xmlParserCtxtPtr ctxt;
9738 xmlSAXHandlerPtr oldsax = NULL;
9739
9740 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9741 if (ctxt == NULL) return -1;
9742 if (sax != NULL) {
9743 oldsax = ctxt->sax;
9744 ctxt->sax = sax;
9745 }
9746 ctxt->userData = user_data;
9747
9748 xmlParseDocument(ctxt);
9749
9750 if (ctxt->wellFormed)
9751 ret = 0;
9752 else {
9753 if (ctxt->errNo != 0)
9754 ret = ctxt->errNo;
9755 else
9756 ret = -1;
9757 }
9758 if (sax != NULL) {
9759 ctxt->sax = oldsax;
9760 }
9761 xmlFreeParserCtxt(ctxt);
9762
9763 return ret;
9764}
9765
9766/**
9767 * xmlCreateDocParserCtxt:
9768 * @cur: a pointer to an array of xmlChar
9769 *
9770 * Creates a parser context for an XML in-memory document.
9771 *
9772 * Returns the new parser context or NULL
9773 */
9774xmlParserCtxtPtr
9775xmlCreateDocParserCtxt(xmlChar *cur) {
9776 int len;
9777
9778 if (cur == NULL)
9779 return(NULL);
9780 len = xmlStrlen(cur);
9781 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9782}
9783
9784/**
9785 * xmlSAXParseDoc:
9786 * @sax: the SAX handler block
9787 * @cur: a pointer to an array of xmlChar
9788 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9789 * documents
9790 *
9791 * parse an XML in-memory document and build a tree.
9792 * It use the given SAX function block to handle the parsing callback.
9793 * If sax is NULL, fallback to the default DOM tree building routines.
9794 *
9795 * Returns the resulting document tree
9796 */
9797
9798xmlDocPtr
9799xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9800 xmlDocPtr ret;
9801 xmlParserCtxtPtr ctxt;
9802
9803 if (cur == NULL) return(NULL);
9804
9805
9806 ctxt = xmlCreateDocParserCtxt(cur);
9807 if (ctxt == NULL) return(NULL);
9808 if (sax != NULL) {
9809 ctxt->sax = sax;
9810 ctxt->userData = NULL;
9811 }
9812
9813 xmlParseDocument(ctxt);
9814 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9815 else {
9816 ret = NULL;
9817 xmlFreeDoc(ctxt->myDoc);
9818 ctxt->myDoc = NULL;
9819 }
9820 if (sax != NULL)
9821 ctxt->sax = NULL;
9822 xmlFreeParserCtxt(ctxt);
9823
9824 return(ret);
9825}
9826
9827/**
9828 * xmlParseDoc:
9829 * @cur: a pointer to an array of xmlChar
9830 *
9831 * parse an XML in-memory document and build a tree.
9832 *
9833 * Returns the resulting document tree
9834 */
9835
9836xmlDocPtr
9837xmlParseDoc(xmlChar *cur) {
9838 return(xmlSAXParseDoc(NULL, cur, 0));
9839}
9840
9841
9842/************************************************************************
9843 * *
9844 * Miscellaneous *
9845 * *
9846 ************************************************************************/
9847
9848#ifdef LIBXML_XPATH_ENABLED
9849#include <libxml/xpath.h>
9850#endif
9851
9852static int xmlParserInitialized = 0;
9853
9854/**
9855 * xmlInitParser:
9856 *
9857 * Initialization function for the XML parser.
9858 * This is not reentrant. Call once before processing in case of
9859 * use in multithreaded programs.
9860 */
9861
9862void
9863xmlInitParser(void) {
9864 if (xmlParserInitialized) return;
9865
9866 xmlInitCharEncodingHandlers();
9867 xmlInitializePredefinedEntities();
9868 xmlDefaultSAXHandlerInit();
9869 xmlRegisterDefaultInputCallbacks();
9870 xmlRegisterDefaultOutputCallbacks();
9871#ifdef LIBXML_HTML_ENABLED
9872 htmlInitAutoClose();
9873 htmlDefaultSAXHandlerInit();
9874#endif
9875#ifdef LIBXML_XPATH_ENABLED
9876 xmlXPathInit();
9877#endif
9878 xmlParserInitialized = 1;
9879}
9880
9881/**
9882 * xmlCleanupParser:
9883 *
9884 * Cleanup function for the XML parser. It tries to reclaim all
9885 * parsing related global memory allocated for the parser processing.
9886 * It doesn't deallocate any document related memory. Calling this
9887 * function should not prevent reusing the parser.
9888 */
9889
9890void
9891xmlCleanupParser(void) {
9892 xmlParserInitialized = 0;
9893 xmlCleanupCharEncodingHandlers();
9894 xmlCleanupPredefinedEntities();
9895}
9896
9897/**
9898 * xmlPedanticParserDefault:
9899 * @val: int 0 or 1
9900 *
9901 * Set and return the previous value for enabling pedantic warnings.
9902 *
9903 * Returns the last value for 0 for no substitution, 1 for substitution.
9904 */
9905
9906int
9907xmlPedanticParserDefault(int val) {
9908 int old = xmlPedanticParserDefaultValue;
9909
9910 xmlPedanticParserDefaultValue = val;
9911 return(old);
9912}
9913
9914/**
9915 * xmlSubstituteEntitiesDefault:
9916 * @val: int 0 or 1
9917 *
9918 * Set and return the previous value for default entity support.
9919 * Initially the parser always keep entity references instead of substituting
9920 * entity values in the output. This function has to be used to change the
9921 * default parser behaviour
9922 * SAX::subtituteEntities() has to be used for changing that on a file by
9923 * file basis.
9924 *
9925 * Returns the last value for 0 for no substitution, 1 for substitution.
9926 */
9927
9928int
9929xmlSubstituteEntitiesDefault(int val) {
9930 int old = xmlSubstituteEntitiesDefaultValue;
9931
9932 xmlSubstituteEntitiesDefaultValue = val;
9933 return(old);
9934}
9935
9936/**
9937 * xmlKeepBlanksDefault:
9938 * @val: int 0 or 1
9939 *
9940 * Set and return the previous value for default blanks text nodes support.
9941 * The 1.x version of the parser used an heuristic to try to detect
9942 * ignorable white spaces. As a result the SAX callback was generating
9943 * ignorableWhitespace() callbacks instead of characters() one, and when
9944 * using the DOM output text nodes containing those blanks were not generated.
9945 * The 2.x and later version will switch to the XML standard way and
9946 * ignorableWhitespace() are only generated when running the parser in
9947 * validating mode and when the current element doesn't allow CDATA or
9948 * mixed content.
9949 * This function is provided as a way to force the standard behaviour
9950 * on 1.X libs and to switch back to the old mode for compatibility when
9951 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9952 * by using xmlIsBlankNode() commodity function to detect the "empty"
9953 * nodes generated.
9954 * This value also affect autogeneration of indentation when saving code
9955 * if blanks sections are kept, indentation is not generated.
9956 *
9957 * Returns the last value for 0 for no substitution, 1 for substitution.
9958 */
9959
9960int
9961xmlKeepBlanksDefault(int val) {
9962 int old = xmlKeepBlanksDefaultValue;
9963
9964 xmlKeepBlanksDefaultValue = val;
9965 xmlIndentTreeOutput = !val;
9966 return(old);
9967}
9968