blob: b5352f9c7b971ff71f9f817988d547a49e0a2190 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Sizing constants for the translation buffer of xmlStringDecodeEntities():
 * XML_PARSER_BIG_BUFFER_SIZE is the size of the initial allocation and
 * XML_PARSER_BUFFER_SIZE is the amount of free room below which that
 * buffer gets grown.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000078#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000079#define XML_PARSER_BUFFER_SIZE 100
80
81/*
82 * Various global defaults for parsing
83 */
/* Initial value 1. NOTE(review): consumers are outside this part of the file. */
84int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, the entity handling routines of this file (input push/pop,
 * PE reference handling, string entity decoding) print trace messages
 * through xmlGenericError().
 */
85int xmlParserDebugEntities = 0;
/*
 * On VMS the two variables below are defined under shortened names and the
 * full names are provided as macro aliases; on every other platform the
 * full names are defined directly. Both start at 0.
 * NOTE(review): presumably needed because of an identifier length limit on
 * VMS - confirm.
 */
86#ifdef VMS
87int xmlSubstituteEntitiesDefaultVal = 0;
88#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
89int xmlDoValidityCheckingDefaultVal = 0;
90#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
91#else
92int xmlSubstituteEntitiesDefaultValue = 0;
93int xmlDoValidityCheckingDefaultValue = 0;
94#endif
/* The remaining defaults start at 0, 0 and 1 respectively. */
95int xmlLoadExtDtdDefaultValue = 0;
96int xmlPedanticParserDefaultValue = 0;
97int xmlKeepBlanksDefaultValue = 1;
98
99/*
100 * List of XML prefixed PI allowed by W3C specs
101 */
102
/* The array is terminated by a NULL entry. */
103const char *xmlW3CPIs[] = {
104 "xml-stylesheet",
105 NULL
106};
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
192/*
193 * Macros for accessing the content. Those should be used only by the parser,
194 * and not exported.
195 *
196 * Dirty macros, i.e. one often need to make assumption on the context to
197 * use them
198 *
199 * CUR_PTR return the current pointer to the xmlChar to be parsed.
200 * To be used with extreme caution since operations consuming
201 * characters may move the input buffer to a different location !
202 * CUR returns the current xmlChar value, i.e. an 8 bit value.
203 * This should be used internally by the parser
204 * only to compare to ASCII values otherwise it would break when
205 * running with UTF-8 encoding.
206 * RAW same as CUR but in the input buffer, bypass any token
207 * extraction that may have been done
208 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
209 * to compare on ASCII based substring.
210 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
211 * strings within the parser.
212 *
213 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
214 *
215 * NEXT Skip to the next character, this does the proper decoding
216 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
217 * NEXTL(l) Skip l xmlChars in the input buffer
218 * CUR_CHAR(l) returns the current unicode character (int), set l
219 * to the number of xmlChars used for the encoding [0-5].
220 * CUR_SCHAR same but operate on a string instead of the context
221 * COPY_BUF copy the current unicode char to the target buffer, increment
222 * the index
223 * GROW, SHRINK handling of input buffers
224 */
225
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* Current byte of the input, or -1 while ctxt->token is set. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* ctxt->token when it is set, the current byte of the input otherwise. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current input pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes, hand a '%' found at the new position over to
 * xmlParserHandlePEReference(), and pop the input when it is exhausted
 * and cannot be refilled.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * Shrink the input buffer once more than INPUT_CHUNK bytes have been
 * consumed. Kept as a bare if statement: use it as "SHRINK;".
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

/*
 * Refill the input buffer when fewer than INPUT_CHUNK bytes remain.
 * Kept as a bare if statement: use it as "GROW;".
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one byte, refilling the input when it runs dry. */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l bytes, keeping the line/column counters up to date and
 * clearing ctxt->token; a '%' found at the new position is handed over
 * to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length of the decoded character. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i. A length l of 1
 * stores the byte directly; any other length goes through xmlCopyChar().
 * Kept as a bare if/else statement: use it as "COPY_BUF(l,b,i,v);".
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v))
278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
387 int val = 0;
388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
496int
497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer
 * accordingly; both must be variables of the calling function, which
 * must itself return a pointer. When the reallocation fails the old
 * buffer is released and the calling function returns NULL, so the
 * macro may only be applied to a buffer that the function owns.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
Daniel Veillard2f362242001-03-02 17:36:21 +00001396 if (ctxt->keepBlanks)
1397 return(0);
1398
Owen Taylor3473f882001-02-23 17:55:21 +00001399 /*
1400 * Check for xml:space value.
1401 */
1402 if (*(ctxt->space) == 1)
1403 return(0);
1404
1405 /*
1406 * Check that the string is made of blanks
1407 */
1408 for (i = 0;i < len;i++)
1409 if (!(IS_BLANK(str[i]))) return(0);
1410
1411 /*
1412 * Look if the element is mixed content in the Dtd if available
1413 */
1414 if (ctxt->myDoc != NULL) {
1415 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1416 if (ret == 0) return(1);
1417 if (ret == 1) return(0);
1418 }
1419
1420 /*
1421 * Otherwise, heuristic :-\
1422 */
Owen Taylor3473f882001-02-23 17:55:21 +00001423 if (RAW != '<') return(0);
1424 if (ctxt->node == NULL) return(0);
1425 if ((ctxt->node->children == NULL) &&
1426 (RAW == '<') && (NXT(1) == '/')) return(0);
1427
1428 lastChild = xmlGetLastChild(ctxt->node);
1429 if (lastChild == NULL) {
1430 if (ctxt->node->content != NULL) return(0);
1431 } else if (xmlNodeIsText(lastChild))
1432 return(0);
1433 else if ((ctxt->node->children != NULL) &&
1434 (xmlNodeIsText(ctxt->node->children)))
1435 return(0);
1436 return(1);
1437}
1438
1439/*
1440 * Forward declarations for recursive behaviour.
1441 */
1442void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1443void xmlParseReference(xmlParserCtxtPtr ctxt);
1444
1445/************************************************************************
1446 * *
1447 * Extra stuff for namespace support *
1448 * Relates to http://www.w3.org/TR/WD-xml-names *
1449 * *
1450 ************************************************************************/
1451
1452/**
1453 * xmlSplitQName:
1454 * @ctxt: an XML parser context
1455 * @name: an XML parser context
1456 * @prefix: a xmlChar **
1457 *
1458 * parse an UTF8 encoded XML qualified name string
1459 *
1460 * [NS 5] QName ::= (Prefix ':')? LocalPart
1461 *
1462 * [NS 6] Prefix ::= NCName
1463 *
1464 * [NS 7] LocalPart ::= NCName
1465 *
1466 * Returns the local part, and prefix is updated
1467 * to get the Prefix if any.
1468 */
1469
1470xmlChar *
1471xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1472 xmlChar buf[XML_MAX_NAMELEN + 5];
1473 xmlChar *buffer = NULL;
1474 int len = 0;
1475 int max = XML_MAX_NAMELEN;
1476 xmlChar *ret = NULL;
1477 const xmlChar *cur = name;
1478 int c;
1479
1480 *prefix = NULL;
1481
1482#ifndef XML_XML_NAMESPACE
1483 /* xml: prefix is not really a namespace */
1484 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1485 (cur[2] == 'l') && (cur[3] == ':'))
1486 return(xmlStrdup(name));
1487#endif
1488
1489 /* nasty but valid */
1490 if (cur[0] == ':')
1491 return(xmlStrdup(name));
1492
1493 c = *cur++;
1494 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1495 buf[len++] = c;
1496 c = *cur++;
1497 }
1498 if (len >= max) {
1499 /*
1500 * Okay someone managed to make a huge name, so he's ready to pay
1501 * for the processing speed.
1502 */
1503 max = len * 2;
1504
1505 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1506 if (buffer == NULL) {
1507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508 ctxt->sax->error(ctxt->userData,
1509 "xmlSplitQName: out of memory\n");
1510 return(NULL);
1511 }
1512 memcpy(buffer, buf, len);
1513 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1514 if (len + 10 > max) {
1515 max *= 2;
1516 buffer = (xmlChar *) xmlRealloc(buffer,
1517 max * sizeof(xmlChar));
1518 if (buffer == NULL) {
1519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1520 ctxt->sax->error(ctxt->userData,
1521 "xmlSplitQName: out of memory\n");
1522 return(NULL);
1523 }
1524 }
1525 buffer[len++] = c;
1526 c = *cur++;
1527 }
1528 buffer[len] = 0;
1529 }
1530
1531 if (buffer == NULL)
1532 ret = xmlStrndup(buf, len);
1533 else {
1534 ret = buffer;
1535 buffer = NULL;
1536 max = XML_MAX_NAMELEN;
1537 }
1538
1539
1540 if (c == ':') {
1541 c = *cur++;
1542 if (c == 0) return(ret);
1543 *prefix = ret;
1544 len = 0;
1545
1546 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1547 buf[len++] = c;
1548 c = *cur++;
1549 }
1550 if (len >= max) {
1551 /*
1552 * Okay someone managed to make a huge name, so he's ready to pay
1553 * for the processing speed.
1554 */
1555 max = len * 2;
1556
1557 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1558 if (buffer == NULL) {
1559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1560 ctxt->sax->error(ctxt->userData,
1561 "xmlSplitQName: out of memory\n");
1562 return(NULL);
1563 }
1564 memcpy(buffer, buf, len);
1565 while (c != 0) { /* tested bigname2.xml */
1566 if (len + 10 > max) {
1567 max *= 2;
1568 buffer = (xmlChar *) xmlRealloc(buffer,
1569 max * sizeof(xmlChar));
1570 if (buffer == NULL) {
1571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1572 ctxt->sax->error(ctxt->userData,
1573 "xmlSplitQName: out of memory\n");
1574 return(NULL);
1575 }
1576 }
1577 buffer[len++] = c;
1578 c = *cur++;
1579 }
1580 buffer[len] = 0;
1581 }
1582
1583 if (buffer == NULL)
1584 ret = xmlStrndup(buf, len);
1585 else {
1586 ret = buffer;
1587 }
1588 }
1589
1590 return(ret);
1591}
1592
1593/************************************************************************
1594 * *
1595 * The parser itself *
1596 * Relates to http://www.w3.org/TR/REC-xml *
1597 * *
1598 ************************************************************************/
1599
Daniel Veillard21a0f912001-02-25 19:54:14 +00001600xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001601/**
1602 * xmlParseName:
1603 * @ctxt: an XML parser context
1604 *
1605 * parse an XML name.
1606 *
1607 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1608 * CombiningChar | Extender
1609 *
1610 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1611 *
1612 * [6] Names ::= Name (S Name)*
1613 *
1614 * Returns the Name parsed or NULL
1615 */
1616
1617xmlChar *
1618xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001619 const xmlChar *in;
1620 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 int count = 0;
1622
1623 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001624
1625 /*
1626 * Accelerator for simple ASCII names
1627 */
1628 in = ctxt->input->cur;
1629 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1630 ((*in >= 0x41) && (*in <= 0x5A)) ||
1631 (*in == '_') || (*in == ':')) {
1632 in++;
1633 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1634 ((*in >= 0x41) && (*in <= 0x5A)) ||
1635 ((*in >= 0x30) && (*in <= 0x39)) ||
1636 (*in == '_') || (*in == ':'))
1637 in++;
1638 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1639 count = in - ctxt->input->cur;
1640 ret = xmlStrndup(ctxt->input->cur, count);
1641 ctxt->input->cur = in;
1642 return(ret);
1643 }
1644 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001645 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001646}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001647
Daniel Veillard21a0f912001-02-25 19:54:14 +00001648xmlChar *
1649xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1650 xmlChar buf[XML_MAX_NAMELEN + 5];
1651 int len = 0, l;
1652 int c;
1653 int count = 0;
1654
1655 /*
1656 * Handler for more complex cases
1657 */
1658 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001659 c = CUR_CHAR(l);
1660 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1661 (!IS_LETTER(c) && (c != '_') &&
1662 (c != ':'))) {
1663 return(NULL);
1664 }
1665
1666 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1667 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1668 (c == '.') || (c == '-') ||
1669 (c == '_') || (c == ':') ||
1670 (IS_COMBINING(c)) ||
1671 (IS_EXTENDER(c)))) {
1672 if (count++ > 100) {
1673 count = 0;
1674 GROW;
1675 }
1676 COPY_BUF(l,buf,len,c);
1677 NEXTL(l);
1678 c = CUR_CHAR(l);
1679 if (len >= XML_MAX_NAMELEN) {
1680 /*
1681 * Okay someone managed to make a huge name, so he's ready to pay
1682 * for the processing speed.
1683 */
1684 xmlChar *buffer;
1685 int max = len * 2;
1686
1687 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688 if (buffer == NULL) {
1689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001691 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001692 return(NULL);
1693 }
1694 memcpy(buffer, buf, len);
1695 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1696 (c == '.') || (c == '-') ||
1697 (c == '_') || (c == ':') ||
1698 (IS_COMBINING(c)) ||
1699 (IS_EXTENDER(c))) {
1700 if (count++ > 100) {
1701 count = 0;
1702 GROW;
1703 }
1704 if (len + 10 > max) {
1705 max *= 2;
1706 buffer = (xmlChar *) xmlRealloc(buffer,
1707 max * sizeof(xmlChar));
1708 if (buffer == NULL) {
1709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1710 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001711 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001712 return(NULL);
1713 }
1714 }
1715 COPY_BUF(l,buffer,len,c);
1716 NEXTL(l);
1717 c = CUR_CHAR(l);
1718 }
1719 buffer[len] = 0;
1720 return(buffer);
1721 }
1722 }
1723 return(xmlStrndup(buf, len));
1724}
1725
1726/**
1727 * xmlParseStringName:
1728 * @ctxt: an XML parser context
1729 * @str: a pointer to the string pointer (IN/OUT)
1730 *
1731 * parse an XML name.
1732 *
1733 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1734 * CombiningChar | Extender
1735 *
1736 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1737 *
1738 * [6] Names ::= Name (S Name)*
1739 *
1740 * Returns the Name parsed or NULL. The str pointer
1741 * is updated to the current location in the string.
1742 */
1743
1744xmlChar *
1745xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1746 xmlChar buf[XML_MAX_NAMELEN + 5];
1747 const xmlChar *cur = *str;
1748 int len = 0, l;
1749 int c;
1750
1751 c = CUR_SCHAR(cur, l);
1752 if (!IS_LETTER(c) && (c != '_') &&
1753 (c != ':')) {
1754 return(NULL);
1755 }
1756
1757 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c))) {
1762 COPY_BUF(l,buf,len,c);
1763 cur += l;
1764 c = CUR_SCHAR(cur, l);
1765 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
1777 "xmlParseStringName: out of memory\n");
1778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (len + 10 > max) {
1787 max *= 2;
1788 buffer = (xmlChar *) xmlRealloc(buffer,
1789 max * sizeof(xmlChar));
1790 if (buffer == NULL) {
1791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1792 ctxt->sax->error(ctxt->userData,
1793 "xmlParseStringName: out of memory\n");
1794 return(NULL);
1795 }
1796 }
1797 COPY_BUF(l,buffer,len,c);
1798 cur += l;
1799 c = CUR_SCHAR(cur, l);
1800 }
1801 buffer[len] = 0;
1802 *str = cur;
1803 return(buffer);
1804 }
1805 }
1806 *str = cur;
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseNmtoken:
1812 * @ctxt: an XML parser context
1813 *
1814 * parse an XML Nmtoken.
1815 *
1816 * [7] Nmtoken ::= (NameChar)+
1817 *
1818 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1819 *
1820 * Returns the Nmtoken parsed or NULL
1821 */
1822
1823xmlChar *
1824xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1825 xmlChar buf[XML_MAX_NAMELEN + 5];
1826 int len = 0, l;
1827 int c;
1828 int count = 0;
1829
1830 GROW;
1831 c = CUR_CHAR(l);
1832
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (count++ > 100) {
1839 count = 0;
1840 GROW;
1841 }
1842 COPY_BUF(l,buf,len,c);
1843 NEXTL(l);
1844 c = CUR_CHAR(l);
1845 if (len >= XML_MAX_NAMELEN) {
1846 /*
1847 * Okay someone managed to make a huge token, so he's ready to pay
1848 * for the processing speed.
1849 */
1850 xmlChar *buffer;
1851 int max = len * 2;
1852
1853 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1854 if (buffer == NULL) {
1855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1856 ctxt->sax->error(ctxt->userData,
1857 "xmlParseNmtoken: out of memory\n");
1858 return(NULL);
1859 }
1860 memcpy(buffer, buf, len);
1861 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1862 (c == '.') || (c == '-') ||
1863 (c == '_') || (c == ':') ||
1864 (IS_COMBINING(c)) ||
1865 (IS_EXTENDER(c))) {
1866 if (count++ > 100) {
1867 count = 0;
1868 GROW;
1869 }
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001877 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 NEXTL(l);
1883 c = CUR_CHAR(l);
1884 }
1885 buffer[len] = 0;
1886 return(buffer);
1887 }
1888 }
1889 if (len == 0)
1890 return(NULL);
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseEntityValue:
1896 * @ctxt: an XML parser context
1897 * @orig: if non-NULL store a copy of the original entity value
1898 *
1899 * parse a value for ENTITY declarations
1900 *
1901 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1902 * "'" ([^%&'] | PEReference | Reference)* "'"
1903 *
1904 * Returns the EntityValue parsed with reference substitued or NULL
1905 */
1906
1907xmlChar *
1908xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1909 xmlChar *buf = NULL;
1910 int len = 0;
1911 int size = XML_PARSER_BUFFER_SIZE;
1912 int c, l;
1913 xmlChar stop;
1914 xmlChar *ret = NULL;
1915 const xmlChar *cur = NULL;
1916 xmlParserInputPtr input;
1917
1918 if (RAW == '"') stop = '"';
1919 else if (RAW == '\'') stop = '\'';
1920 else {
1921 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1923 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1924 ctxt->wellFormed = 0;
1925 ctxt->disableSAX = 1;
1926 return(NULL);
1927 }
1928 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1929 if (buf == NULL) {
1930 xmlGenericError(xmlGenericErrorContext,
1931 "malloc of %d byte failed\n", size);
1932 return(NULL);
1933 }
1934
1935 /*
1936 * The content of the entity definition is copied in a buffer.
1937 */
1938
1939 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1940 input = ctxt->input;
1941 GROW;
1942 NEXT;
1943 c = CUR_CHAR(l);
1944 /*
1945 * NOTE: 4.4.5 Included in Literal
1946 * When a parameter entity reference appears in a literal entity
1947 * value, ... a single or double quote character in the replacement
1948 * text is always treated as a normal data character and will not
1949 * terminate the literal.
1950 * In practice it means we stop the loop only when back at parsing
1951 * the initial entity and the quote is found
1952 */
1953 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1954 (ctxt->input != input))) {
1955 if (len + 5 >= size) {
1956 size *= 2;
1957 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1958 if (buf == NULL) {
1959 xmlGenericError(xmlGenericErrorContext,
1960 "realloc of %d byte failed\n", size);
1961 return(NULL);
1962 }
1963 }
1964 COPY_BUF(l,buf,len,c);
1965 NEXTL(l);
1966 /*
1967 * Pop-up of finished entities.
1968 */
1969 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1970 xmlPopInput(ctxt);
1971
1972 GROW;
1973 c = CUR_CHAR(l);
1974 if (c == 0) {
1975 GROW;
1976 c = CUR_CHAR(l);
1977 }
1978 }
1979 buf[len] = 0;
1980
1981 /*
1982 * Raise problem w.r.t. '&' and '%' being used in non-entities
1983 * reference constructs. Note Charref will be handled in
1984 * xmlStringDecodeEntities()
1985 */
1986 cur = buf;
1987 while (*cur != 0) { /* non input consuming */
1988 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1989 xmlChar *name;
1990 xmlChar tmp = *cur;
1991
1992 cur++;
1993 name = xmlParseStringName(ctxt, &cur);
1994 if ((name == NULL) || (*cur != ';')) {
1995 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1997 ctxt->sax->error(ctxt->userData,
1998 "EntityValue: '%c' forbidden except for entities references\n",
1999 tmp);
2000 ctxt->wellFormed = 0;
2001 ctxt->disableSAX = 1;
2002 }
2003 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2004 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2006 ctxt->sax->error(ctxt->userData,
2007 "EntityValue: PEReferences forbidden in internal subset\n",
2008 tmp);
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 }
2012 if (name != NULL)
2013 xmlFree(name);
2014 }
2015 cur++;
2016 }
2017
2018 /*
2019 * Then PEReference entities are substituted.
2020 */
2021 if (c != stop) {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 xmlFree(buf);
2028 } else {
2029 NEXT;
2030 /*
2031 * NOTE: 4.4.7 Bypassed
2032 * When a general entity reference appears in the EntityValue in
2033 * an entity declaration, it is bypassed and left as is.
2034 * so XML_SUBSTITUTE_REF is not set here.
2035 */
2036 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2037 0, 0, 0);
2038 if (orig != NULL)
2039 *orig = buf;
2040 else
2041 xmlFree(buf);
2042 }
2043
2044 return(ret);
2045}
2046
2047/**
2048 * xmlParseAttValue:
2049 * @ctxt: an XML parser context
2050 *
2051 * parse a value for an attribute
2052 * Note: the parser won't do substitution of entities here, this
2053 * will be handled later in xmlStringGetNodeList
2054 *
2055 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2056 * "'" ([^<&'] | Reference)* "'"
2057 *
2058 * 3.3.3 Attribute-Value Normalization:
2059 * Before the value of an attribute is passed to the application or
2060 * checked for validity, the XML processor must normalize it as follows:
2061 * - a character reference is processed by appending the referenced
2062 * character to the attribute value
2063 * - an entity reference is processed by recursively processing the
2064 * replacement text of the entity
2065 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2066 * appending #x20 to the normalized value, except that only a single
2067 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2068 * parsed entity or the literal entity value of an internal parsed entity
2069 * - other characters are processed by appending them to the normalized value
2070 * If the declared value is not CDATA, then the XML processor must further
2071 * process the normalized attribute value by discarding any leading and
2072 * trailing space (#x20) characters, and by replacing sequences of space
2073 * (#x20) characters by a single space (#x20) character.
2074 * All attributes for which no declaration has been read should be treated
2075 * by a non-validating parser as if declared CDATA.
2076 *
2077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2078 */
2079
2080xmlChar *
2081xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2082 xmlChar limit = 0;
2083 xmlChar *buf = NULL;
2084 int len = 0;
2085 int buf_size = 0;
2086 int c, l;
2087 xmlChar *current = NULL;
2088 xmlEntityPtr ent;
2089
2090
2091 SHRINK;
2092 if (NXT(0) == '"') {
2093 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2094 limit = '"';
2095 NEXT;
2096 } else if (NXT(0) == '\'') {
2097 limit = '\'';
2098 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2099 NEXT;
2100 } else {
2101 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2104 ctxt->wellFormed = 0;
2105 ctxt->disableSAX = 1;
2106 return(NULL);
2107 }
2108
2109 /*
2110 * allocate a translation buffer.
2111 */
2112 buf_size = XML_PARSER_BUFFER_SIZE;
2113 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2114 if (buf == NULL) {
2115 perror("xmlParseAttValue: malloc failed");
2116 return(NULL);
2117 }
2118
2119 /*
2120 * Ok loop until we reach one of the ending char or a size limit.
2121 */
2122 c = CUR_CHAR(l);
2123 while (((NXT(0) != limit) && /* checked */
2124 (c != '<')) || (ctxt->token != 0)) {
2125 if (c == 0) break;
2126 if (ctxt->token == '&') {
2127 /*
2128 * The reparsing will be done in xmlStringGetNodeList()
2129 * called by the attribute() function in SAX.c
2130 */
2131 static xmlChar buffer[6] = "&#38;";
2132
2133 if (len > buf_size - 10) {
2134 growBuffer(buf);
2135 }
2136 current = &buffer[0];
2137 while (*current != 0) { /* non input consuming */
2138 buf[len++] = *current++;
2139 }
2140 ctxt->token = 0;
2141 } else if (c == '&') {
2142 if (NXT(1) == '#') {
2143 int val = xmlParseCharRef(ctxt);
2144 if (val == '&') {
2145 /*
2146 * The reparsing will be done in xmlStringGetNodeList()
2147 * called by the attribute() function in SAX.c
2148 */
2149 static xmlChar buffer[6] = "&#38;";
2150
2151 if (len > buf_size - 10) {
2152 growBuffer(buf);
2153 }
2154 current = &buffer[0];
2155 while (*current != 0) { /* non input consuming */
2156 buf[len++] = *current++;
2157 }
2158 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002159 if (len > buf_size - 10) {
2160 growBuffer(buf);
2161 }
Owen Taylor3473f882001-02-23 17:55:21 +00002162 len += xmlCopyChar(0, &buf[len], val);
2163 }
2164 } else {
2165 ent = xmlParseEntityRef(ctxt);
2166 if ((ent != NULL) &&
2167 (ctxt->replaceEntities != 0)) {
2168 xmlChar *rep;
2169
2170 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2171 rep = xmlStringDecodeEntities(ctxt, ent->content,
2172 XML_SUBSTITUTE_REF, 0, 0, 0);
2173 if (rep != NULL) {
2174 current = rep;
2175 while (*current != 0) { /* non input consuming */
2176 buf[len++] = *current++;
2177 if (len > buf_size - 10) {
2178 growBuffer(buf);
2179 }
2180 }
2181 xmlFree(rep);
2182 }
2183 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002184 if (len > buf_size - 10) {
2185 growBuffer(buf);
2186 }
Owen Taylor3473f882001-02-23 17:55:21 +00002187 if (ent->content != NULL)
2188 buf[len++] = ent->content[0];
2189 }
2190 } else if (ent != NULL) {
2191 int i = xmlStrlen(ent->name);
2192 const xmlChar *cur = ent->name;
2193
2194 /*
2195 * This may look absurd but is needed to detect
2196 * entities problems
2197 */
2198 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2199 (ent->content != NULL)) {
2200 xmlChar *rep;
2201 rep = xmlStringDecodeEntities(ctxt, ent->content,
2202 XML_SUBSTITUTE_REF, 0, 0, 0);
2203 if (rep != NULL)
2204 xmlFree(rep);
2205 }
2206
2207 /*
2208 * Just output the reference
2209 */
2210 buf[len++] = '&';
2211 if (len > buf_size - i - 10) {
2212 growBuffer(buf);
2213 }
2214 for (;i > 0;i--)
2215 buf[len++] = *cur++;
2216 buf[len++] = ';';
2217 }
2218 }
2219 } else {
2220 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2221 COPY_BUF(l,buf,len,0x20);
2222 if (len > buf_size - 10) {
2223 growBuffer(buf);
2224 }
2225 } else {
2226 COPY_BUF(l,buf,len,c);
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 }
2231 NEXTL(l);
2232 }
2233 GROW;
2234 c = CUR_CHAR(l);
2235 }
2236 buf[len++] = 0;
2237 if (RAW == '<') {
2238 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2240 ctxt->sax->error(ctxt->userData,
2241 "Unescaped '<' not allowed in attributes values\n");
2242 ctxt->wellFormed = 0;
2243 ctxt->disableSAX = 1;
2244 } else if (RAW != limit) {
2245 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2247 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2248 ctxt->wellFormed = 0;
2249 ctxt->disableSAX = 1;
2250 } else
2251 NEXT;
2252 return(buf);
2253}
2254
2255/**
2256 * xmlParseSystemLiteral:
2257 * @ctxt: an XML parser context
2258 *
2259 * parse an XML Literal
2260 *
2261 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2262 *
2263 * Returns the SystemLiteral parsed or NULL
2264 */
2265
2266xmlChar *
2267xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2268 xmlChar *buf = NULL;
2269 int len = 0;
2270 int size = XML_PARSER_BUFFER_SIZE;
2271 int cur, l;
2272 xmlChar stop;
2273 int state = ctxt->instate;
2274 int count = 0;
2275
2276 SHRINK;
2277 if (RAW == '"') {
2278 NEXT;
2279 stop = '"';
2280 } else if (RAW == '\'') {
2281 NEXT;
2282 stop = '\'';
2283 } else {
2284 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2286 ctxt->sax->error(ctxt->userData,
2287 "SystemLiteral \" or ' expected\n");
2288 ctxt->wellFormed = 0;
2289 ctxt->disableSAX = 1;
2290 return(NULL);
2291 }
2292
2293 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2294 if (buf == NULL) {
2295 xmlGenericError(xmlGenericErrorContext,
2296 "malloc of %d byte failed\n", size);
2297 return(NULL);
2298 }
2299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2300 cur = CUR_CHAR(l);
2301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2302 if (len + 5 >= size) {
2303 size *= 2;
2304 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2305 if (buf == NULL) {
2306 xmlGenericError(xmlGenericErrorContext,
2307 "realloc of %d byte failed\n", size);
2308 ctxt->instate = (xmlParserInputState) state;
2309 return(NULL);
2310 }
2311 }
2312 count++;
2313 if (count > 50) {
2314 GROW;
2315 count = 0;
2316 }
2317 COPY_BUF(l,buf,len,cur);
2318 NEXTL(l);
2319 cur = CUR_CHAR(l);
2320 if (cur == 0) {
2321 GROW;
2322 SHRINK;
2323 cur = CUR_CHAR(l);
2324 }
2325 }
2326 buf[len] = 0;
2327 ctxt->instate = (xmlParserInputState) state;
2328 if (!IS_CHAR(cur)) {
2329 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else {
2335 NEXT;
2336 }
2337 return(buf);
2338}
2339
2340/**
2341 * xmlParsePubidLiteral:
2342 * @ctxt: an XML parser context
2343 *
2344 * parse an XML public literal
2345 *
2346 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2347 *
2348 * Returns the PubidLiteral parsed or NULL.
2349 */
2350
2351xmlChar *
2352xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2353 xmlChar *buf = NULL;
2354 int len = 0;
2355 int size = XML_PARSER_BUFFER_SIZE;
2356 xmlChar cur;
2357 xmlChar stop;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2377 if (buf == NULL) {
2378 xmlGenericError(xmlGenericErrorContext,
2379 "malloc of %d byte failed\n", size);
2380 return(NULL);
2381 }
2382 cur = CUR;
2383 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2384 if (len + 1 >= size) {
2385 size *= 2;
2386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2387 if (buf == NULL) {
2388 xmlGenericError(xmlGenericErrorContext,
2389 "realloc of %d byte failed\n", size);
2390 return(NULL);
2391 }
2392 }
2393 buf[len++] = cur;
2394 count++;
2395 if (count > 50) {
2396 GROW;
2397 count = 0;
2398 }
2399 NEXT;
2400 cur = CUR;
2401 if (cur == 0) {
2402 GROW;
2403 SHRINK;
2404 cur = CUR;
2405 }
2406 }
2407 buf[len] = 0;
2408 if (cur != stop) {
2409 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2412 ctxt->wellFormed = 0;
2413 ctxt->disableSAX = 1;
2414 } else {
2415 NEXT;
2416 }
2417 return(buf);
2418}
2419
Daniel Veillard48b2f892001-02-25 16:11:03 +00002420void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002421/**
2422 * xmlParseCharData:
2423 * @ctxt: an XML parser context
2424 * @cdata: int indicating whether we are within a CDATA section
2425 *
2426 * parse a CharData section.
2427 * if we are within a CDATA section ']]>' marks an end of section.
2428 *
2429 * The right angle bracket (>) may be represented using the string "&gt;",
2430 * and must, for compatibility, be escaped using "&gt;" or a character
2431 * reference when it appears in the string "]]>" in content, when that
2432 * string is not marking the end of a CDATA section.
2433 *
2434 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2435 */
2436
2437void
2438xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002439 const xmlChar *in;
2440 int nbchar = 0;
2441
2442 SHRINK;
2443 GROW;
2444 /*
2445 * Accelerated common case where input don't need to be
2446 * modified before passing it to the handler.
2447 */
2448 if ((ctxt->token == 0) && (!cdata)) {
2449 in = ctxt->input->cur;
2450 do {
2451 while (((*in >= 0x20) && (*in != '<') &&
2452 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2453 in++;
2454 if (*in == 0xA) {
2455 ctxt->input->line++;
2456 continue; /* while */
2457 }
2458 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002459 if (nbchar > 0) {
2460 if (IS_BLANK(*ctxt->input->cur) &&
2461 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2462 if (ctxt->sax->ignorableWhitespace != NULL)
2463 ctxt->sax->ignorableWhitespace(ctxt->userData,
2464 ctxt->input->cur, nbchar);
2465 } else {
2466 if (ctxt->sax->characters != NULL)
2467 ctxt->sax->characters(ctxt->userData,
2468 ctxt->input->cur, nbchar);
2469 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002470 }
2471 ctxt->input->cur = in;
2472 if (*in == 0xD) {
2473 in++;
2474 if (*in == 0xA) {
2475 ctxt->input->cur = in;
2476 in++;
2477 ctxt->input->line++;
2478 continue; /* while */
2479 }
2480 in--;
2481 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002482 if (*in == '<') {
2483 return;
2484 }
2485 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002486 return;
2487 }
2488 SHRINK;
2489 GROW;
2490 in = ctxt->input->cur;
2491 } while ((*in >= 0x20) && (*in <= 0x7F));
2492 nbchar = 0;
2493 }
2494 xmlParseCharDataComplex(ctxt, cdata);
2495}
2496
2497void
2498xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002499 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2500 int nbchar = 0;
2501 int cur, l;
2502 int count = 0;
2503
2504 SHRINK;
2505 GROW;
2506 cur = CUR_CHAR(l);
2507 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2508 ((cur != '&') || (ctxt->token == '&')) &&
2509 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2510 if ((cur == ']') && (NXT(1) == ']') &&
2511 (NXT(2) == '>')) {
2512 if (cdata) break;
2513 else {
2514 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2516 ctxt->sax->error(ctxt->userData,
2517 "Sequence ']]>' not allowed in content\n");
2518 /* Should this be relaxed ??? I see a "must here */
2519 ctxt->wellFormed = 0;
2520 ctxt->disableSAX = 1;
2521 }
2522 }
2523 COPY_BUF(l,buf,nbchar,cur);
2524 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2525 /*
2526 * Ok the segment is to be consumed as chars.
2527 */
2528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2529 if (areBlanks(ctxt, buf, nbchar)) {
2530 if (ctxt->sax->ignorableWhitespace != NULL)
2531 ctxt->sax->ignorableWhitespace(ctxt->userData,
2532 buf, nbchar);
2533 } else {
2534 if (ctxt->sax->characters != NULL)
2535 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2536 }
2537 }
2538 nbchar = 0;
2539 }
2540 count++;
2541 if (count > 50) {
2542 GROW;
2543 count = 0;
2544 }
2545 NEXTL(l);
2546 cur = CUR_CHAR(l);
2547 }
2548 if (nbchar != 0) {
2549 /*
2550 * Ok the segment is to be consumed as chars.
2551 */
2552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2553 if (areBlanks(ctxt, buf, nbchar)) {
2554 if (ctxt->sax->ignorableWhitespace != NULL)
2555 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2556 } else {
2557 if (ctxt->sax->characters != NULL)
2558 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2559 }
2560 }
2561 }
2562}
2563
2564/**
2565 * xmlParseExternalID:
2566 * @ctxt: an XML parser context
2567 * @publicID: a xmlChar** receiving PubidLiteral
2568 * @strict: indicate whether we should restrict parsing to only
2569 * production [75], see NOTE below
2570 *
2571 * Parse an External ID or a Public ID
2572 *
2573 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2574 * 'PUBLIC' S PubidLiteral S SystemLiteral
2575 *
2576 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2577 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2578 *
2579 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2580 *
2581 * Returns the function returns SystemLiteral and in the second
2582 * case publicID receives PubidLiteral, is strict is off
2583 * it is possible to return NULL and have publicID set.
2584 */
2585
2586xmlChar *
2587xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2588 xmlChar *URI = NULL;
2589
2590 SHRINK;
2591 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2592 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2593 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2594 SKIP(6);
2595 if (!IS_BLANK(CUR)) {
2596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2598 ctxt->sax->error(ctxt->userData,
2599 "Space required after 'SYSTEM'\n");
2600 ctxt->wellFormed = 0;
2601 ctxt->disableSAX = 1;
2602 }
2603 SKIP_BLANKS;
2604 URI = xmlParseSystemLiteral(ctxt);
2605 if (URI == NULL) {
2606 ctxt->errNo = XML_ERR_URI_REQUIRED;
2607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2608 ctxt->sax->error(ctxt->userData,
2609 "xmlParseExternalID: SYSTEM, no URI\n");
2610 ctxt->wellFormed = 0;
2611 ctxt->disableSAX = 1;
2612 }
2613 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2614 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2615 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2616 SKIP(6);
2617 if (!IS_BLANK(CUR)) {
2618 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2620 ctxt->sax->error(ctxt->userData,
2621 "Space required after 'PUBLIC'\n");
2622 ctxt->wellFormed = 0;
2623 ctxt->disableSAX = 1;
2624 }
2625 SKIP_BLANKS;
2626 *publicID = xmlParsePubidLiteral(ctxt);
2627 if (*publicID == NULL) {
2628 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2630 ctxt->sax->error(ctxt->userData,
2631 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2632 ctxt->wellFormed = 0;
2633 ctxt->disableSAX = 1;
2634 }
2635 if (strict) {
2636 /*
2637 * We don't handle [83] so "S SystemLiteral" is required.
2638 */
2639 if (!IS_BLANK(CUR)) {
2640 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2642 ctxt->sax->error(ctxt->userData,
2643 "Space required after the Public Identifier\n");
2644 ctxt->wellFormed = 0;
2645 ctxt->disableSAX = 1;
2646 }
2647 } else {
2648 /*
2649 * We handle [83] so we return immediately, if
2650 * "S SystemLiteral" is not detected. From a purely parsing
2651 * point of view that's a nice mess.
2652 */
2653 const xmlChar *ptr;
2654 GROW;
2655
2656 ptr = CUR_PTR;
2657 if (!IS_BLANK(*ptr)) return(NULL);
2658
2659 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2660 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2661 }
2662 SKIP_BLANKS;
2663 URI = xmlParseSystemLiteral(ctxt);
2664 if (URI == NULL) {
2665 ctxt->errNo = XML_ERR_URI_REQUIRED;
2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667 ctxt->sax->error(ctxt->userData,
2668 "xmlParseExternalID: PUBLIC, no URI\n");
2669 ctxt->wellFormed = 0;
2670 ctxt->disableSAX = 1;
2671 }
2672 }
2673 return(URI);
2674}
2675
2676/**
2677 * xmlParseComment:
2678 * @ctxt: an XML parser context
2679 *
2680 * Skip an XML (SGML) comment <!-- .... -->
2681 * The spec says that "For compatibility, the string "--" (double-hyphen)
2682 * must not occur within comments. "
2683 *
2684 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2685 */
2686void
2687xmlParseComment(xmlParserCtxtPtr ctxt) {
2688 xmlChar *buf = NULL;
2689 int len;
2690 int size = XML_PARSER_BUFFER_SIZE;
2691 int q, ql;
2692 int r, rl;
2693 int cur, l;
2694 xmlParserInputState state;
2695 xmlParserInputPtr input = ctxt->input;
2696 int count = 0;
2697
2698 /*
2699 * Check that there is a comment right here.
2700 */
2701 if ((RAW != '<') || (NXT(1) != '!') ||
2702 (NXT(2) != '-') || (NXT(3) != '-')) return;
2703
2704 state = ctxt->instate;
2705 ctxt->instate = XML_PARSER_COMMENT;
2706 SHRINK;
2707 SKIP(4);
2708 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2709 if (buf == NULL) {
2710 xmlGenericError(xmlGenericErrorContext,
2711 "malloc of %d byte failed\n", size);
2712 ctxt->instate = state;
2713 return;
2714 }
2715 q = CUR_CHAR(ql);
2716 NEXTL(ql);
2717 r = CUR_CHAR(rl);
2718 NEXTL(rl);
2719 cur = CUR_CHAR(l);
2720 len = 0;
2721 while (IS_CHAR(cur) && /* checked */
2722 ((cur != '>') ||
2723 (r != '-') || (q != '-'))) {
2724 if ((r == '-') && (q == '-') && (len > 1)) {
2725 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727 ctxt->sax->error(ctxt->userData,
2728 "Comment must not contain '--' (double-hyphen)`\n");
2729 ctxt->wellFormed = 0;
2730 ctxt->disableSAX = 1;
2731 }
2732 if (len + 5 >= size) {
2733 size *= 2;
2734 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2735 if (buf == NULL) {
2736 xmlGenericError(xmlGenericErrorContext,
2737 "realloc of %d byte failed\n", size);
2738 ctxt->instate = state;
2739 return;
2740 }
2741 }
2742 COPY_BUF(ql,buf,len,q);
2743 q = r;
2744 ql = rl;
2745 r = cur;
2746 rl = l;
2747
2748 count++;
2749 if (count > 50) {
2750 GROW;
2751 count = 0;
2752 }
2753 NEXTL(l);
2754 cur = CUR_CHAR(l);
2755 if (cur == 0) {
2756 SHRINK;
2757 GROW;
2758 cur = CUR_CHAR(l);
2759 }
2760 }
2761 buf[len] = 0;
2762 if (!IS_CHAR(cur)) {
2763 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2765 ctxt->sax->error(ctxt->userData,
2766 "Comment not terminated \n<!--%.50s\n", buf);
2767 ctxt->wellFormed = 0;
2768 ctxt->disableSAX = 1;
2769 xmlFree(buf);
2770 } else {
2771 if (input != ctxt->input) {
2772 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2774 ctxt->sax->error(ctxt->userData,
2775"Comment doesn't start and stop in the same entity\n");
2776 ctxt->wellFormed = 0;
2777 ctxt->disableSAX = 1;
2778 }
2779 NEXT;
2780 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2781 (!ctxt->disableSAX))
2782 ctxt->sax->comment(ctxt->userData, buf);
2783 xmlFree(buf);
2784 }
2785 ctxt->instate = state;
2786}
2787
2788/**
2789 * xmlParsePITarget:
2790 * @ctxt: an XML parser context
2791 *
2792 * parse the name of a PI
2793 *
2794 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2795 *
2796 * Returns the PITarget name or NULL
2797 */
2798
2799xmlChar *
2800xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2801 xmlChar *name;
2802
2803 name = xmlParseName(ctxt);
2804 if ((name != NULL) &&
2805 ((name[0] == 'x') || (name[0] == 'X')) &&
2806 ((name[1] == 'm') || (name[1] == 'M')) &&
2807 ((name[2] == 'l') || (name[2] == 'L'))) {
2808 int i;
2809 if ((name[0] == 'x') && (name[1] == 'm') &&
2810 (name[2] == 'l') && (name[3] == 0)) {
2811 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2813 ctxt->sax->error(ctxt->userData,
2814 "XML declaration allowed only at the start of the document\n");
2815 ctxt->wellFormed = 0;
2816 ctxt->disableSAX = 1;
2817 return(name);
2818 } else if (name[3] == 0) {
2819 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2821 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 return(name);
2825 }
2826 for (i = 0;;i++) {
2827 if (xmlW3CPIs[i] == NULL) break;
2828 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2829 return(name);
2830 }
2831 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2832 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2833 ctxt->sax->warning(ctxt->userData,
2834 "xmlParsePItarget: invalid name prefix 'xml'\n");
2835 }
2836 }
2837 return(name);
2838}
2839
2840/**
2841 * xmlParsePI:
2842 * @ctxt: an XML parser context
2843 *
2844 * parse an XML Processing Instruction.
2845 *
2846 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2847 *
2848 * The processing is transfered to SAX once parsed.
2849 */
2850
2851void
2852xmlParsePI(xmlParserCtxtPtr ctxt) {
2853 xmlChar *buf = NULL;
2854 int len = 0;
2855 int size = XML_PARSER_BUFFER_SIZE;
2856 int cur, l;
2857 xmlChar *target;
2858 xmlParserInputState state;
2859 int count = 0;
2860
2861 if ((RAW == '<') && (NXT(1) == '?')) {
2862 xmlParserInputPtr input = ctxt->input;
2863 state = ctxt->instate;
2864 ctxt->instate = XML_PARSER_PI;
2865 /*
2866 * this is a Processing Instruction.
2867 */
2868 SKIP(2);
2869 SHRINK;
2870
2871 /*
2872 * Parse the target name and check for special support like
2873 * namespace.
2874 */
2875 target = xmlParsePITarget(ctxt);
2876 if (target != NULL) {
2877 if ((RAW == '?') && (NXT(1) == '>')) {
2878 if (input != ctxt->input) {
2879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "PI declaration doesn't start and stop in the same entity\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 SKIP(2);
2887
2888 /*
2889 * SAX: PI detected.
2890 */
2891 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2892 (ctxt->sax->processingInstruction != NULL))
2893 ctxt->sax->processingInstruction(ctxt->userData,
2894 target, NULL);
2895 ctxt->instate = state;
2896 xmlFree(target);
2897 return;
2898 }
2899 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2900 if (buf == NULL) {
2901 xmlGenericError(xmlGenericErrorContext,
2902 "malloc of %d byte failed\n", size);
2903 ctxt->instate = state;
2904 return;
2905 }
2906 cur = CUR;
2907 if (!IS_BLANK(cur)) {
2908 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2910 ctxt->sax->error(ctxt->userData,
2911 "xmlParsePI: PI %s space expected\n", target);
2912 ctxt->wellFormed = 0;
2913 ctxt->disableSAX = 1;
2914 }
2915 SKIP_BLANKS;
2916 cur = CUR_CHAR(l);
2917 while (IS_CHAR(cur) && /* checked */
2918 ((cur != '?') || (NXT(1) != '>'))) {
2919 if (len + 5 >= size) {
2920 size *= 2;
2921 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2922 if (buf == NULL) {
2923 xmlGenericError(xmlGenericErrorContext,
2924 "realloc of %d byte failed\n", size);
2925 ctxt->instate = state;
2926 return;
2927 }
2928 }
2929 count++;
2930 if (count > 50) {
2931 GROW;
2932 count = 0;
2933 }
2934 COPY_BUF(l,buf,len,cur);
2935 NEXTL(l);
2936 cur = CUR_CHAR(l);
2937 if (cur == 0) {
2938 SHRINK;
2939 GROW;
2940 cur = CUR_CHAR(l);
2941 }
2942 }
2943 buf[len] = 0;
2944 if (cur != '?') {
2945 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2947 ctxt->sax->error(ctxt->userData,
2948 "xmlParsePI: PI %s never end ...\n", target);
2949 ctxt->wellFormed = 0;
2950 ctxt->disableSAX = 1;
2951 } else {
2952 if (input != ctxt->input) {
2953 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955 ctxt->sax->error(ctxt->userData,
2956 "PI declaration doesn't start and stop in the same entity\n");
2957 ctxt->wellFormed = 0;
2958 ctxt->disableSAX = 1;
2959 }
2960 SKIP(2);
2961
2962 /*
2963 * SAX: PI detected.
2964 */
2965 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2966 (ctxt->sax->processingInstruction != NULL))
2967 ctxt->sax->processingInstruction(ctxt->userData,
2968 target, buf);
2969 }
2970 xmlFree(buf);
2971 xmlFree(target);
2972 } else {
2973 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParsePI : no target name\n");
2977 ctxt->wellFormed = 0;
2978 ctxt->disableSAX = 1;
2979 }
2980 ctxt->instate = state;
2981 }
2982}
2983
2984/**
2985 * xmlParseNotationDecl:
2986 * @ctxt: an XML parser context
2987 *
2988 * parse a notation declaration
2989 *
2990 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2991 *
2992 * Hence there is actually 3 choices:
2993 * 'PUBLIC' S PubidLiteral
2994 * 'PUBLIC' S PubidLiteral S SystemLiteral
2995 * and 'SYSTEM' S SystemLiteral
2996 *
2997 * See the NOTE on xmlParseExternalID().
2998 */
2999
3000void
3001xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3002 xmlChar *name;
3003 xmlChar *Pubid;
3004 xmlChar *Systemid;
3005
3006 if ((RAW == '<') && (NXT(1) == '!') &&
3007 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3008 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3009 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3010 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3011 xmlParserInputPtr input = ctxt->input;
3012 SHRINK;
3013 SKIP(10);
3014 if (!IS_BLANK(CUR)) {
3015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "Space required after '<!NOTATION'\n");
3019 ctxt->wellFormed = 0;
3020 ctxt->disableSAX = 1;
3021 return;
3022 }
3023 SKIP_BLANKS;
3024
Daniel Veillard29631a82001-03-05 09:49:20 +00003025 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003026 if (name == NULL) {
3027 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029 ctxt->sax->error(ctxt->userData,
3030 "NOTATION: Name expected here\n");
3031 ctxt->wellFormed = 0;
3032 ctxt->disableSAX = 1;
3033 return;
3034 }
3035 if (!IS_BLANK(CUR)) {
3036 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3038 ctxt->sax->error(ctxt->userData,
3039 "Space required after the NOTATION name'\n");
3040 ctxt->wellFormed = 0;
3041 ctxt->disableSAX = 1;
3042 return;
3043 }
3044 SKIP_BLANKS;
3045
3046 /*
3047 * Parse the IDs.
3048 */
3049 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3050 SKIP_BLANKS;
3051
3052 if (RAW == '>') {
3053 if (input != ctxt->input) {
3054 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3056 ctxt->sax->error(ctxt->userData,
3057"Notation declaration doesn't start and stop in the same entity\n");
3058 ctxt->wellFormed = 0;
3059 ctxt->disableSAX = 1;
3060 }
3061 NEXT;
3062 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3063 (ctxt->sax->notationDecl != NULL))
3064 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3065 } else {
3066 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3068 ctxt->sax->error(ctxt->userData,
3069 "'>' required to close NOTATION declaration\n");
3070 ctxt->wellFormed = 0;
3071 ctxt->disableSAX = 1;
3072 }
3073 xmlFree(name);
3074 if (Systemid != NULL) xmlFree(Systemid);
3075 if (Pubid != NULL) xmlFree(Pubid);
3076 }
3077}
3078
3079/**
3080 * xmlParseEntityDecl:
3081 * @ctxt: an XML parser context
3082 *
3083 * parse <!ENTITY declarations
3084 *
3085 * [70] EntityDecl ::= GEDecl | PEDecl
3086 *
3087 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3088 *
3089 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3090 *
3091 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3092 *
3093 * [74] PEDef ::= EntityValue | ExternalID
3094 *
3095 * [76] NDataDecl ::= S 'NDATA' S Name
3096 *
3097 * [ VC: Notation Declared ]
3098 * The Name must match the declared name of a notation.
3099 */
3100
3101void
3102xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3103 xmlChar *name = NULL;
3104 xmlChar *value = NULL;
3105 xmlChar *URI = NULL, *literal = NULL;
3106 xmlChar *ndata = NULL;
3107 int isParameter = 0;
3108 xmlChar *orig = NULL;
3109
3110 GROW;
3111 if ((RAW == '<') && (NXT(1) == '!') &&
3112 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3113 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3114 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3115 xmlParserInputPtr input = ctxt->input;
3116 ctxt->instate = XML_PARSER_ENTITY_DECL;
3117 SHRINK;
3118 SKIP(8);
3119 if (!IS_BLANK(CUR)) {
3120 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3122 ctxt->sax->error(ctxt->userData,
3123 "Space required after '<!ENTITY'\n");
3124 ctxt->wellFormed = 0;
3125 ctxt->disableSAX = 1;
3126 }
3127 SKIP_BLANKS;
3128
3129 if (RAW == '%') {
3130 NEXT;
3131 if (!IS_BLANK(CUR)) {
3132 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3134 ctxt->sax->error(ctxt->userData,
3135 "Space required after '%'\n");
3136 ctxt->wellFormed = 0;
3137 ctxt->disableSAX = 1;
3138 }
3139 SKIP_BLANKS;
3140 isParameter = 1;
3141 }
3142
Daniel Veillard29631a82001-03-05 09:49:20 +00003143 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003144 if (name == NULL) {
3145 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3146 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3147 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3148 ctxt->wellFormed = 0;
3149 ctxt->disableSAX = 1;
3150 return;
3151 }
3152 if (!IS_BLANK(CUR)) {
3153 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3155 ctxt->sax->error(ctxt->userData,
3156 "Space required after the entity name\n");
3157 ctxt->wellFormed = 0;
3158 ctxt->disableSAX = 1;
3159 }
3160 SKIP_BLANKS;
3161
3162 /*
3163 * handle the various case of definitions...
3164 */
3165 if (isParameter) {
3166 if ((RAW == '"') || (RAW == '\'')) {
3167 value = xmlParseEntityValue(ctxt, &orig);
3168 if (value) {
3169 if ((ctxt->sax != NULL) &&
3170 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3171 ctxt->sax->entityDecl(ctxt->userData, name,
3172 XML_INTERNAL_PARAMETER_ENTITY,
3173 NULL, NULL, value);
3174 }
3175 } else {
3176 URI = xmlParseExternalID(ctxt, &literal, 1);
3177 if ((URI == NULL) && (literal == NULL)) {
3178 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "Entity value required\n");
3182 ctxt->wellFormed = 0;
3183 ctxt->disableSAX = 1;
3184 }
3185 if (URI) {
3186 xmlURIPtr uri;
3187
3188 uri = xmlParseURI((const char *) URI);
3189 if (uri == NULL) {
3190 ctxt->errNo = XML_ERR_INVALID_URI;
3191 if ((ctxt->sax != NULL) &&
3192 (!ctxt->disableSAX) &&
3193 (ctxt->sax->error != NULL))
3194 ctxt->sax->error(ctxt->userData,
3195 "Invalid URI: %s\n", URI);
3196 ctxt->wellFormed = 0;
3197 } else {
3198 if (uri->fragment != NULL) {
3199 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3200 if ((ctxt->sax != NULL) &&
3201 (!ctxt->disableSAX) &&
3202 (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "Fragment not allowed: %s\n", URI);
3205 ctxt->wellFormed = 0;
3206 } else {
3207 if ((ctxt->sax != NULL) &&
3208 (!ctxt->disableSAX) &&
3209 (ctxt->sax->entityDecl != NULL))
3210 ctxt->sax->entityDecl(ctxt->userData, name,
3211 XML_EXTERNAL_PARAMETER_ENTITY,
3212 literal, URI, NULL);
3213 }
3214 xmlFreeURI(uri);
3215 }
3216 }
3217 }
3218 } else {
3219 if ((RAW == '"') || (RAW == '\'')) {
3220 value = xmlParseEntityValue(ctxt, &orig);
3221 if ((ctxt->sax != NULL) &&
3222 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3223 ctxt->sax->entityDecl(ctxt->userData, name,
3224 XML_INTERNAL_GENERAL_ENTITY,
3225 NULL, NULL, value);
3226 } else {
3227 URI = xmlParseExternalID(ctxt, &literal, 1);
3228 if ((URI == NULL) && (literal == NULL)) {
3229 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3231 ctxt->sax->error(ctxt->userData,
3232 "Entity value required\n");
3233 ctxt->wellFormed = 0;
3234 ctxt->disableSAX = 1;
3235 }
3236 if (URI) {
3237 xmlURIPtr uri;
3238
3239 uri = xmlParseURI((const char *)URI);
3240 if (uri == NULL) {
3241 ctxt->errNo = XML_ERR_INVALID_URI;
3242 if ((ctxt->sax != NULL) &&
3243 (!ctxt->disableSAX) &&
3244 (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Invalid URI: %s\n", URI);
3247 ctxt->wellFormed = 0;
3248 } else {
3249 if (uri->fragment != NULL) {
3250 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3251 if ((ctxt->sax != NULL) &&
3252 (!ctxt->disableSAX) &&
3253 (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255 "Fragment not allowed: %s\n", URI);
3256 ctxt->wellFormed = 0;
3257 }
3258 xmlFreeURI(uri);
3259 }
3260 }
3261 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3262 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3264 ctxt->sax->error(ctxt->userData,
3265 "Space required before 'NDATA'\n");
3266 ctxt->wellFormed = 0;
3267 ctxt->disableSAX = 1;
3268 }
3269 SKIP_BLANKS;
3270 if ((RAW == 'N') && (NXT(1) == 'D') &&
3271 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3272 (NXT(4) == 'A')) {
3273 SKIP(5);
3274 if (!IS_BLANK(CUR)) {
3275 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3277 ctxt->sax->error(ctxt->userData,
3278 "Space required after 'NDATA'\n");
3279 ctxt->wellFormed = 0;
3280 ctxt->disableSAX = 1;
3281 }
3282 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003283 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003284 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3285 (ctxt->sax->unparsedEntityDecl != NULL))
3286 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3287 literal, URI, ndata);
3288 } else {
3289 if ((ctxt->sax != NULL) &&
3290 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3291 ctxt->sax->entityDecl(ctxt->userData, name,
3292 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3293 literal, URI, NULL);
3294 }
3295 }
3296 }
3297 SKIP_BLANKS;
3298 if (RAW != '>') {
3299 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "xmlParseEntityDecl: entity %s not terminated\n", name);
3303 ctxt->wellFormed = 0;
3304 ctxt->disableSAX = 1;
3305 } else {
3306 if (input != ctxt->input) {
3307 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3309 ctxt->sax->error(ctxt->userData,
3310"Entity declaration doesn't start and stop in the same entity\n");
3311 ctxt->wellFormed = 0;
3312 ctxt->disableSAX = 1;
3313 }
3314 NEXT;
3315 }
3316 if (orig != NULL) {
3317 /*
3318 * Ugly mechanism to save the raw entity value.
3319 */
3320 xmlEntityPtr cur = NULL;
3321
3322 if (isParameter) {
3323 if ((ctxt->sax != NULL) &&
3324 (ctxt->sax->getParameterEntity != NULL))
3325 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3326 } else {
3327 if ((ctxt->sax != NULL) &&
3328 (ctxt->sax->getEntity != NULL))
3329 cur = ctxt->sax->getEntity(ctxt->userData, name);
3330 }
3331 if (cur != NULL) {
3332 if (cur->orig != NULL)
3333 xmlFree(orig);
3334 else
3335 cur->orig = orig;
3336 } else
3337 xmlFree(orig);
3338 }
3339 if (name != NULL) xmlFree(name);
3340 if (value != NULL) xmlFree(value);
3341 if (URI != NULL) xmlFree(URI);
3342 if (literal != NULL) xmlFree(literal);
3343 if (ndata != NULL) xmlFree(ndata);
3344 }
3345}
3346
3347/**
3348 * xmlParseDefaultDecl:
3349 * @ctxt: an XML parser context
3350 * @value: Receive a possible fixed default value for the attribute
3351 *
3352 * Parse an attribute default declaration
3353 *
3354 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3355 *
3356 * [ VC: Required Attribute ]
3357 * if the default declaration is the keyword #REQUIRED, then the
3358 * attribute must be specified for all elements of the type in the
3359 * attribute-list declaration.
3360 *
3361 * [ VC: Attribute Default Legal ]
3362 * The declared default value must meet the lexical constraints of
3363 * the declared attribute type c.f. xmlValidateAttributeDecl()
3364 *
3365 * [ VC: Fixed Attribute Default ]
3366 * if an attribute has a default value declared with the #FIXED
3367 * keyword, instances of that attribute must match the default value.
3368 *
3369 * [ WFC: No < in Attribute Values ]
3370 * handled in xmlParseAttValue()
3371 *
3372 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3373 * or XML_ATTRIBUTE_FIXED.
3374 */
3375
3376int
3377xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3378 int val;
3379 xmlChar *ret;
3380
3381 *value = NULL;
3382 if ((RAW == '#') && (NXT(1) == 'R') &&
3383 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3384 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3385 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3386 (NXT(8) == 'D')) {
3387 SKIP(9);
3388 return(XML_ATTRIBUTE_REQUIRED);
3389 }
3390 if ((RAW == '#') && (NXT(1) == 'I') &&
3391 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3392 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3393 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3394 SKIP(8);
3395 return(XML_ATTRIBUTE_IMPLIED);
3396 }
3397 val = XML_ATTRIBUTE_NONE;
3398 if ((RAW == '#') && (NXT(1) == 'F') &&
3399 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3400 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3401 SKIP(6);
3402 val = XML_ATTRIBUTE_FIXED;
3403 if (!IS_BLANK(CUR)) {
3404 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3406 ctxt->sax->error(ctxt->userData,
3407 "Space required after '#FIXED'\n");
3408 ctxt->wellFormed = 0;
3409 ctxt->disableSAX = 1;
3410 }
3411 SKIP_BLANKS;
3412 }
3413 ret = xmlParseAttValue(ctxt);
3414 ctxt->instate = XML_PARSER_DTD;
3415 if (ret == NULL) {
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "Attribute default value declaration error\n");
3419 ctxt->wellFormed = 0;
3420 ctxt->disableSAX = 1;
3421 } else
3422 *value = ret;
3423 return(val);
3424}
3425
3426/**
3427 * xmlParseNotationType:
3428 * @ctxt: an XML parser context
3429 *
3430 * parse an Notation attribute type.
3431 *
3432 * Note: the leading 'NOTATION' S part has already being parsed...
3433 *
3434 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3435 *
3436 * [ VC: Notation Attributes ]
3437 * Values of this type must match one of the notation names included
3438 * in the declaration; all notation names in the declaration must be declared.
3439 *
3440 * Returns: the notation attribute tree built while parsing
3441 */
3442
3443xmlEnumerationPtr
3444xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3445 xmlChar *name;
3446 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3447
3448 if (RAW != '(') {
3449 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3451 ctxt->sax->error(ctxt->userData,
3452 "'(' required to start 'NOTATION'\n");
3453 ctxt->wellFormed = 0;
3454 ctxt->disableSAX = 1;
3455 return(NULL);
3456 }
3457 SHRINK;
3458 do {
3459 NEXT;
3460 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003461 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003462 if (name == NULL) {
3463 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3465 ctxt->sax->error(ctxt->userData,
3466 "Name expected in NOTATION declaration\n");
3467 ctxt->wellFormed = 0;
3468 ctxt->disableSAX = 1;
3469 return(ret);
3470 }
3471 cur = xmlCreateEnumeration(name);
3472 xmlFree(name);
3473 if (cur == NULL) return(ret);
3474 if (last == NULL) ret = last = cur;
3475 else {
3476 last->next = cur;
3477 last = cur;
3478 }
3479 SKIP_BLANKS;
3480 } while (RAW == '|');
3481 if (RAW != ')') {
3482 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3484 ctxt->sax->error(ctxt->userData,
3485 "')' required to finish NOTATION declaration\n");
3486 ctxt->wellFormed = 0;
3487 ctxt->disableSAX = 1;
3488 if ((last != NULL) && (last != ret))
3489 xmlFreeEnumeration(last);
3490 return(ret);
3491 }
3492 NEXT;
3493 return(ret);
3494}
3495
3496/**
3497 * xmlParseEnumerationType:
3498 * @ctxt: an XML parser context
3499 *
3500 * parse an Enumeration attribute type.
3501 *
3502 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3503 *
3504 * [ VC: Enumeration ]
3505 * Values of this type must match one of the Nmtoken tokens in
3506 * the declaration
3507 *
3508 * Returns: the enumeration attribute tree built while parsing
3509 */
3510
3511xmlEnumerationPtr
3512xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3513 xmlChar *name;
3514 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3515
3516 if (RAW != '(') {
3517 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3519 ctxt->sax->error(ctxt->userData,
3520 "'(' required to start ATTLIST enumeration\n");
3521 ctxt->wellFormed = 0;
3522 ctxt->disableSAX = 1;
3523 return(NULL);
3524 }
3525 SHRINK;
3526 do {
3527 NEXT;
3528 SKIP_BLANKS;
3529 name = xmlParseNmtoken(ctxt);
3530 if (name == NULL) {
3531 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3533 ctxt->sax->error(ctxt->userData,
3534 "NmToken expected in ATTLIST enumeration\n");
3535 ctxt->wellFormed = 0;
3536 ctxt->disableSAX = 1;
3537 return(ret);
3538 }
3539 cur = xmlCreateEnumeration(name);
3540 xmlFree(name);
3541 if (cur == NULL) return(ret);
3542 if (last == NULL) ret = last = cur;
3543 else {
3544 last->next = cur;
3545 last = cur;
3546 }
3547 SKIP_BLANKS;
3548 } while (RAW == '|');
3549 if (RAW != ')') {
3550 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3552 ctxt->sax->error(ctxt->userData,
3553 "')' required to finish ATTLIST enumeration\n");
3554 ctxt->wellFormed = 0;
3555 ctxt->disableSAX = 1;
3556 return(ret);
3557 }
3558 NEXT;
3559 return(ret);
3560}
3561
3562/**
3563 * xmlParseEnumeratedType:
3564 * @ctxt: an XML parser context
3565 * @tree: the enumeration tree built while parsing
3566 *
3567 * parse an Enumerated attribute type.
3568 *
3569 * [57] EnumeratedType ::= NotationType | Enumeration
3570 *
3571 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3572 *
3573 *
3574 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3575 */
3576
3577int
3578xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3579 if ((RAW == 'N') && (NXT(1) == 'O') &&
3580 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3581 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3582 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3583 SKIP(8);
3584 if (!IS_BLANK(CUR)) {
3585 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3587 ctxt->sax->error(ctxt->userData,
3588 "Space required after 'NOTATION'\n");
3589 ctxt->wellFormed = 0;
3590 ctxt->disableSAX = 1;
3591 return(0);
3592 }
3593 SKIP_BLANKS;
3594 *tree = xmlParseNotationType(ctxt);
3595 if (*tree == NULL) return(0);
3596 return(XML_ATTRIBUTE_NOTATION);
3597 }
3598 *tree = xmlParseEnumerationType(ctxt);
3599 if (*tree == NULL) return(0);
3600 return(XML_ATTRIBUTE_ENUMERATION);
3601}
3602
3603/**
3604 * xmlParseAttributeType:
3605 * @ctxt: an XML parser context
3606 * @tree: the enumeration tree built while parsing
3607 *
3608 * parse the Attribute list def for an element
3609 *
3610 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3611 *
3612 * [55] StringType ::= 'CDATA'
3613 *
3614 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3615 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3616 *
3617 * Validity constraints for attribute values syntax are checked in
3618 * xmlValidateAttributeValue()
3619 *
3620 * [ VC: ID ]
3621 * Values of type ID must match the Name production. A name must not
3622 * appear more than once in an XML document as a value of this type;
3623 * i.e., ID values must uniquely identify the elements which bear them.
3624 *
3625 * [ VC: One ID per Element Type ]
3626 * No element type may have more than one ID attribute specified.
3627 *
3628 * [ VC: ID Attribute Default ]
3629 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3630 *
3631 * [ VC: IDREF ]
3632 * Values of type IDREF must match the Name production, and values
3633 * of type IDREFS must match Names; each IDREF Name must match the value
3634 * of an ID attribute on some element in the XML document; i.e. IDREF
3635 * values must match the value of some ID attribute.
3636 *
3637 * [ VC: Entity Name ]
3638 * Values of type ENTITY must match the Name production, values
3639 * of type ENTITIES must match Names; each Entity Name must match the
3640 * name of an unparsed entity declared in the DTD.
3641 *
3642 * [ VC: Name Token ]
3643 * Values of type NMTOKEN must match the Nmtoken production; values
3644 * of type NMTOKENS must match Nmtokens.
3645 *
3646 * Returns the attribute type
3647 */
3648int
3649xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3650 SHRINK;
3651 if ((RAW == 'C') && (NXT(1) == 'D') &&
3652 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3653 (NXT(4) == 'A')) {
3654 SKIP(5);
3655 return(XML_ATTRIBUTE_CDATA);
3656 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3657 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3658 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3659 SKIP(6);
3660 return(XML_ATTRIBUTE_IDREFS);
3661 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3662 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3663 (NXT(4) == 'F')) {
3664 SKIP(5);
3665 return(XML_ATTRIBUTE_IDREF);
3666 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3667 SKIP(2);
3668 return(XML_ATTRIBUTE_ID);
3669 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3670 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3671 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3672 SKIP(6);
3673 return(XML_ATTRIBUTE_ENTITY);
3674 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3675 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3676 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3677 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3678 SKIP(8);
3679 return(XML_ATTRIBUTE_ENTITIES);
3680 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3681 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3682 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3683 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3684 SKIP(8);
3685 return(XML_ATTRIBUTE_NMTOKENS);
3686 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3687 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3688 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3689 (NXT(6) == 'N')) {
3690 SKIP(7);
3691 return(XML_ATTRIBUTE_NMTOKEN);
3692 }
3693 return(xmlParseEnumeratedType(ctxt, tree));
3694}
3695
3696/**
3697 * xmlParseAttributeListDecl:
3698 * @ctxt: an XML parser context
3699 *
3700 * : parse the Attribute list def for an element
3701 *
3702 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3703 *
3704 * [53] AttDef ::= S Name S AttType S DefaultDecl
3705 *
3706 */
3707void
3708xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3709 xmlChar *elemName;
3710 xmlChar *attrName;
3711 xmlEnumerationPtr tree;
3712
3713 if ((RAW == '<') && (NXT(1) == '!') &&
3714 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3715 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3716 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3717 (NXT(8) == 'T')) {
3718 xmlParserInputPtr input = ctxt->input;
3719
3720 SKIP(9);
3721 if (!IS_BLANK(CUR)) {
3722 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "Space required after '<!ATTLIST'\n");
3726 ctxt->wellFormed = 0;
3727 ctxt->disableSAX = 1;
3728 }
3729 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003730 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 if (elemName == NULL) {
3732 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "ATTLIST: no name for Element\n");
3736 ctxt->wellFormed = 0;
3737 ctxt->disableSAX = 1;
3738 return;
3739 }
3740 SKIP_BLANKS;
3741 GROW;
3742 while (RAW != '>') {
3743 const xmlChar *check = CUR_PTR;
3744 int type;
3745 int def;
3746 xmlChar *defaultValue = NULL;
3747
3748 GROW;
3749 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003750 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003751 if (attrName == NULL) {
3752 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3754 ctxt->sax->error(ctxt->userData,
3755 "ATTLIST: no name for Attribute\n");
3756 ctxt->wellFormed = 0;
3757 ctxt->disableSAX = 1;
3758 break;
3759 }
3760 GROW;
3761 if (!IS_BLANK(CUR)) {
3762 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3764 ctxt->sax->error(ctxt->userData,
3765 "Space required after the attribute name\n");
3766 ctxt->wellFormed = 0;
3767 ctxt->disableSAX = 1;
3768 if (attrName != NULL)
3769 xmlFree(attrName);
3770 if (defaultValue != NULL)
3771 xmlFree(defaultValue);
3772 break;
3773 }
3774 SKIP_BLANKS;
3775
3776 type = xmlParseAttributeType(ctxt, &tree);
3777 if (type <= 0) {
3778 if (attrName != NULL)
3779 xmlFree(attrName);
3780 if (defaultValue != NULL)
3781 xmlFree(defaultValue);
3782 break;
3783 }
3784
3785 GROW;
3786 if (!IS_BLANK(CUR)) {
3787 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3789 ctxt->sax->error(ctxt->userData,
3790 "Space required after the attribute type\n");
3791 ctxt->wellFormed = 0;
3792 ctxt->disableSAX = 1;
3793 if (attrName != NULL)
3794 xmlFree(attrName);
3795 if (defaultValue != NULL)
3796 xmlFree(defaultValue);
3797 if (tree != NULL)
3798 xmlFreeEnumeration(tree);
3799 break;
3800 }
3801 SKIP_BLANKS;
3802
3803 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3804 if (def <= 0) {
3805 if (attrName != NULL)
3806 xmlFree(attrName);
3807 if (defaultValue != NULL)
3808 xmlFree(defaultValue);
3809 if (tree != NULL)
3810 xmlFreeEnumeration(tree);
3811 break;
3812 }
3813
3814 GROW;
3815 if (RAW != '>') {
3816 if (!IS_BLANK(CUR)) {
3817 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3819 ctxt->sax->error(ctxt->userData,
3820 "Space required after the attribute default value\n");
3821 ctxt->wellFormed = 0;
3822 ctxt->disableSAX = 1;
3823 if (attrName != NULL)
3824 xmlFree(attrName);
3825 if (defaultValue != NULL)
3826 xmlFree(defaultValue);
3827 if (tree != NULL)
3828 xmlFreeEnumeration(tree);
3829 break;
3830 }
3831 SKIP_BLANKS;
3832 }
3833 if (check == CUR_PTR) {
3834 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3836 ctxt->sax->error(ctxt->userData,
3837 "xmlParseAttributeListDecl: detected internal error\n");
3838 if (attrName != NULL)
3839 xmlFree(attrName);
3840 if (defaultValue != NULL)
3841 xmlFree(defaultValue);
3842 if (tree != NULL)
3843 xmlFreeEnumeration(tree);
3844 break;
3845 }
3846 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3847 (ctxt->sax->attributeDecl != NULL))
3848 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3849 type, def, defaultValue, tree);
3850 if (attrName != NULL)
3851 xmlFree(attrName);
3852 if (defaultValue != NULL)
3853 xmlFree(defaultValue);
3854 GROW;
3855 }
3856 if (RAW == '>') {
3857 if (input != ctxt->input) {
3858 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3860 ctxt->sax->error(ctxt->userData,
3861"Attribute list declaration doesn't start and stop in the same entity\n");
3862 ctxt->wellFormed = 0;
3863 ctxt->disableSAX = 1;
3864 }
3865 NEXT;
3866 }
3867
3868 xmlFree(elemName);
3869 }
3870}
3871
3872/**
3873 * xmlParseElementMixedContentDecl:
3874 * @ctxt: an XML parser context
3875 *
3876 * parse the declaration for a Mixed Element content
3877 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3878 *
3879 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3880 * '(' S? '#PCDATA' S? ')'
3881 *
3882 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3883 *
3884 * [ VC: No Duplicate Types ]
3885 * The same name must not appear more than once in a single
3886 * mixed-content declaration.
3887 *
3888 * returns: the list of the xmlElementContentPtr describing the element choices
3889 */
3890xmlElementContentPtr
3891xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3892 xmlElementContentPtr ret = NULL, cur = NULL, n;
3893 xmlChar *elem = NULL;
3894
3895 GROW;
3896 if ((RAW == '#') && (NXT(1) == 'P') &&
3897 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3898 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3899 (NXT(6) == 'A')) {
3900 SKIP(7);
3901 SKIP_BLANKS;
3902 SHRINK;
3903 if (RAW == ')') {
3904 ctxt->entity = ctxt->input;
3905 NEXT;
3906 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3907 if (RAW == '*') {
3908 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3909 NEXT;
3910 }
3911 return(ret);
3912 }
3913 if ((RAW == '(') || (RAW == '|')) {
3914 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3915 if (ret == NULL) return(NULL);
3916 }
3917 while (RAW == '|') {
3918 NEXT;
3919 if (elem == NULL) {
3920 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3921 if (ret == NULL) return(NULL);
3922 ret->c1 = cur;
3923 cur = ret;
3924 } else {
3925 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3926 if (n == NULL) return(NULL);
3927 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3928 cur->c2 = n;
3929 cur = n;
3930 xmlFree(elem);
3931 }
3932 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003933 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003934 if (elem == NULL) {
3935 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3937 ctxt->sax->error(ctxt->userData,
3938 "xmlParseElementMixedContentDecl : Name expected\n");
3939 ctxt->wellFormed = 0;
3940 ctxt->disableSAX = 1;
3941 xmlFreeElementContent(cur);
3942 return(NULL);
3943 }
3944 SKIP_BLANKS;
3945 GROW;
3946 }
3947 if ((RAW == ')') && (NXT(1) == '*')) {
3948 if (elem != NULL) {
3949 cur->c2 = xmlNewElementContent(elem,
3950 XML_ELEMENT_CONTENT_ELEMENT);
3951 xmlFree(elem);
3952 }
3953 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3954 ctxt->entity = ctxt->input;
3955 SKIP(2);
3956 } else {
3957 if (elem != NULL) xmlFree(elem);
3958 xmlFreeElementContent(ret);
3959 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3961 ctxt->sax->error(ctxt->userData,
3962 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3963 ctxt->wellFormed = 0;
3964 ctxt->disableSAX = 1;
3965 return(NULL);
3966 }
3967
3968 } else {
3969 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3971 ctxt->sax->error(ctxt->userData,
3972 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3973 ctxt->wellFormed = 0;
3974 ctxt->disableSAX = 1;
3975 }
3976 return(ret);
3977}
3978
3979/**
3980 * xmlParseElementChildrenContentDecl:
3981 * @ctxt: an XML parser context
3982 *
3983 * parse the declaration for a Mixed Element content
3984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3985 *
3986 *
3987 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3988 *
3989 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3990 *
3991 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3992 *
3993 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3994 *
3995 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3996 * TODO Parameter-entity replacement text must be properly nested
3997 * with parenthetized groups. That is to say, if either of the
3998 * opening or closing parentheses in a choice, seq, or Mixed
3999 * construct is contained in the replacement text for a parameter
4000 * entity, both must be contained in the same replacement text. For
4001 * interoperability, if a parameter-entity reference appears in a
4002 * choice, seq, or Mixed construct, its replacement text should not
4003 * be empty, and neither the first nor last non-blank character of
4004 * the replacement text should be a connector (| or ,).
4005 *
4006 * returns: the tree of xmlElementContentPtr describing the element
4007 * hierarchy.
4008 */
4009xmlElementContentPtr
4010#ifdef VMS
4011xmlParseElementChildrenContentD
4012#else
4013xmlParseElementChildrenContentDecl
4014#endif
4015(xmlParserCtxtPtr ctxt) {
4016 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4017 xmlChar *elem;
4018 xmlChar type = 0;
4019
4020 SKIP_BLANKS;
4021 GROW;
4022 if (RAW == '(') {
4023 /* Recurse on first child */
4024 NEXT;
4025 SKIP_BLANKS;
4026 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4027 SKIP_BLANKS;
4028 GROW;
4029 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004030 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004031 if (elem == NULL) {
4032 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4036 ctxt->wellFormed = 0;
4037 ctxt->disableSAX = 1;
4038 return(NULL);
4039 }
4040 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4041 GROW;
4042 if (RAW == '?') {
4043 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4044 NEXT;
4045 } else if (RAW == '*') {
4046 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4047 NEXT;
4048 } else if (RAW == '+') {
4049 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4050 NEXT;
4051 } else {
4052 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4053 }
4054 xmlFree(elem);
4055 GROW;
4056 }
4057 SKIP_BLANKS;
4058 SHRINK;
4059 while (RAW != ')') {
4060 /*
4061 * Each loop we parse one separator and one element.
4062 */
4063 if (RAW == ',') {
4064 if (type == 0) type = CUR;
4065
4066 /*
4067 * Detect "Name | Name , Name" error
4068 */
4069 else if (type != CUR) {
4070 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4072 ctxt->sax->error(ctxt->userData,
4073 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4074 type);
4075 ctxt->wellFormed = 0;
4076 ctxt->disableSAX = 1;
4077 if ((op != NULL) && (op != ret))
4078 xmlFreeElementContent(op);
4079 if ((last != NULL) && (last != ret) &&
4080 (last != ret->c1) && (last != ret->c2))
4081 xmlFreeElementContent(last);
4082 if (ret != NULL)
4083 xmlFreeElementContent(ret);
4084 return(NULL);
4085 }
4086 NEXT;
4087
4088 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4089 if (op == NULL) {
4090 xmlFreeElementContent(ret);
4091 return(NULL);
4092 }
4093 if (last == NULL) {
4094 op->c1 = ret;
4095 ret = cur = op;
4096 } else {
4097 cur->c2 = op;
4098 op->c1 = last;
4099 cur =op;
4100 last = NULL;
4101 }
4102 } else if (RAW == '|') {
4103 if (type == 0) type = CUR;
4104
4105 /*
4106 * Detect "Name , Name | Name" error
4107 */
4108 else if (type != CUR) {
4109 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4111 ctxt->sax->error(ctxt->userData,
4112 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4113 type);
4114 ctxt->wellFormed = 0;
4115 ctxt->disableSAX = 1;
4116 if ((op != NULL) && (op != ret) && (op != last))
4117 xmlFreeElementContent(op);
4118 if ((last != NULL) && (last != ret) &&
4119 (last != ret->c1) && (last != ret->c2))
4120 xmlFreeElementContent(last);
4121 if (ret != NULL)
4122 xmlFreeElementContent(ret);
4123 return(NULL);
4124 }
4125 NEXT;
4126
4127 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4128 if (op == NULL) {
4129 if ((op != NULL) && (op != ret))
4130 xmlFreeElementContent(op);
4131 if ((last != NULL) && (last != ret) &&
4132 (last != ret->c1) && (last != ret->c2))
4133 xmlFreeElementContent(last);
4134 if (ret != NULL)
4135 xmlFreeElementContent(ret);
4136 return(NULL);
4137 }
4138 if (last == NULL) {
4139 op->c1 = ret;
4140 ret = cur = op;
4141 } else {
4142 cur->c2 = op;
4143 op->c1 = last;
4144 cur =op;
4145 last = NULL;
4146 }
4147 } else {
4148 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4150 ctxt->sax->error(ctxt->userData,
4151 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4152 ctxt->wellFormed = 0;
4153 ctxt->disableSAX = 1;
4154 if ((op != NULL) && (op != ret))
4155 xmlFreeElementContent(op);
4156 if ((last != NULL) && (last != ret) &&
4157 (last != ret->c1) && (last != ret->c2))
4158 xmlFreeElementContent(last);
4159 if (ret != NULL)
4160 xmlFreeElementContent(ret);
4161 return(NULL);
4162 }
4163 GROW;
4164 SKIP_BLANKS;
4165 GROW;
4166 if (RAW == '(') {
4167 /* Recurse on second child */
4168 NEXT;
4169 SKIP_BLANKS;
4170 last = xmlParseElementChildrenContentDecl(ctxt);
4171 SKIP_BLANKS;
4172 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004173 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004174 if (elem == NULL) {
4175 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4177 ctxt->sax->error(ctxt->userData,
4178 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4179 ctxt->wellFormed = 0;
4180 ctxt->disableSAX = 1;
4181 if ((op != NULL) && (op != ret))
4182 xmlFreeElementContent(op);
4183 if ((last != NULL) && (last != ret) &&
4184 (last != ret->c1) && (last != ret->c2))
4185 xmlFreeElementContent(last);
4186 if (ret != NULL)
4187 xmlFreeElementContent(ret);
4188 return(NULL);
4189 }
4190 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4191 xmlFree(elem);
4192 if (RAW == '?') {
4193 last->ocur = XML_ELEMENT_CONTENT_OPT;
4194 NEXT;
4195 } else if (RAW == '*') {
4196 last->ocur = XML_ELEMENT_CONTENT_MULT;
4197 NEXT;
4198 } else if (RAW == '+') {
4199 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4200 NEXT;
4201 } else {
4202 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4203 }
4204 }
4205 SKIP_BLANKS;
4206 GROW;
4207 }
4208 if ((cur != NULL) && (last != NULL)) {
4209 cur->c2 = last;
4210 }
4211 ctxt->entity = ctxt->input;
4212 NEXT;
4213 if (RAW == '?') {
4214 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4215 NEXT;
4216 } else if (RAW == '*') {
4217 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4218 NEXT;
4219 } else if (RAW == '+') {
4220 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4221 NEXT;
4222 }
4223 return(ret);
4224}
4225
4226/**
4227 * xmlParseElementContentDecl:
4228 * @ctxt: an XML parser context
4229 * @name: the name of the element being defined.
4230 * @result: the Element Content pointer will be stored here if any
4231 *
4232 * parse the declaration for an Element content either Mixed or Children,
4233 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4234 *
4235 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4236 *
4237 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4238 */
4239
4240int
4241xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4242 xmlElementContentPtr *result) {
4243
4244 xmlElementContentPtr tree = NULL;
4245 xmlParserInputPtr input = ctxt->input;
4246 int res;
4247
4248 *result = NULL;
4249
4250 if (RAW != '(') {
4251 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4253 ctxt->sax->error(ctxt->userData,
4254 "xmlParseElementContentDecl : '(' expected\n");
4255 ctxt->wellFormed = 0;
4256 ctxt->disableSAX = 1;
4257 return(-1);
4258 }
4259 NEXT;
4260 GROW;
4261 SKIP_BLANKS;
4262 if ((RAW == '#') && (NXT(1) == 'P') &&
4263 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4264 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4265 (NXT(6) == 'A')) {
4266 tree = xmlParseElementMixedContentDecl(ctxt);
4267 res = XML_ELEMENT_TYPE_MIXED;
4268 } else {
4269 tree = xmlParseElementChildrenContentDecl(ctxt);
4270 res = XML_ELEMENT_TYPE_ELEMENT;
4271 }
4272 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4273 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4275 ctxt->sax->error(ctxt->userData,
4276"Element content declaration doesn't start and stop in the same entity\n");
4277 ctxt->wellFormed = 0;
4278 ctxt->disableSAX = 1;
4279 }
4280 SKIP_BLANKS;
4281 *result = tree;
4282 return(res);
4283}
4284
4285/**
4286 * xmlParseElementDecl:
4287 * @ctxt: an XML parser context
4288 *
4289 * parse an Element declaration.
4290 *
4291 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4292 *
4293 * [ VC: Unique Element Type Declaration ]
4294 * No element type may be declared more than once
4295 *
4296 * Returns the type of the element, or -1 in case of error
4297 */
4298int
4299xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4300 xmlChar *name;
4301 int ret = -1;
4302 xmlElementContentPtr content = NULL;
4303
4304 GROW;
4305 if ((RAW == '<') && (NXT(1) == '!') &&
4306 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4307 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4308 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4309 (NXT(8) == 'T')) {
4310 xmlParserInputPtr input = ctxt->input;
4311
4312 SKIP(9);
4313 if (!IS_BLANK(CUR)) {
4314 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4316 ctxt->sax->error(ctxt->userData,
4317 "Space required after 'ELEMENT'\n");
4318 ctxt->wellFormed = 0;
4319 ctxt->disableSAX = 1;
4320 }
4321 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004322 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 if (name == NULL) {
4324 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4326 ctxt->sax->error(ctxt->userData,
4327 "xmlParseElementDecl: no name for Element\n");
4328 ctxt->wellFormed = 0;
4329 ctxt->disableSAX = 1;
4330 return(-1);
4331 }
4332 while ((RAW == 0) && (ctxt->inputNr > 1))
4333 xmlPopInput(ctxt);
4334 if (!IS_BLANK(CUR)) {
4335 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4337 ctxt->sax->error(ctxt->userData,
4338 "Space required after the element name\n");
4339 ctxt->wellFormed = 0;
4340 ctxt->disableSAX = 1;
4341 }
4342 SKIP_BLANKS;
4343 if ((RAW == 'E') && (NXT(1) == 'M') &&
4344 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4345 (NXT(4) == 'Y')) {
4346 SKIP(5);
4347 /*
4348 * Element must always be empty.
4349 */
4350 ret = XML_ELEMENT_TYPE_EMPTY;
4351 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4352 (NXT(2) == 'Y')) {
4353 SKIP(3);
4354 /*
4355 * Element is a generic container.
4356 */
4357 ret = XML_ELEMENT_TYPE_ANY;
4358 } else if (RAW == '(') {
4359 ret = xmlParseElementContentDecl(ctxt, name, &content);
4360 } else {
4361 /*
4362 * [ WFC: PEs in Internal Subset ] error handling.
4363 */
4364 if ((RAW == '%') && (ctxt->external == 0) &&
4365 (ctxt->inputNr == 1)) {
4366 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4368 ctxt->sax->error(ctxt->userData,
4369 "PEReference: forbidden within markup decl in internal subset\n");
4370 } else {
4371 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4373 ctxt->sax->error(ctxt->userData,
4374 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4375 }
4376 ctxt->wellFormed = 0;
4377 ctxt->disableSAX = 1;
4378 if (name != NULL) xmlFree(name);
4379 return(-1);
4380 }
4381
4382 SKIP_BLANKS;
4383 /*
4384 * Pop-up of finished entities.
4385 */
4386 while ((RAW == 0) && (ctxt->inputNr > 1))
4387 xmlPopInput(ctxt);
4388 SKIP_BLANKS;
4389
4390 if (RAW != '>') {
4391 ctxt->errNo = XML_ERR_GT_REQUIRED;
4392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4393 ctxt->sax->error(ctxt->userData,
4394 "xmlParseElementDecl: expected '>' at the end\n");
4395 ctxt->wellFormed = 0;
4396 ctxt->disableSAX = 1;
4397 } else {
4398 if (input != ctxt->input) {
4399 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4401 ctxt->sax->error(ctxt->userData,
4402"Element declaration doesn't start and stop in the same entity\n");
4403 ctxt->wellFormed = 0;
4404 ctxt->disableSAX = 1;
4405 }
4406
4407 NEXT;
4408 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4409 (ctxt->sax->elementDecl != NULL))
4410 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4411 content);
4412 }
4413 if (content != NULL) {
4414 xmlFreeElementContent(content);
4415 }
4416 if (name != NULL) {
4417 xmlFree(name);
4418 }
4419 }
4420 return(ret);
4421}
4422
4423/**
4424 * xmlParseMarkupDecl:
4425 * @ctxt: an XML parser context
4426 *
4427 * parse Markup declarations
4428 *
4429 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4430 * NotationDecl | PI | Comment
4431 *
4432 * [ VC: Proper Declaration/PE Nesting ]
4433 * Parameter-entity replacement text must be properly nested with
4434 * markup declarations. That is to say, if either the first character
4435 * or the last character of a markup declaration (markupdecl above) is
4436 * contained in the replacement text for a parameter-entity reference,
4437 * both must be contained in the same replacement text.
4438 *
4439 * [ WFC: PEs in Internal Subset ]
4440 * In the internal DTD subset, parameter-entity references can occur
4441 * only where markup declarations can occur, not within markup declarations.
4442 * (This does not apply to references that occur in external parameter
4443 * entities or to the external subset.)
4444 */
4445void
4446xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4447 GROW;
4448 xmlParseElementDecl(ctxt);
4449 xmlParseAttributeListDecl(ctxt);
4450 xmlParseEntityDecl(ctxt);
4451 xmlParseNotationDecl(ctxt);
4452 xmlParsePI(ctxt);
4453 xmlParseComment(ctxt);
4454 /*
4455 * This is only for internal subset. On external entities,
4456 * the replacement is done before parsing stage
4457 */
4458 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4459 xmlParsePEReference(ctxt);
4460 ctxt->instate = XML_PARSER_DTD;
4461}
4462
4463/**
4464 * xmlParseTextDecl:
4465 * @ctxt: an XML parser context
4466 *
4467 * parse an XML declaration header for external entities
4468 *
4469 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4470 *
4471 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4472 */
4473
4474void
4475xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4476 xmlChar *version;
4477
4478 /*
4479 * We know that '<?xml' is here.
4480 */
4481 if ((RAW == '<') && (NXT(1) == '?') &&
4482 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4483 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4484 SKIP(5);
4485 } else {
4486 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4488 ctxt->sax->error(ctxt->userData,
4489 "Text declaration '<?xml' required\n");
4490 ctxt->wellFormed = 0;
4491 ctxt->disableSAX = 1;
4492
4493 return;
4494 }
4495
4496 if (!IS_BLANK(CUR)) {
4497 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4499 ctxt->sax->error(ctxt->userData,
4500 "Space needed after '<?xml'\n");
4501 ctxt->wellFormed = 0;
4502 ctxt->disableSAX = 1;
4503 }
4504 SKIP_BLANKS;
4505
4506 /*
4507 * We may have the VersionInfo here.
4508 */
4509 version = xmlParseVersionInfo(ctxt);
4510 if (version == NULL)
4511 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4512 ctxt->input->version = version;
4513
4514 /*
4515 * We must have the encoding declaration
4516 */
4517 if (!IS_BLANK(CUR)) {
4518 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4520 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4521 ctxt->wellFormed = 0;
4522 ctxt->disableSAX = 1;
4523 }
4524 xmlParseEncodingDecl(ctxt);
4525 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4526 /*
4527 * The XML REC instructs us to stop parsing right here
4528 */
4529 return;
4530 }
4531
4532 SKIP_BLANKS;
4533 if ((RAW == '?') && (NXT(1) == '>')) {
4534 SKIP(2);
4535 } else if (RAW == '>') {
4536 /* Deprecated old WD ... */
4537 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4539 ctxt->sax->error(ctxt->userData,
4540 "XML declaration must end-up with '?>'\n");
4541 ctxt->wellFormed = 0;
4542 ctxt->disableSAX = 1;
4543 NEXT;
4544 } else {
4545 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4547 ctxt->sax->error(ctxt->userData,
4548 "parsing XML declaration: '?>' expected\n");
4549 ctxt->wellFormed = 0;
4550 ctxt->disableSAX = 1;
4551 MOVETO_ENDTAG(CUR_PTR);
4552 NEXT;
4553 }
4554}
4555
4556/*
4557 * xmlParseConditionalSections
4558 * @ctxt: an XML parser context
4559 *
4560 * [61] conditionalSect ::= includeSect | ignoreSect
4561 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4562 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4563 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4564 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4565 */
4566
4567void
4568xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4569 SKIP(3);
4570 SKIP_BLANKS;
4571 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4572 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4573 (NXT(6) == 'E')) {
4574 SKIP(7);
4575 SKIP_BLANKS;
4576 if (RAW != '[') {
4577 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4579 ctxt->sax->error(ctxt->userData,
4580 "XML conditional section '[' expected\n");
4581 ctxt->wellFormed = 0;
4582 ctxt->disableSAX = 1;
4583 } else {
4584 NEXT;
4585 }
4586 if (xmlParserDebugEntities) {
4587 if ((ctxt->input != NULL) && (ctxt->input->filename))
4588 xmlGenericError(xmlGenericErrorContext,
4589 "%s(%d): ", ctxt->input->filename,
4590 ctxt->input->line);
4591 xmlGenericError(xmlGenericErrorContext,
4592 "Entering INCLUDE Conditional Section\n");
4593 }
4594
4595 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4596 (NXT(2) != '>'))) {
4597 const xmlChar *check = CUR_PTR;
4598 int cons = ctxt->input->consumed;
4599 int tok = ctxt->token;
4600
4601 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4602 xmlParseConditionalSections(ctxt);
4603 } else if (IS_BLANK(CUR)) {
4604 NEXT;
4605 } else if (RAW == '%') {
4606 xmlParsePEReference(ctxt);
4607 } else
4608 xmlParseMarkupDecl(ctxt);
4609
4610 /*
4611 * Pop-up of finished entities.
4612 */
4613 while ((RAW == 0) && (ctxt->inputNr > 1))
4614 xmlPopInput(ctxt);
4615
4616 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4617 (tok == ctxt->token)) {
4618 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4620 ctxt->sax->error(ctxt->userData,
4621 "Content error in the external subset\n");
4622 ctxt->wellFormed = 0;
4623 ctxt->disableSAX = 1;
4624 break;
4625 }
4626 }
4627 if (xmlParserDebugEntities) {
4628 if ((ctxt->input != NULL) && (ctxt->input->filename))
4629 xmlGenericError(xmlGenericErrorContext,
4630 "%s(%d): ", ctxt->input->filename,
4631 ctxt->input->line);
4632 xmlGenericError(xmlGenericErrorContext,
4633 "Leaving INCLUDE Conditional Section\n");
4634 }
4635
4636 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4637 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4638 int state;
4639 int instate;
4640 int depth = 0;
4641
4642 SKIP(6);
4643 SKIP_BLANKS;
4644 if (RAW != '[') {
4645 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647 ctxt->sax->error(ctxt->userData,
4648 "XML conditional section '[' expected\n");
4649 ctxt->wellFormed = 0;
4650 ctxt->disableSAX = 1;
4651 } else {
4652 NEXT;
4653 }
4654 if (xmlParserDebugEntities) {
4655 if ((ctxt->input != NULL) && (ctxt->input->filename))
4656 xmlGenericError(xmlGenericErrorContext,
4657 "%s(%d): ", ctxt->input->filename,
4658 ctxt->input->line);
4659 xmlGenericError(xmlGenericErrorContext,
4660 "Entering IGNORE Conditional Section\n");
4661 }
4662
4663 /*
4664 * Parse up to the end of the conditionnal section
4665 * But disable SAX event generating DTD building in the meantime
4666 */
4667 state = ctxt->disableSAX;
4668 instate = ctxt->instate;
4669 ctxt->disableSAX = 1;
4670 ctxt->instate = XML_PARSER_IGNORE;
4671
4672 while (depth >= 0) {
4673 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4674 depth++;
4675 SKIP(3);
4676 continue;
4677 }
4678 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4679 if (--depth >= 0) SKIP(3);
4680 continue;
4681 }
4682 NEXT;
4683 continue;
4684 }
4685
4686 ctxt->disableSAX = state;
4687 ctxt->instate = instate;
4688
4689 if (xmlParserDebugEntities) {
4690 if ((ctxt->input != NULL) && (ctxt->input->filename))
4691 xmlGenericError(xmlGenericErrorContext,
4692 "%s(%d): ", ctxt->input->filename,
4693 ctxt->input->line);
4694 xmlGenericError(xmlGenericErrorContext,
4695 "Leaving IGNORE Conditional Section\n");
4696 }
4697
4698 } else {
4699 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4701 ctxt->sax->error(ctxt->userData,
4702 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4703 ctxt->wellFormed = 0;
4704 ctxt->disableSAX = 1;
4705 }
4706
4707 if (RAW == 0)
4708 SHRINK;
4709
4710 if (RAW == 0) {
4711 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "XML conditional section not closed\n");
4715 ctxt->wellFormed = 0;
4716 ctxt->disableSAX = 1;
4717 } else {
4718 SKIP(3);
4719 }
4720}
4721
4722/**
4723 * xmlParseExternalSubset:
4724 * @ctxt: an XML parser context
4725 * @ExternalID: the external identifier
4726 * @SystemID: the system identifier (or URL)
4727 *
4728 * parse Markup declarations from an external subset
4729 *
4730 * [30] extSubset ::= textDecl? extSubsetDecl
4731 *
4732 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4733 */
4734void
4735xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4736 const xmlChar *SystemID) {
4737 GROW;
4738 if ((RAW == '<') && (NXT(1) == '?') &&
4739 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4740 (NXT(4) == 'l')) {
4741 xmlParseTextDecl(ctxt);
4742 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4743 /*
4744 * The XML REC instructs us to stop parsing right here
4745 */
4746 ctxt->instate = XML_PARSER_EOF;
4747 return;
4748 }
4749 }
4750 if (ctxt->myDoc == NULL) {
4751 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4752 }
4753 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4754 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4755
4756 ctxt->instate = XML_PARSER_DTD;
4757 ctxt->external = 1;
4758 while (((RAW == '<') && (NXT(1) == '?')) ||
4759 ((RAW == '<') && (NXT(1) == '!')) ||
4760 IS_BLANK(CUR)) {
4761 const xmlChar *check = CUR_PTR;
4762 int cons = ctxt->input->consumed;
4763 int tok = ctxt->token;
4764
4765 GROW;
4766 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4767 xmlParseConditionalSections(ctxt);
4768 } else if (IS_BLANK(CUR)) {
4769 NEXT;
4770 } else if (RAW == '%') {
4771 xmlParsePEReference(ctxt);
4772 } else
4773 xmlParseMarkupDecl(ctxt);
4774
4775 /*
4776 * Pop-up of finished entities.
4777 */
4778 while ((RAW == 0) && (ctxt->inputNr > 1))
4779 xmlPopInput(ctxt);
4780
4781 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4782 (tok == ctxt->token)) {
4783 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4785 ctxt->sax->error(ctxt->userData,
4786 "Content error in the external subset\n");
4787 ctxt->wellFormed = 0;
4788 ctxt->disableSAX = 1;
4789 break;
4790 }
4791 }
4792
4793 if (RAW != 0) {
4794 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4796 ctxt->sax->error(ctxt->userData,
4797 "Extra content at the end of the document\n");
4798 ctxt->wellFormed = 0;
4799 ctxt->disableSAX = 1;
4800 }
4801
4802}
4803
4804/**
4805 * xmlParseReference:
4806 * @ctxt: an XML parser context
4807 *
4808 * parse and handle entity references in content, depending on the SAX
4809 * interface, this may end-up in a call to character() if this is a
4810 * CharRef, a predefined entity, if there is no reference() callback.
4811 * or if the parser was asked to switch to that mode.
4812 *
4813 * [67] Reference ::= EntityRef | CharRef
4814 */
4815void
4816xmlParseReference(xmlParserCtxtPtr ctxt) {
4817 xmlEntityPtr ent;
4818 xmlChar *val;
4819 if (RAW != '&') return;
4820
4821 if (NXT(1) == '#') {
4822 int i = 0;
4823 xmlChar out[10];
4824 int hex = NXT(2);
4825 int val = xmlParseCharRef(ctxt);
4826
4827 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4828 /*
4829 * So we are using non-UTF-8 buffers
4830 * Check that the char fit on 8bits, if not
4831 * generate a CharRef.
4832 */
4833 if (val <= 0xFF) {
4834 out[0] = val;
4835 out[1] = 0;
4836 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4837 (!ctxt->disableSAX))
4838 ctxt->sax->characters(ctxt->userData, out, 1);
4839 } else {
4840 if ((hex == 'x') || (hex == 'X'))
4841 sprintf((char *)out, "#x%X", val);
4842 else
4843 sprintf((char *)out, "#%d", val);
4844 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4845 (!ctxt->disableSAX))
4846 ctxt->sax->reference(ctxt->userData, out);
4847 }
4848 } else {
4849 /*
4850 * Just encode the value in UTF-8
4851 */
4852 COPY_BUF(0 ,out, i, val);
4853 out[i] = 0;
4854 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4855 (!ctxt->disableSAX))
4856 ctxt->sax->characters(ctxt->userData, out, i);
4857 }
4858 } else {
4859 ent = xmlParseEntityRef(ctxt);
4860 if (ent == NULL) return;
4861 if ((ent->name != NULL) &&
4862 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4863 xmlNodePtr list = NULL;
4864 int ret;
4865
4866
4867 /*
4868 * The first reference to the entity trigger a parsing phase
4869 * where the ent->children is filled with the result from
4870 * the parsing.
4871 */
4872 if (ent->children == NULL) {
4873 xmlChar *value;
4874 value = ent->content;
4875
4876 /*
4877 * Check that this entity is well formed
4878 */
4879 if ((value != NULL) &&
4880 (value[1] == 0) && (value[0] == '<') &&
4881 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4882 /*
4883 * DONE: get definite answer on this !!!
4884 * Lots of entity decls are used to declare a single
4885 * char
4886 * <!ENTITY lt "<">
4887 * Which seems to be valid since
4888 * 2.4: The ampersand character (&) and the left angle
4889 * bracket (<) may appear in their literal form only
4890 * when used ... They are also legal within the literal
4891 * entity value of an internal entity declaration;i
4892 * see "4.3.2 Well-Formed Parsed Entities".
4893 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4894 * Looking at the OASIS test suite and James Clark
4895 * tests, this is broken. However the XML REC uses
4896 * it. Is the XML REC not well-formed ????
4897 * This is a hack to avoid this problem
4898 *
4899 * ANSWER: since lt gt amp .. are already defined,
4900 * this is a redefinition and hence the fact that the
4901 * contentis not well balanced is not a Wf error, this
4902 * is lousy but acceptable.
4903 */
4904 list = xmlNewDocText(ctxt->myDoc, value);
4905 if (list != NULL) {
4906 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4907 (ent->children == NULL)) {
4908 ent->children = list;
4909 ent->last = list;
4910 list->parent = (xmlNodePtr) ent;
4911 } else {
4912 xmlFreeNodeList(list);
4913 }
4914 } else if (list != NULL) {
4915 xmlFreeNodeList(list);
4916 }
4917 } else {
4918 /*
4919 * 4.3.2: An internal general parsed entity is well-formed
4920 * if its replacement text matches the production labeled
4921 * content.
4922 */
4923 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4924 ctxt->depth++;
4925 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4926 ctxt->sax, NULL, ctxt->depth,
4927 value, &list);
4928 ctxt->depth--;
4929 } else if (ent->etype ==
4930 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4931 ctxt->depth++;
4932 ret = xmlParseExternalEntity(ctxt->myDoc,
4933 ctxt->sax, NULL, ctxt->depth,
4934 ent->URI, ent->ExternalID, &list);
4935 ctxt->depth--;
4936 } else {
4937 ret = -1;
4938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4939 ctxt->sax->error(ctxt->userData,
4940 "Internal: invalid entity type\n");
4941 }
4942 if (ret == XML_ERR_ENTITY_LOOP) {
4943 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4945 ctxt->sax->error(ctxt->userData,
4946 "Detected entity reference loop\n");
4947 ctxt->wellFormed = 0;
4948 ctxt->disableSAX = 1;
4949 } else if ((ret == 0) && (list != NULL)) {
4950 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4951 (ent->children == NULL)) {
4952 ent->children = list;
4953 while (list != NULL) {
4954 list->parent = (xmlNodePtr) ent;
4955 if (list->next == NULL)
4956 ent->last = list;
4957 list = list->next;
4958 }
4959 } else {
4960 xmlFreeNodeList(list);
4961 }
4962 } else if (ret > 0) {
4963 ctxt->errNo = ret;
4964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4965 ctxt->sax->error(ctxt->userData,
4966 "Entity value required\n");
4967 ctxt->wellFormed = 0;
4968 ctxt->disableSAX = 1;
4969 } else if (list != NULL) {
4970 xmlFreeNodeList(list);
4971 }
4972 }
4973 }
4974 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4975 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4976 /*
4977 * Create a node.
4978 */
4979 ctxt->sax->reference(ctxt->userData, ent->name);
4980 return;
4981 } else if (ctxt->replaceEntities) {
4982 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4983 /*
4984 * Seems we are generating the DOM content, do
4985 * a simple tree copy
4986 */
4987 xmlNodePtr new;
4988 new = xmlCopyNodeList(ent->children);
4989
4990 xmlAddChildList(ctxt->node, new);
4991 /*
4992 * This is to avoid a nasty side effect, see
4993 * characters() in SAX.c
4994 */
4995 ctxt->nodemem = 0;
4996 ctxt->nodelen = 0;
4997 return;
4998 } else {
4999 /*
5000 * Probably running in SAX mode
5001 */
5002 xmlParserInputPtr input;
5003
5004 input = xmlNewEntityInputStream(ctxt, ent);
5005 xmlPushInput(ctxt, input);
5006 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5007 (RAW == '<') && (NXT(1) == '?') &&
5008 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5009 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5010 xmlParseTextDecl(ctxt);
5011 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5012 /*
5013 * The XML REC instructs us to stop parsing right here
5014 */
5015 ctxt->instate = XML_PARSER_EOF;
5016 return;
5017 }
5018 if (input->standalone == 1) {
5019 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5021 ctxt->sax->error(ctxt->userData,
5022 "external parsed entities cannot be standalone\n");
5023 ctxt->wellFormed = 0;
5024 ctxt->disableSAX = 1;
5025 }
5026 }
5027 return;
5028 }
5029 }
5030 } else {
5031 val = ent->content;
5032 if (val == NULL) return;
5033 /*
5034 * inline the entity.
5035 */
5036 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5037 (!ctxt->disableSAX))
5038 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5039 }
5040 }
5041}
5042
5043/**
5044 * xmlParseEntityRef:
5045 * @ctxt: an XML parser context
5046 *
5047 * parse ENTITY references declarations
5048 *
5049 * [68] EntityRef ::= '&' Name ';'
5050 *
5051 * [ WFC: Entity Declared ]
5052 * In a document without any DTD, a document with only an internal DTD
5053 * subset which contains no parameter entity references, or a document
5054 * with "standalone='yes'", the Name given in the entity reference
5055 * must match that in an entity declaration, except that well-formed
5056 * documents need not declare any of the following entities: amp, lt,
5057 * gt, apos, quot. The declaration of a parameter entity must precede
5058 * any reference to it. Similarly, the declaration of a general entity
5059 * must precede any reference to it which appears in a default value in an
5060 * attribute-list declaration. Note that if entities are declared in the
5061 * external subset or in external parameter entities, a non-validating
5062 * processor is not obligated to read and process their declarations;
5063 * for such documents, the rule that an entity must be declared is a
5064 * well-formedness constraint only if standalone='yes'.
5065 *
5066 * [ WFC: Parsed Entity ]
5067 * An entity reference must not contain the name of an unparsed entity
5068 *
5069 * Returns the xmlEntityPtr if found, or NULL otherwise.
5070 */
5071xmlEntityPtr
5072xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5073 xmlChar *name;
5074 xmlEntityPtr ent = NULL;
5075
5076 GROW;
5077
5078 if (RAW == '&') {
5079 NEXT;
5080 name = xmlParseName(ctxt);
5081 if (name == NULL) {
5082 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5084 ctxt->sax->error(ctxt->userData,
5085 "xmlParseEntityRef: no name\n");
5086 ctxt->wellFormed = 0;
5087 ctxt->disableSAX = 1;
5088 } else {
5089 if (RAW == ';') {
5090 NEXT;
5091 /*
5092 * Ask first SAX for entity resolution, otherwise try the
5093 * predefined set.
5094 */
5095 if (ctxt->sax != NULL) {
5096 if (ctxt->sax->getEntity != NULL)
5097 ent = ctxt->sax->getEntity(ctxt->userData, name);
5098 if (ent == NULL)
5099 ent = xmlGetPredefinedEntity(name);
5100 }
5101 /*
5102 * [ WFC: Entity Declared ]
5103 * In a document without any DTD, a document with only an
5104 * internal DTD subset which contains no parameter entity
5105 * references, or a document with "standalone='yes'", the
5106 * Name given in the entity reference must match that in an
5107 * entity declaration, except that well-formed documents
5108 * need not declare any of the following entities: amp, lt,
5109 * gt, apos, quot.
5110 * The declaration of a parameter entity must precede any
5111 * reference to it.
5112 * Similarly, the declaration of a general entity must
5113 * precede any reference to it which appears in a default
5114 * value in an attribute-list declaration. Note that if
5115 * entities are declared in the external subset or in
5116 * external parameter entities, a non-validating processor
5117 * is not obligated to read and process their declarations;
5118 * for such documents, the rule that an entity must be
5119 * declared is a well-formedness constraint only if
5120 * standalone='yes'.
5121 */
5122 if (ent == NULL) {
5123 if ((ctxt->standalone == 1) ||
5124 ((ctxt->hasExternalSubset == 0) &&
5125 (ctxt->hasPErefs == 0))) {
5126 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5128 ctxt->sax->error(ctxt->userData,
5129 "Entity '%s' not defined\n", name);
5130 ctxt->wellFormed = 0;
5131 ctxt->disableSAX = 1;
5132 } else {
5133 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5134 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5135 ctxt->sax->warning(ctxt->userData,
5136 "Entity '%s' not defined\n", name);
5137 }
5138 }
5139
5140 /*
5141 * [ WFC: Parsed Entity ]
5142 * An entity reference must not contain the name of an
5143 * unparsed entity
5144 */
5145 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5146 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5148 ctxt->sax->error(ctxt->userData,
5149 "Entity reference to unparsed entity %s\n", name);
5150 ctxt->wellFormed = 0;
5151 ctxt->disableSAX = 1;
5152 }
5153
5154 /*
5155 * [ WFC: No External Entity References ]
5156 * Attribute values cannot contain direct or indirect
5157 * entity references to external entities.
5158 */
5159 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5160 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5161 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5163 ctxt->sax->error(ctxt->userData,
5164 "Attribute references external entity '%s'\n", name);
5165 ctxt->wellFormed = 0;
5166 ctxt->disableSAX = 1;
5167 }
5168 /*
5169 * [ WFC: No < in Attribute Values ]
5170 * The replacement text of any entity referred to directly or
5171 * indirectly in an attribute value (other than "&lt;") must
5172 * not contain a <.
5173 */
5174 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5175 (ent != NULL) &&
5176 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5177 (ent->content != NULL) &&
5178 (xmlStrchr(ent->content, '<'))) {
5179 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5181 ctxt->sax->error(ctxt->userData,
5182 "'<' in entity '%s' is not allowed in attributes values\n", name);
5183 ctxt->wellFormed = 0;
5184 ctxt->disableSAX = 1;
5185 }
5186
5187 /*
5188 * Internal check, no parameter entities here ...
5189 */
5190 else {
5191 switch (ent->etype) {
5192 case XML_INTERNAL_PARAMETER_ENTITY:
5193 case XML_EXTERNAL_PARAMETER_ENTITY:
5194 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5196 ctxt->sax->error(ctxt->userData,
5197 "Attempt to reference the parameter entity '%s'\n", name);
5198 ctxt->wellFormed = 0;
5199 ctxt->disableSAX = 1;
5200 break;
5201 default:
5202 break;
5203 }
5204 }
5205
5206 /*
5207 * [ WFC: No Recursion ]
5208 * A parsed entity must not contain a recursive reference
5209 * to itself, either directly or indirectly.
5210 * Done somewhere else
5211 */
5212
5213 } else {
5214 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5216 ctxt->sax->error(ctxt->userData,
5217 "xmlParseEntityRef: expecting ';'\n");
5218 ctxt->wellFormed = 0;
5219 ctxt->disableSAX = 1;
5220 }
5221 xmlFree(name);
5222 }
5223 }
5224 return(ent);
5225}
5226
5227/**
5228 * xmlParseStringEntityRef:
5229 * @ctxt: an XML parser context
5230 * @str: a pointer to an index in the string
5231 *
5232 * parse ENTITY references declarations, but this version parses it from
5233 * a string value.
5234 *
5235 * [68] EntityRef ::= '&' Name ';'
5236 *
5237 * [ WFC: Entity Declared ]
5238 * In a document without any DTD, a document with only an internal DTD
5239 * subset which contains no parameter entity references, or a document
5240 * with "standalone='yes'", the Name given in the entity reference
5241 * must match that in an entity declaration, except that well-formed
5242 * documents need not declare any of the following entities: amp, lt,
5243 * gt, apos, quot. The declaration of a parameter entity must precede
5244 * any reference to it. Similarly, the declaration of a general entity
5245 * must precede any reference to it which appears in a default value in an
5246 * attribute-list declaration. Note that if entities are declared in the
5247 * external subset or in external parameter entities, a non-validating
5248 * processor is not obligated to read and process their declarations;
5249 * for such documents, the rule that an entity must be declared is a
5250 * well-formedness constraint only if standalone='yes'.
5251 *
5252 * [ WFC: Parsed Entity ]
5253 * An entity reference must not contain the name of an unparsed entity
5254 *
5255 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5256 * is updated to the current location in the string.
5257 */
5258xmlEntityPtr
5259xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5260 xmlChar *name;
5261 const xmlChar *ptr;
5262 xmlChar cur;
5263 xmlEntityPtr ent = NULL;
5264
5265 if ((str == NULL) || (*str == NULL))
5266 return(NULL);
5267 ptr = *str;
5268 cur = *ptr;
5269 if (cur == '&') {
5270 ptr++;
5271 cur = *ptr;
5272 name = xmlParseStringName(ctxt, &ptr);
5273 if (name == NULL) {
5274 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5276 ctxt->sax->error(ctxt->userData,
5277 "xmlParseEntityRef: no name\n");
5278 ctxt->wellFormed = 0;
5279 ctxt->disableSAX = 1;
5280 } else {
5281 if (*ptr == ';') {
5282 ptr++;
5283 /*
5284 * Ask first SAX for entity resolution, otherwise try the
5285 * predefined set.
5286 */
5287 if (ctxt->sax != NULL) {
5288 if (ctxt->sax->getEntity != NULL)
5289 ent = ctxt->sax->getEntity(ctxt->userData, name);
5290 if (ent == NULL)
5291 ent = xmlGetPredefinedEntity(name);
5292 }
5293 /*
5294 * [ WFC: Entity Declared ]
5295 * In a document without any DTD, a document with only an
5296 * internal DTD subset which contains no parameter entity
5297 * references, or a document with "standalone='yes'", the
5298 * Name given in the entity reference must match that in an
5299 * entity declaration, except that well-formed documents
5300 * need not declare any of the following entities: amp, lt,
5301 * gt, apos, quot.
5302 * The declaration of a parameter entity must precede any
5303 * reference to it.
5304 * Similarly, the declaration of a general entity must
5305 * precede any reference to it which appears in a default
5306 * value in an attribute-list declaration. Note that if
5307 * entities are declared in the external subset or in
5308 * external parameter entities, a non-validating processor
5309 * is not obligated to read and process their declarations;
5310 * for such documents, the rule that an entity must be
5311 * declared is a well-formedness constraint only if
5312 * standalone='yes'.
5313 */
5314 if (ent == NULL) {
5315 if ((ctxt->standalone == 1) ||
5316 ((ctxt->hasExternalSubset == 0) &&
5317 (ctxt->hasPErefs == 0))) {
5318 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5320 ctxt->sax->error(ctxt->userData,
5321 "Entity '%s' not defined\n", name);
5322 ctxt->wellFormed = 0;
5323 ctxt->disableSAX = 1;
5324 } else {
5325 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5327 ctxt->sax->warning(ctxt->userData,
5328 "Entity '%s' not defined\n", name);
5329 }
5330 }
5331
5332 /*
5333 * [ WFC: Parsed Entity ]
5334 * An entity reference must not contain the name of an
5335 * unparsed entity
5336 */
5337 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5338 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5340 ctxt->sax->error(ctxt->userData,
5341 "Entity reference to unparsed entity %s\n", name);
5342 ctxt->wellFormed = 0;
5343 ctxt->disableSAX = 1;
5344 }
5345
5346 /*
5347 * [ WFC: No External Entity References ]
5348 * Attribute values cannot contain direct or indirect
5349 * entity references to external entities.
5350 */
5351 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5352 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5353 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5355 ctxt->sax->error(ctxt->userData,
5356 "Attribute references external entity '%s'\n", name);
5357 ctxt->wellFormed = 0;
5358 ctxt->disableSAX = 1;
5359 }
5360 /*
5361 * [ WFC: No < in Attribute Values ]
5362 * The replacement text of any entity referred to directly or
5363 * indirectly in an attribute value (other than "&lt;") must
5364 * not contain a <.
5365 */
5366 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5367 (ent != NULL) &&
5368 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5369 (ent->content != NULL) &&
5370 (xmlStrchr(ent->content, '<'))) {
5371 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5373 ctxt->sax->error(ctxt->userData,
5374 "'<' in entity '%s' is not allowed in attributes values\n", name);
5375 ctxt->wellFormed = 0;
5376 ctxt->disableSAX = 1;
5377 }
5378
5379 /*
5380 * Internal check, no parameter entities here ...
5381 */
5382 else {
5383 switch (ent->etype) {
5384 case XML_INTERNAL_PARAMETER_ENTITY:
5385 case XML_EXTERNAL_PARAMETER_ENTITY:
5386 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5388 ctxt->sax->error(ctxt->userData,
5389 "Attempt to reference the parameter entity '%s'\n", name);
5390 ctxt->wellFormed = 0;
5391 ctxt->disableSAX = 1;
5392 break;
5393 default:
5394 break;
5395 }
5396 }
5397
5398 /*
5399 * [ WFC: No Recursion ]
5400 * A parsed entity must not contain a recursive reference
5401 * to itself, either directly or indirectly.
5402 * Done somewhwere else
5403 */
5404
5405 } else {
5406 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5408 ctxt->sax->error(ctxt->userData,
5409 "xmlParseEntityRef: expecting ';'\n");
5410 ctxt->wellFormed = 0;
5411 ctxt->disableSAX = 1;
5412 }
5413 xmlFree(name);
5414 }
5415 }
5416 *str = ptr;
5417 return(ent);
5418}
5419
5420/**
5421 * xmlParsePEReference:
5422 * @ctxt: an XML parser context
5423 *
5424 * parse PEReference declarations
5425 * The entity content is handled directly by pushing it's content as
5426 * a new input stream.
5427 *
5428 * [69] PEReference ::= '%' Name ';'
5429 *
5430 * [ WFC: No Recursion ]
5431 * A parsed entity must not contain a recursive
5432 * reference to itself, either directly or indirectly.
5433 *
5434 * [ WFC: Entity Declared ]
5435 * In a document without any DTD, a document with only an internal DTD
5436 * subset which contains no parameter entity references, or a document
5437 * with "standalone='yes'", ... ... The declaration of a parameter
5438 * entity must precede any reference to it...
5439 *
5440 * [ VC: Entity Declared ]
5441 * In a document with an external subset or external parameter entities
5442 * with "standalone='no'", ... ... The declaration of a parameter entity
5443 * must precede any reference to it...
5444 *
5445 * [ WFC: In DTD ]
5446 * Parameter-entity references may only appear in the DTD.
5447 * NOTE: misleading but this is handled.
5448 */
5449void
5450xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5451 xmlChar *name;
5452 xmlEntityPtr entity = NULL;
5453 xmlParserInputPtr input;
5454
5455 if (RAW == '%') {
5456 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005457 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005458 if (name == NULL) {
5459 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5461 ctxt->sax->error(ctxt->userData,
5462 "xmlParsePEReference: no name\n");
5463 ctxt->wellFormed = 0;
5464 ctxt->disableSAX = 1;
5465 } else {
5466 if (RAW == ';') {
5467 NEXT;
5468 if ((ctxt->sax != NULL) &&
5469 (ctxt->sax->getParameterEntity != NULL))
5470 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5471 name);
5472 if (entity == NULL) {
5473 /*
5474 * [ WFC: Entity Declared ]
5475 * In a document without any DTD, a document with only an
5476 * internal DTD subset which contains no parameter entity
5477 * references, or a document with "standalone='yes'", ...
5478 * ... The declaration of a parameter entity must precede
5479 * any reference to it...
5480 */
5481 if ((ctxt->standalone == 1) ||
5482 ((ctxt->hasExternalSubset == 0) &&
5483 (ctxt->hasPErefs == 0))) {
5484 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5485 if ((!ctxt->disableSAX) &&
5486 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5487 ctxt->sax->error(ctxt->userData,
5488 "PEReference: %%%s; not found\n", name);
5489 ctxt->wellFormed = 0;
5490 ctxt->disableSAX = 1;
5491 } else {
5492 /*
5493 * [ VC: Entity Declared ]
5494 * In a document with an external subset or external
5495 * parameter entities with "standalone='no'", ...
5496 * ... The declaration of a parameter entity must precede
5497 * any reference to it...
5498 */
5499 if ((!ctxt->disableSAX) &&
5500 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5501 ctxt->sax->warning(ctxt->userData,
5502 "PEReference: %%%s; not found\n", name);
5503 ctxt->valid = 0;
5504 }
5505 } else {
5506 /*
5507 * Internal checking in case the entity quest barfed
5508 */
5509 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5510 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5511 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5512 ctxt->sax->warning(ctxt->userData,
5513 "Internal: %%%s; is not a parameter entity\n", name);
5514 } else {
5515 /*
5516 * TODO !!!
5517 * handle the extra spaces added before and after
5518 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5519 */
5520 input = xmlNewEntityInputStream(ctxt, entity);
5521 xmlPushInput(ctxt, input);
5522 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5523 (RAW == '<') && (NXT(1) == '?') &&
5524 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5525 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5526 xmlParseTextDecl(ctxt);
5527 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5528 /*
5529 * The XML REC instructs us to stop parsing
5530 * right here
5531 */
5532 ctxt->instate = XML_PARSER_EOF;
5533 xmlFree(name);
5534 return;
5535 }
5536 }
5537 if (ctxt->token == 0)
5538 ctxt->token = ' ';
5539 }
5540 }
5541 ctxt->hasPErefs = 1;
5542 } else {
5543 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5545 ctxt->sax->error(ctxt->userData,
5546 "xmlParsePEReference: expecting ';'\n");
5547 ctxt->wellFormed = 0;
5548 ctxt->disableSAX = 1;
5549 }
5550 xmlFree(name);
5551 }
5552 }
5553}
5554
5555/**
5556 * xmlParseStringPEReference:
5557 * @ctxt: an XML parser context
5558 * @str: a pointer to an index in the string
5559 *
5560 * parse PEReference declarations
5561 *
5562 * [69] PEReference ::= '%' Name ';'
5563 *
5564 * [ WFC: No Recursion ]
5565 * A parsed entity must not contain a recursive
5566 * reference to itself, either directly or indirectly.
5567 *
5568 * [ WFC: Entity Declared ]
5569 * In a document without any DTD, a document with only an internal DTD
5570 * subset which contains no parameter entity references, or a document
5571 * with "standalone='yes'", ... ... The declaration of a parameter
5572 * entity must precede any reference to it...
5573 *
5574 * [ VC: Entity Declared ]
5575 * In a document with an external subset or external parameter entities
5576 * with "standalone='no'", ... ... The declaration of a parameter entity
5577 * must precede any reference to it...
5578 *
5579 * [ WFC: In DTD ]
5580 * Parameter-entity references may only appear in the DTD.
5581 * NOTE: misleading but this is handled.
5582 *
5583 * Returns the string of the entity content.
5584 * str is updated to the current value of the index
5585 */
5586xmlEntityPtr
5587xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5588 const xmlChar *ptr;
5589 xmlChar cur;
5590 xmlChar *name;
5591 xmlEntityPtr entity = NULL;
5592
5593 if ((str == NULL) || (*str == NULL)) return(NULL);
5594 ptr = *str;
5595 cur = *ptr;
5596 if (cur == '%') {
5597 ptr++;
5598 cur = *ptr;
5599 name = xmlParseStringName(ctxt, &ptr);
5600 if (name == NULL) {
5601 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5603 ctxt->sax->error(ctxt->userData,
5604 "xmlParseStringPEReference: no name\n");
5605 ctxt->wellFormed = 0;
5606 ctxt->disableSAX = 1;
5607 } else {
5608 cur = *ptr;
5609 if (cur == ';') {
5610 ptr++;
5611 cur = *ptr;
5612 if ((ctxt->sax != NULL) &&
5613 (ctxt->sax->getParameterEntity != NULL))
5614 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5615 name);
5616 if (entity == NULL) {
5617 /*
5618 * [ WFC: Entity Declared ]
5619 * In a document without any DTD, a document with only an
5620 * internal DTD subset which contains no parameter entity
5621 * references, or a document with "standalone='yes'", ...
5622 * ... The declaration of a parameter entity must precede
5623 * any reference to it...
5624 */
5625 if ((ctxt->standalone == 1) ||
5626 ((ctxt->hasExternalSubset == 0) &&
5627 (ctxt->hasPErefs == 0))) {
5628 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5630 ctxt->sax->error(ctxt->userData,
5631 "PEReference: %%%s; not found\n", name);
5632 ctxt->wellFormed = 0;
5633 ctxt->disableSAX = 1;
5634 } else {
5635 /*
5636 * [ VC: Entity Declared ]
5637 * In a document with an external subset or external
5638 * parameter entities with "standalone='no'", ...
5639 * ... The declaration of a parameter entity must
5640 * precede any reference to it...
5641 */
5642 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5643 ctxt->sax->warning(ctxt->userData,
5644 "PEReference: %%%s; not found\n", name);
5645 ctxt->valid = 0;
5646 }
5647 } else {
5648 /*
5649 * Internal checking in case the entity quest barfed
5650 */
5651 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5652 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5653 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5654 ctxt->sax->warning(ctxt->userData,
5655 "Internal: %%%s; is not a parameter entity\n", name);
5656 }
5657 }
5658 ctxt->hasPErefs = 1;
5659 } else {
5660 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5662 ctxt->sax->error(ctxt->userData,
5663 "xmlParseStringPEReference: expecting ';'\n");
5664 ctxt->wellFormed = 0;
5665 ctxt->disableSAX = 1;
5666 }
5667 xmlFree(name);
5668 }
5669 }
5670 *str = ptr;
5671 return(entity);
5672}
5673
5674/**
5675 * xmlParseDocTypeDecl:
5676 * @ctxt: an XML parser context
5677 *
5678 * parse a DOCTYPE declaration
5679 *
5680 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5681 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5682 *
5683 * [ VC: Root Element Type ]
5684 * The Name in the document type declaration must match the element
5685 * type of the root element.
5686 */
5687
5688void
5689xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5690 xmlChar *name = NULL;
5691 xmlChar *ExternalID = NULL;
5692 xmlChar *URI = NULL;
5693
5694 /*
5695 * We know that '<!DOCTYPE' has been detected.
5696 */
5697 SKIP(9);
5698
5699 SKIP_BLANKS;
5700
5701 /*
5702 * Parse the DOCTYPE name.
5703 */
5704 name = xmlParseName(ctxt);
5705 if (name == NULL) {
5706 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5708 ctxt->sax->error(ctxt->userData,
5709 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5710 ctxt->wellFormed = 0;
5711 ctxt->disableSAX = 1;
5712 }
5713 ctxt->intSubName = name;
5714
5715 SKIP_BLANKS;
5716
5717 /*
5718 * Check for SystemID and ExternalID
5719 */
5720 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5721
5722 if ((URI != NULL) || (ExternalID != NULL)) {
5723 ctxt->hasExternalSubset = 1;
5724 }
5725 ctxt->extSubURI = URI;
5726 ctxt->extSubSystem = ExternalID;
5727
5728 SKIP_BLANKS;
5729
5730 /*
5731 * Create and update the internal subset.
5732 */
5733 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5734 (!ctxt->disableSAX))
5735 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5736
5737 /*
5738 * Is there any internal subset declarations ?
5739 * they are handled separately in xmlParseInternalSubset()
5740 */
5741 if (RAW == '[')
5742 return;
5743
5744 /*
5745 * We should be at the end of the DOCTYPE declaration.
5746 */
5747 if (RAW != '>') {
5748 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5750 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5751 ctxt->wellFormed = 0;
5752 ctxt->disableSAX = 1;
5753 }
5754 NEXT;
5755}
5756
5757/**
5758 * xmlParseInternalsubset:
5759 * @ctxt: an XML parser context
5760 *
5761 * parse the internal subset declaration
5762 *
5763 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5764 */
5765
5766void
5767xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5768 /*
5769 * Is there any DTD definition ?
5770 */
5771 if (RAW == '[') {
5772 ctxt->instate = XML_PARSER_DTD;
5773 NEXT;
5774 /*
5775 * Parse the succession of Markup declarations and
5776 * PEReferences.
5777 * Subsequence (markupdecl | PEReference | S)*
5778 */
5779 while (RAW != ']') {
5780 const xmlChar *check = CUR_PTR;
5781 int cons = ctxt->input->consumed;
5782
5783 SKIP_BLANKS;
5784 xmlParseMarkupDecl(ctxt);
5785 xmlParsePEReference(ctxt);
5786
5787 /*
5788 * Pop-up of finished entities.
5789 */
5790 while ((RAW == 0) && (ctxt->inputNr > 1))
5791 xmlPopInput(ctxt);
5792
5793 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5794 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5796 ctxt->sax->error(ctxt->userData,
5797 "xmlParseInternalSubset: error detected in Markup declaration\n");
5798 ctxt->wellFormed = 0;
5799 ctxt->disableSAX = 1;
5800 break;
5801 }
5802 }
5803 if (RAW == ']') {
5804 NEXT;
5805 SKIP_BLANKS;
5806 }
5807 }
5808
5809 /*
5810 * We should be at the end of the DOCTYPE declaration.
5811 */
5812 if (RAW != '>') {
5813 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5815 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5816 ctxt->wellFormed = 0;
5817 ctxt->disableSAX = 1;
5818 }
5819 NEXT;
5820}
5821
5822/**
5823 * xmlParseAttribute:
5824 * @ctxt: an XML parser context
5825 * @value: a xmlChar ** used to store the value of the attribute
5826 *
5827 * parse an attribute
5828 *
5829 * [41] Attribute ::= Name Eq AttValue
5830 *
5831 * [ WFC: No External Entity References ]
5832 * Attribute values cannot contain direct or indirect entity references
5833 * to external entities.
5834 *
5835 * [ WFC: No < in Attribute Values ]
5836 * The replacement text of any entity referred to directly or indirectly in
5837 * an attribute value (other than "&lt;") must not contain a <.
5838 *
5839 * [ VC: Attribute Value Type ]
5840 * The attribute must have been declared; the value must be of the type
5841 * declared for it.
5842 *
5843 * [25] Eq ::= S? '=' S?
5844 *
5845 * With namespace:
5846 *
5847 * [NS 11] Attribute ::= QName Eq AttValue
5848 *
5849 * Also the case QName == xmlns:??? is handled independently as a namespace
5850 * definition.
5851 *
5852 * Returns the attribute name, and the value in *value.
5853 */
5854
5855xmlChar *
5856xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5857 xmlChar *name, *val;
5858
5859 *value = NULL;
5860 name = xmlParseName(ctxt);
5861 if (name == NULL) {
5862 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5864 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5865 ctxt->wellFormed = 0;
5866 ctxt->disableSAX = 1;
5867 return(NULL);
5868 }
5869
5870 /*
5871 * read the value
5872 */
5873 SKIP_BLANKS;
5874 if (RAW == '=') {
5875 NEXT;
5876 SKIP_BLANKS;
5877 val = xmlParseAttValue(ctxt);
5878 ctxt->instate = XML_PARSER_CONTENT;
5879 } else {
5880 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5882 ctxt->sax->error(ctxt->userData,
5883 "Specification mandate value for attribute %s\n", name);
5884 ctxt->wellFormed = 0;
5885 ctxt->disableSAX = 1;
5886 xmlFree(name);
5887 return(NULL);
5888 }
5889
5890 /*
5891 * Check that xml:lang conforms to the specification
5892 * No more registered as an error, just generate a warning now
5893 * since this was deprecated in XML second edition
5894 */
5895 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5896 if (!xmlCheckLanguageID(val)) {
5897 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5898 ctxt->sax->warning(ctxt->userData,
5899 "Malformed value for xml:lang : %s\n", val);
5900 }
5901 }
5902
5903 /*
5904 * Check that xml:space conforms to the specification
5905 */
5906 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5907 if (xmlStrEqual(val, BAD_CAST "default"))
5908 *(ctxt->space) = 0;
5909 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5910 *(ctxt->space) = 1;
5911 else {
5912 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5914 ctxt->sax->error(ctxt->userData,
5915"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5916 val);
5917 ctxt->wellFormed = 0;
5918 ctxt->disableSAX = 1;
5919 }
5920 }
5921
5922 *value = val;
5923 return(name);
5924}
5925
5926/**
5927 * xmlParseStartTag:
5928 * @ctxt: an XML parser context
5929 *
5930 * parse a start of tag either for rule element or
5931 * EmptyElement. In both case we don't parse the tag closing chars.
5932 *
5933 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5934 *
5935 * [ WFC: Unique Att Spec ]
5936 * No attribute name may appear more than once in the same start-tag or
5937 * empty-element tag.
5938 *
5939 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5940 *
5941 * [ WFC: Unique Att Spec ]
5942 * No attribute name may appear more than once in the same start-tag or
5943 * empty-element tag.
5944 *
5945 * With namespace:
5946 *
5947 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5948 *
5949 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5950 *
5951 * Returns the element name parsed
5952 */
5953
5954xmlChar *
5955xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5956 xmlChar *name;
5957 xmlChar *attname;
5958 xmlChar *attvalue;
5959 const xmlChar **atts = NULL;
5960 int nbatts = 0;
5961 int maxatts = 0;
5962 int i;
5963
5964 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005965 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005966
5967 name = xmlParseName(ctxt);
5968 if (name == NULL) {
5969 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5971 ctxt->sax->error(ctxt->userData,
5972 "xmlParseStartTag: invalid element name\n");
5973 ctxt->wellFormed = 0;
5974 ctxt->disableSAX = 1;
5975 return(NULL);
5976 }
5977
5978 /*
5979 * Now parse the attributes, it ends up with the ending
5980 *
5981 * (S Attribute)* S?
5982 */
5983 SKIP_BLANKS;
5984 GROW;
5985
Daniel Veillard21a0f912001-02-25 19:54:14 +00005986 while ((RAW != '>') &&
5987 ((RAW != '/') || (NXT(1) != '>')) &&
5988 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005989 const xmlChar *q = CUR_PTR;
5990 int cons = ctxt->input->consumed;
5991
5992 attname = xmlParseAttribute(ctxt, &attvalue);
5993 if ((attname != NULL) && (attvalue != NULL)) {
5994 /*
5995 * [ WFC: Unique Att Spec ]
5996 * No attribute name may appear more than once in the same
5997 * start-tag or empty-element tag.
5998 */
5999 for (i = 0; i < nbatts;i += 2) {
6000 if (xmlStrEqual(atts[i], attname)) {
6001 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6003 ctxt->sax->error(ctxt->userData,
6004 "Attribute %s redefined\n",
6005 attname);
6006 ctxt->wellFormed = 0;
6007 ctxt->disableSAX = 1;
6008 xmlFree(attname);
6009 xmlFree(attvalue);
6010 goto failed;
6011 }
6012 }
6013
6014 /*
6015 * Add the pair to atts
6016 */
6017 if (atts == NULL) {
6018 maxatts = 10;
6019 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6020 if (atts == NULL) {
6021 xmlGenericError(xmlGenericErrorContext,
6022 "malloc of %ld byte failed\n",
6023 maxatts * (long)sizeof(xmlChar *));
6024 return(NULL);
6025 }
6026 } else if (nbatts + 4 > maxatts) {
6027 maxatts *= 2;
6028 atts = (const xmlChar **) xmlRealloc((void *) atts,
6029 maxatts * sizeof(xmlChar *));
6030 if (atts == NULL) {
6031 xmlGenericError(xmlGenericErrorContext,
6032 "realloc of %ld byte failed\n",
6033 maxatts * (long)sizeof(xmlChar *));
6034 return(NULL);
6035 }
6036 }
6037 atts[nbatts++] = attname;
6038 atts[nbatts++] = attvalue;
6039 atts[nbatts] = NULL;
6040 atts[nbatts + 1] = NULL;
6041 } else {
6042 if (attname != NULL)
6043 xmlFree(attname);
6044 if (attvalue != NULL)
6045 xmlFree(attvalue);
6046 }
6047
6048failed:
6049
6050 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6051 break;
6052 if (!IS_BLANK(RAW)) {
6053 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6055 ctxt->sax->error(ctxt->userData,
6056 "attributes construct error\n");
6057 ctxt->wellFormed = 0;
6058 ctxt->disableSAX = 1;
6059 }
6060 SKIP_BLANKS;
6061 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6062 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6064 ctxt->sax->error(ctxt->userData,
6065 "xmlParseStartTag: problem parsing attributes\n");
6066 ctxt->wellFormed = 0;
6067 ctxt->disableSAX = 1;
6068 break;
6069 }
6070 GROW;
6071 }
6072
6073 /*
6074 * SAX: Start of Element !
6075 */
6076 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6077 (!ctxt->disableSAX))
6078 ctxt->sax->startElement(ctxt->userData, name, atts);
6079
6080 if (atts != NULL) {
6081 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6082 xmlFree((void *) atts);
6083 }
6084 return(name);
6085}
6086
6087/**
6088 * xmlParseEndTag:
6089 * @ctxt: an XML parser context
6090 *
6091 * parse an end of tag
6092 *
6093 * [42] ETag ::= '</' Name S? '>'
6094 *
6095 * With namespace
6096 *
6097 * [NS 9] ETag ::= '</' QName S? '>'
6098 */
6099
6100void
6101xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6102 xmlChar *name;
6103 xmlChar *oldname;
6104
6105 GROW;
6106 if ((RAW != '<') || (NXT(1) != '/')) {
6107 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6109 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6110 ctxt->wellFormed = 0;
6111 ctxt->disableSAX = 1;
6112 return;
6113 }
6114 SKIP(2);
6115
6116 name = xmlParseName(ctxt);
6117
6118 /*
6119 * We should definitely be at the ending "S? '>'" part
6120 */
6121 GROW;
6122 SKIP_BLANKS;
6123 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6124 ctxt->errNo = XML_ERR_GT_REQUIRED;
6125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6126 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6127 ctxt->wellFormed = 0;
6128 ctxt->disableSAX = 1;
6129 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006130 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006131
6132 /*
6133 * [ WFC: Element Type Match ]
6134 * The Name in an element's end-tag must match the element type in the
6135 * start-tag.
6136 *
6137 */
6138 if ((name == NULL) || (ctxt->name == NULL) ||
6139 (!xmlStrEqual(name, ctxt->name))) {
6140 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6142 if ((name != NULL) && (ctxt->name != NULL)) {
6143 ctxt->sax->error(ctxt->userData,
6144 "Opening and ending tag mismatch: %s and %s\n",
6145 ctxt->name, name);
6146 } else if (ctxt->name != NULL) {
6147 ctxt->sax->error(ctxt->userData,
6148 "Ending tag eror for: %s\n", ctxt->name);
6149 } else {
6150 ctxt->sax->error(ctxt->userData,
6151 "Ending tag error: internal error ???\n");
6152 }
6153
6154 }
6155 ctxt->wellFormed = 0;
6156 ctxt->disableSAX = 1;
6157 }
6158
6159 /*
6160 * SAX: End of Tag
6161 */
6162 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6163 (!ctxt->disableSAX))
6164 ctxt->sax->endElement(ctxt->userData, name);
6165
6166 if (name != NULL)
6167 xmlFree(name);
6168 oldname = namePop(ctxt);
6169 spacePop(ctxt);
6170 if (oldname != NULL) {
6171#ifdef DEBUG_STACK
6172 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6173#endif
6174 xmlFree(oldname);
6175 }
6176 return;
6177}
6178
6179/**
6180 * xmlParseCDSect:
6181 * @ctxt: an XML parser context
6182 *
6183 * Parse escaped pure raw content.
6184 *
6185 * [18] CDSect ::= CDStart CData CDEnd
6186 *
6187 * [19] CDStart ::= '<![CDATA['
6188 *
6189 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6190 *
6191 * [21] CDEnd ::= ']]>'
6192 */
6193void
6194xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6195 xmlChar *buf = NULL;
6196 int len = 0;
6197 int size = XML_PARSER_BUFFER_SIZE;
6198 int r, rl;
6199 int s, sl;
6200 int cur, l;
6201 int count = 0;
6202
6203 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6204 (NXT(2) == '[') && (NXT(3) == 'C') &&
6205 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6206 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6207 (NXT(8) == '[')) {
6208 SKIP(9);
6209 } else
6210 return;
6211
6212 ctxt->instate = XML_PARSER_CDATA_SECTION;
6213 r = CUR_CHAR(rl);
6214 if (!IS_CHAR(r)) {
6215 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6217 ctxt->sax->error(ctxt->userData,
6218 "CData section not finished\n");
6219 ctxt->wellFormed = 0;
6220 ctxt->disableSAX = 1;
6221 ctxt->instate = XML_PARSER_CONTENT;
6222 return;
6223 }
6224 NEXTL(rl);
6225 s = CUR_CHAR(sl);
6226 if (!IS_CHAR(s)) {
6227 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6229 ctxt->sax->error(ctxt->userData,
6230 "CData section not finished\n");
6231 ctxt->wellFormed = 0;
6232 ctxt->disableSAX = 1;
6233 ctxt->instate = XML_PARSER_CONTENT;
6234 return;
6235 }
6236 NEXTL(sl);
6237 cur = CUR_CHAR(l);
6238 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6239 if (buf == NULL) {
6240 xmlGenericError(xmlGenericErrorContext,
6241 "malloc of %d byte failed\n", size);
6242 return;
6243 }
6244 while (IS_CHAR(cur) &&
6245 ((r != ']') || (s != ']') || (cur != '>'))) {
6246 if (len + 5 >= size) {
6247 size *= 2;
6248 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6249 if (buf == NULL) {
6250 xmlGenericError(xmlGenericErrorContext,
6251 "realloc of %d byte failed\n", size);
6252 return;
6253 }
6254 }
6255 COPY_BUF(rl,buf,len,r);
6256 r = s;
6257 rl = sl;
6258 s = cur;
6259 sl = l;
6260 count++;
6261 if (count > 50) {
6262 GROW;
6263 count = 0;
6264 }
6265 NEXTL(l);
6266 cur = CUR_CHAR(l);
6267 }
6268 buf[len] = 0;
6269 ctxt->instate = XML_PARSER_CONTENT;
6270 if (cur != '>') {
6271 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6273 ctxt->sax->error(ctxt->userData,
6274 "CData section not finished\n%.50s\n", buf);
6275 ctxt->wellFormed = 0;
6276 ctxt->disableSAX = 1;
6277 xmlFree(buf);
6278 return;
6279 }
6280 NEXTL(l);
6281
6282 /*
6283 * Ok the buffer is to be consumed as cdata.
6284 */
6285 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6286 if (ctxt->sax->cdataBlock != NULL)
6287 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6288 }
6289 xmlFree(buf);
6290}
6291
6292/**
6293 * xmlParseContent:
6294 * @ctxt: an XML parser context
6295 *
6296 * Parse a content:
6297 *
6298 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6299 */
6300
6301void
6302xmlParseContent(xmlParserCtxtPtr ctxt) {
6303 GROW;
6304 while (((RAW != 0) || (ctxt->token != 0)) &&
6305 ((RAW != '<') || (NXT(1) != '/'))) {
6306 const xmlChar *test = CUR_PTR;
6307 int cons = ctxt->input->consumed;
6308 xmlChar tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006309 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006310
6311 /*
6312 * Handle possible processed charrefs.
6313 */
6314 if (ctxt->token != 0) {
6315 xmlParseCharData(ctxt, 0);
6316 }
6317 /*
6318 * First case : a Processing Instruction.
6319 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006320 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006321 xmlParsePI(ctxt);
6322 }
6323
6324 /*
6325 * Second case : a CDSection
6326 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006327 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006328 (NXT(2) == '[') && (NXT(3) == 'C') &&
6329 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6330 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6331 (NXT(8) == '[')) {
6332 xmlParseCDSect(ctxt);
6333 }
6334
6335 /*
6336 * Third case : a comment
6337 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006338 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006339 (NXT(2) == '-') && (NXT(3) == '-')) {
6340 xmlParseComment(ctxt);
6341 ctxt->instate = XML_PARSER_CONTENT;
6342 }
6343
6344 /*
6345 * Fourth case : a sub-element.
6346 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006347 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006348 xmlParseElement(ctxt);
6349 }
6350
6351 /*
6352 * Fifth case : a reference. If if has not been resolved,
6353 * parsing returns it's Name, create the node
6354 */
6355
Daniel Veillard21a0f912001-02-25 19:54:14 +00006356 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006357 xmlParseReference(ctxt);
6358 }
6359
6360 /*
6361 * Last case, text. Note that References are handled directly.
6362 */
6363 else {
6364 xmlParseCharData(ctxt, 0);
6365 }
6366
6367 GROW;
6368 /*
6369 * Pop-up of finished entities.
6370 */
6371 while ((RAW == 0) && (ctxt->inputNr > 1))
6372 xmlPopInput(ctxt);
6373 SHRINK;
6374
6375 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6376 (tok == ctxt->token)) {
6377 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6379 ctxt->sax->error(ctxt->userData,
6380 "detected an error in element content\n");
6381 ctxt->wellFormed = 0;
6382 ctxt->disableSAX = 1;
6383 ctxt->instate = XML_PARSER_EOF;
6384 break;
6385 }
6386 }
6387}
6388
6389/**
6390 * xmlParseElement:
6391 * @ctxt: an XML parser context
6392 *
6393 * parse an XML element, this is highly recursive
6394 *
6395 * [39] element ::= EmptyElemTag | STag content ETag
6396 *
6397 * [ WFC: Element Type Match ]
6398 * The Name in an element's end-tag must match the element type in the
6399 * start-tag.
6400 *
6401 * [ VC: Element Valid ]
6402 * An element is valid if there is a declaration matching elementdecl
6403 * where the Name matches the element type and one of the following holds:
6404 * - The declaration matches EMPTY and the element has no content.
6405 * - The declaration matches children and the sequence of child elements
6406 * belongs to the language generated by the regular expression in the
6407 * content model, with optional white space (characters matching the
6408 * nonterminal S) between each pair of child elements.
6409 * - The declaration matches Mixed and the content consists of character
6410 * data and child elements whose types match names in the content model.
6411 * - The declaration matches ANY, and the types of any child elements have
6412 * been declared.
6413 */
6414
6415void
6416xmlParseElement(xmlParserCtxtPtr ctxt) {
6417 const xmlChar *openTag = CUR_PTR;
6418 xmlChar *name;
6419 xmlChar *oldname;
6420 xmlParserNodeInfo node_info;
6421 xmlNodePtr ret;
6422
6423 /* Capture start position */
6424 if (ctxt->record_info) {
6425 node_info.begin_pos = ctxt->input->consumed +
6426 (CUR_PTR - ctxt->input->base);
6427 node_info.begin_line = ctxt->input->line;
6428 }
6429
6430 if (ctxt->spaceNr == 0)
6431 spacePush(ctxt, -1);
6432 else
6433 spacePush(ctxt, *ctxt->space);
6434
6435 name = xmlParseStartTag(ctxt);
6436 if (name == NULL) {
6437 spacePop(ctxt);
6438 return;
6439 }
6440 namePush(ctxt, name);
6441 ret = ctxt->node;
6442
6443 /*
6444 * [ VC: Root Element Type ]
6445 * The Name in the document type declaration must match the element
6446 * type of the root element.
6447 */
6448 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6449 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6450 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6451
6452 /*
6453 * Check for an Empty Element.
6454 */
6455 if ((RAW == '/') && (NXT(1) == '>')) {
6456 SKIP(2);
6457 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6458 (!ctxt->disableSAX))
6459 ctxt->sax->endElement(ctxt->userData, name);
6460 oldname = namePop(ctxt);
6461 spacePop(ctxt);
6462 if (oldname != NULL) {
6463#ifdef DEBUG_STACK
6464 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6465#endif
6466 xmlFree(oldname);
6467 }
6468 if ( ret != NULL && ctxt->record_info ) {
6469 node_info.end_pos = ctxt->input->consumed +
6470 (CUR_PTR - ctxt->input->base);
6471 node_info.end_line = ctxt->input->line;
6472 node_info.node = ret;
6473 xmlParserAddNodeInfo(ctxt, &node_info);
6474 }
6475 return;
6476 }
6477 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006478 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006479 } else {
6480 ctxt->errNo = XML_ERR_GT_REQUIRED;
6481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6482 ctxt->sax->error(ctxt->userData,
6483 "Couldn't find end of Start Tag\n%.30s\n",
6484 openTag);
6485 ctxt->wellFormed = 0;
6486 ctxt->disableSAX = 1;
6487
6488 /*
6489 * end of parsing of this node.
6490 */
6491 nodePop(ctxt);
6492 oldname = namePop(ctxt);
6493 spacePop(ctxt);
6494 if (oldname != NULL) {
6495#ifdef DEBUG_STACK
6496 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6497#endif
6498 xmlFree(oldname);
6499 }
6500
6501 /*
6502 * Capture end position and add node
6503 */
6504 if ( ret != NULL && ctxt->record_info ) {
6505 node_info.end_pos = ctxt->input->consumed +
6506 (CUR_PTR - ctxt->input->base);
6507 node_info.end_line = ctxt->input->line;
6508 node_info.node = ret;
6509 xmlParserAddNodeInfo(ctxt, &node_info);
6510 }
6511 return;
6512 }
6513
6514 /*
6515 * Parse the content of the element:
6516 */
6517 xmlParseContent(ctxt);
6518 if (!IS_CHAR(RAW)) {
6519 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6521 ctxt->sax->error(ctxt->userData,
6522 "Premature end of data in tag %.30s\n", openTag);
6523 ctxt->wellFormed = 0;
6524 ctxt->disableSAX = 1;
6525
6526 /*
6527 * end of parsing of this node.
6528 */
6529 nodePop(ctxt);
6530 oldname = namePop(ctxt);
6531 spacePop(ctxt);
6532 if (oldname != NULL) {
6533#ifdef DEBUG_STACK
6534 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6535#endif
6536 xmlFree(oldname);
6537 }
6538 return;
6539 }
6540
6541 /*
6542 * parse the end of tag: '</' should be here.
6543 */
6544 xmlParseEndTag(ctxt);
6545
6546 /*
6547 * Capture end position and add node
6548 */
6549 if ( ret != NULL && ctxt->record_info ) {
6550 node_info.end_pos = ctxt->input->consumed +
6551 (CUR_PTR - ctxt->input->base);
6552 node_info.end_line = ctxt->input->line;
6553 node_info.node = ret;
6554 xmlParserAddNodeInfo(ctxt, &node_info);
6555 }
6556}
6557
6558/**
6559 * xmlParseVersionNum:
6560 * @ctxt: an XML parser context
6561 *
6562 * parse the XML version value.
6563 *
6564 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6565 *
6566 * Returns the string giving the XML version number, or NULL
6567 */
6568xmlChar *
6569xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6570 xmlChar *buf = NULL;
6571 int len = 0;
6572 int size = 10;
6573 xmlChar cur;
6574
6575 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6576 if (buf == NULL) {
6577 xmlGenericError(xmlGenericErrorContext,
6578 "malloc of %d byte failed\n", size);
6579 return(NULL);
6580 }
6581 cur = CUR;
6582 while (((cur >= 'a') && (cur <= 'z')) ||
6583 ((cur >= 'A') && (cur <= 'Z')) ||
6584 ((cur >= '0') && (cur <= '9')) ||
6585 (cur == '_') || (cur == '.') ||
6586 (cur == ':') || (cur == '-')) {
6587 if (len + 1 >= size) {
6588 size *= 2;
6589 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6590 if (buf == NULL) {
6591 xmlGenericError(xmlGenericErrorContext,
6592 "realloc of %d byte failed\n", size);
6593 return(NULL);
6594 }
6595 }
6596 buf[len++] = cur;
6597 NEXT;
6598 cur=CUR;
6599 }
6600 buf[len] = 0;
6601 return(buf);
6602}
6603
6604/**
6605 * xmlParseVersionInfo:
6606 * @ctxt: an XML parser context
6607 *
6608 * parse the XML version.
6609 *
6610 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6611 *
6612 * [25] Eq ::= S? '=' S?
6613 *
6614 * Returns the version string, e.g. "1.0"
6615 */
6616
6617xmlChar *
6618xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6619 xmlChar *version = NULL;
6620 const xmlChar *q;
6621
6622 if ((RAW == 'v') && (NXT(1) == 'e') &&
6623 (NXT(2) == 'r') && (NXT(3) == 's') &&
6624 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6625 (NXT(6) == 'n')) {
6626 SKIP(7);
6627 SKIP_BLANKS;
6628 if (RAW != '=') {
6629 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6631 ctxt->sax->error(ctxt->userData,
6632 "xmlParseVersionInfo : expected '='\n");
6633 ctxt->wellFormed = 0;
6634 ctxt->disableSAX = 1;
6635 return(NULL);
6636 }
6637 NEXT;
6638 SKIP_BLANKS;
6639 if (RAW == '"') {
6640 NEXT;
6641 q = CUR_PTR;
6642 version = xmlParseVersionNum(ctxt);
6643 if (RAW != '"') {
6644 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6646 ctxt->sax->error(ctxt->userData,
6647 "String not closed\n%.50s\n", q);
6648 ctxt->wellFormed = 0;
6649 ctxt->disableSAX = 1;
6650 } else
6651 NEXT;
6652 } else if (RAW == '\''){
6653 NEXT;
6654 q = CUR_PTR;
6655 version = xmlParseVersionNum(ctxt);
6656 if (RAW != '\'') {
6657 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6659 ctxt->sax->error(ctxt->userData,
6660 "String not closed\n%.50s\n", q);
6661 ctxt->wellFormed = 0;
6662 ctxt->disableSAX = 1;
6663 } else
6664 NEXT;
6665 } else {
6666 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6668 ctxt->sax->error(ctxt->userData,
6669 "xmlParseVersionInfo : expected ' or \"\n");
6670 ctxt->wellFormed = 0;
6671 ctxt->disableSAX = 1;
6672 }
6673 }
6674 return(version);
6675}
6676
6677/**
6678 * xmlParseEncName:
6679 * @ctxt: an XML parser context
6680 *
6681 * parse the XML encoding name
6682 *
6683 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6684 *
6685 * Returns the encoding name value or NULL
6686 */
6687xmlChar *
6688xmlParseEncName(xmlParserCtxtPtr ctxt) {
6689 xmlChar *buf = NULL;
6690 int len = 0;
6691 int size = 10;
6692 xmlChar cur;
6693
6694 cur = CUR;
6695 if (((cur >= 'a') && (cur <= 'z')) ||
6696 ((cur >= 'A') && (cur <= 'Z'))) {
6697 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6698 if (buf == NULL) {
6699 xmlGenericError(xmlGenericErrorContext,
6700 "malloc of %d byte failed\n", size);
6701 return(NULL);
6702 }
6703
6704 buf[len++] = cur;
6705 NEXT;
6706 cur = CUR;
6707 while (((cur >= 'a') && (cur <= 'z')) ||
6708 ((cur >= 'A') && (cur <= 'Z')) ||
6709 ((cur >= '0') && (cur <= '9')) ||
6710 (cur == '.') || (cur == '_') ||
6711 (cur == '-')) {
6712 if (len + 1 >= size) {
6713 size *= 2;
6714 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6715 if (buf == NULL) {
6716 xmlGenericError(xmlGenericErrorContext,
6717 "realloc of %d byte failed\n", size);
6718 return(NULL);
6719 }
6720 }
6721 buf[len++] = cur;
6722 NEXT;
6723 cur = CUR;
6724 if (cur == 0) {
6725 SHRINK;
6726 GROW;
6727 cur = CUR;
6728 }
6729 }
6730 buf[len] = 0;
6731 } else {
6732 ctxt->errNo = XML_ERR_ENCODING_NAME;
6733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6734 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6735 ctxt->wellFormed = 0;
6736 ctxt->disableSAX = 1;
6737 }
6738 return(buf);
6739}
6740
6741/**
6742 * xmlParseEncodingDecl:
6743 * @ctxt: an XML parser context
6744 *
6745 * parse the XML encoding declaration
6746 *
6747 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6748 *
6749 * this setups the conversion filters.
6750 *
6751 * Returns the encoding value or NULL
6752 */
6753
6754xmlChar *
6755xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6756 xmlChar *encoding = NULL;
6757 const xmlChar *q;
6758
6759 SKIP_BLANKS;
6760 if ((RAW == 'e') && (NXT(1) == 'n') &&
6761 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6762 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6763 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6764 SKIP(8);
6765 SKIP_BLANKS;
6766 if (RAW != '=') {
6767 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6769 ctxt->sax->error(ctxt->userData,
6770 "xmlParseEncodingDecl : expected '='\n");
6771 ctxt->wellFormed = 0;
6772 ctxt->disableSAX = 1;
6773 return(NULL);
6774 }
6775 NEXT;
6776 SKIP_BLANKS;
6777 if (RAW == '"') {
6778 NEXT;
6779 q = CUR_PTR;
6780 encoding = xmlParseEncName(ctxt);
6781 if (RAW != '"') {
6782 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6784 ctxt->sax->error(ctxt->userData,
6785 "String not closed\n%.50s\n", q);
6786 ctxt->wellFormed = 0;
6787 ctxt->disableSAX = 1;
6788 } else
6789 NEXT;
6790 } else if (RAW == '\''){
6791 NEXT;
6792 q = CUR_PTR;
6793 encoding = xmlParseEncName(ctxt);
6794 if (RAW != '\'') {
6795 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6797 ctxt->sax->error(ctxt->userData,
6798 "String not closed\n%.50s\n", q);
6799 ctxt->wellFormed = 0;
6800 ctxt->disableSAX = 1;
6801 } else
6802 NEXT;
6803 } else if (RAW == '"'){
6804 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6806 ctxt->sax->error(ctxt->userData,
6807 "xmlParseEncodingDecl : expected ' or \"\n");
6808 ctxt->wellFormed = 0;
6809 ctxt->disableSAX = 1;
6810 }
6811 if (encoding != NULL) {
6812 xmlCharEncoding enc;
6813 xmlCharEncodingHandlerPtr handler;
6814
6815 if (ctxt->input->encoding != NULL)
6816 xmlFree((xmlChar *) ctxt->input->encoding);
6817 ctxt->input->encoding = encoding;
6818
6819 enc = xmlParseCharEncoding((const char *) encoding);
6820 /*
6821 * registered set of known encodings
6822 */
6823 if (enc != XML_CHAR_ENCODING_ERROR) {
6824 xmlSwitchEncoding(ctxt, enc);
6825 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6826 xmlFree(encoding);
6827 return(NULL);
6828 }
6829 } else {
6830 /*
6831 * fallback for unknown encodings
6832 */
6833 handler = xmlFindCharEncodingHandler((const char *) encoding);
6834 if (handler != NULL) {
6835 xmlSwitchToEncoding(ctxt, handler);
6836 } else {
6837 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6839 ctxt->sax->error(ctxt->userData,
6840 "Unsupported encoding %s\n", encoding);
6841 return(NULL);
6842 }
6843 }
6844 }
6845 }
6846 return(encoding);
6847}
6848
6849/**
6850 * xmlParseSDDecl:
6851 * @ctxt: an XML parser context
6852 *
6853 * parse the XML standalone declaration
6854 *
6855 * [32] SDDecl ::= S 'standalone' Eq
6856 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6857 *
6858 * [ VC: Standalone Document Declaration ]
6859 * TODO The standalone document declaration must have the value "no"
6860 * if any external markup declarations contain declarations of:
6861 * - attributes with default values, if elements to which these
6862 * attributes apply appear in the document without specifications
6863 * of values for these attributes, or
6864 * - entities (other than amp, lt, gt, apos, quot), if references
6865 * to those entities appear in the document, or
6866 * - attributes with values subject to normalization, where the
6867 * attribute appears in the document with a value which will change
6868 * as a result of normalization, or
6869 * - element types with element content, if white space occurs directly
6870 * within any instance of those types.
6871 *
6872 * Returns 1 if standalone, 0 otherwise
6873 */
6874
6875int
6876xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6877 int standalone = -1;
6878
6879 SKIP_BLANKS;
6880 if ((RAW == 's') && (NXT(1) == 't') &&
6881 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6882 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6883 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6884 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6885 SKIP(10);
6886 SKIP_BLANKS;
6887 if (RAW != '=') {
6888 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6890 ctxt->sax->error(ctxt->userData,
6891 "XML standalone declaration : expected '='\n");
6892 ctxt->wellFormed = 0;
6893 ctxt->disableSAX = 1;
6894 return(standalone);
6895 }
6896 NEXT;
6897 SKIP_BLANKS;
6898 if (RAW == '\''){
6899 NEXT;
6900 if ((RAW == 'n') && (NXT(1) == 'o')) {
6901 standalone = 0;
6902 SKIP(2);
6903 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6904 (NXT(2) == 's')) {
6905 standalone = 1;
6906 SKIP(3);
6907 } else {
6908 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6910 ctxt->sax->error(ctxt->userData,
6911 "standalone accepts only 'yes' or 'no'\n");
6912 ctxt->wellFormed = 0;
6913 ctxt->disableSAX = 1;
6914 }
6915 if (RAW != '\'') {
6916 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6918 ctxt->sax->error(ctxt->userData, "String not closed\n");
6919 ctxt->wellFormed = 0;
6920 ctxt->disableSAX = 1;
6921 } else
6922 NEXT;
6923 } else if (RAW == '"'){
6924 NEXT;
6925 if ((RAW == 'n') && (NXT(1) == 'o')) {
6926 standalone = 0;
6927 SKIP(2);
6928 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6929 (NXT(2) == 's')) {
6930 standalone = 1;
6931 SKIP(3);
6932 } else {
6933 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6935 ctxt->sax->error(ctxt->userData,
6936 "standalone accepts only 'yes' or 'no'\n");
6937 ctxt->wellFormed = 0;
6938 ctxt->disableSAX = 1;
6939 }
6940 if (RAW != '"') {
6941 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6943 ctxt->sax->error(ctxt->userData, "String not closed\n");
6944 ctxt->wellFormed = 0;
6945 ctxt->disableSAX = 1;
6946 } else
6947 NEXT;
6948 } else {
6949 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6951 ctxt->sax->error(ctxt->userData,
6952 "Standalone value not found\n");
6953 ctxt->wellFormed = 0;
6954 ctxt->disableSAX = 1;
6955 }
6956 }
6957 return(standalone);
6958}
6959
6960/**
6961 * xmlParseXMLDecl:
6962 * @ctxt: an XML parser context
6963 *
6964 * parse an XML declaration header
6965 *
6966 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6967 */
6968
6969void
6970xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6971 xmlChar *version;
6972
6973 /*
6974 * We know that '<?xml' is here.
6975 */
6976 SKIP(5);
6977
6978 if (!IS_BLANK(RAW)) {
6979 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6981 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6982 ctxt->wellFormed = 0;
6983 ctxt->disableSAX = 1;
6984 }
6985 SKIP_BLANKS;
6986
6987 /*
6988 * We should have the VersionInfo here.
6989 */
6990 version = xmlParseVersionInfo(ctxt);
6991 if (version == NULL)
6992 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6993 ctxt->version = xmlStrdup(version);
6994 xmlFree(version);
6995
6996 /*
6997 * We may have the encoding declaration
6998 */
6999 if (!IS_BLANK(RAW)) {
7000 if ((RAW == '?') && (NXT(1) == '>')) {
7001 SKIP(2);
7002 return;
7003 }
7004 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7006 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7007 ctxt->wellFormed = 0;
7008 ctxt->disableSAX = 1;
7009 }
7010 xmlParseEncodingDecl(ctxt);
7011 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7012 /*
7013 * The XML REC instructs us to stop parsing right here
7014 */
7015 return;
7016 }
7017
7018 /*
7019 * We may have the standalone status.
7020 */
7021 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7022 if ((RAW == '?') && (NXT(1) == '>')) {
7023 SKIP(2);
7024 return;
7025 }
7026 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7028 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7029 ctxt->wellFormed = 0;
7030 ctxt->disableSAX = 1;
7031 }
7032 SKIP_BLANKS;
7033 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7034
7035 SKIP_BLANKS;
7036 if ((RAW == '?') && (NXT(1) == '>')) {
7037 SKIP(2);
7038 } else if (RAW == '>') {
7039 /* Deprecated old WD ... */
7040 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7042 ctxt->sax->error(ctxt->userData,
7043 "XML declaration must end-up with '?>'\n");
7044 ctxt->wellFormed = 0;
7045 ctxt->disableSAX = 1;
7046 NEXT;
7047 } else {
7048 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7050 ctxt->sax->error(ctxt->userData,
7051 "parsing XML declaration: '?>' expected\n");
7052 ctxt->wellFormed = 0;
7053 ctxt->disableSAX = 1;
7054 MOVETO_ENDTAG(CUR_PTR);
7055 NEXT;
7056 }
7057}
7058
7059/**
7060 * xmlParseMisc:
7061 * @ctxt: an XML parser context
7062 *
7063 * parse an XML Misc* optionnal field.
7064 *
7065 * [27] Misc ::= Comment | PI | S
7066 */
7067
7068void
7069xmlParseMisc(xmlParserCtxtPtr ctxt) {
7070 while (((RAW == '<') && (NXT(1) == '?')) ||
7071 ((RAW == '<') && (NXT(1) == '!') &&
7072 (NXT(2) == '-') && (NXT(3) == '-')) ||
7073 IS_BLANK(CUR)) {
7074 if ((RAW == '<') && (NXT(1) == '?')) {
7075 xmlParsePI(ctxt);
7076 } else if (IS_BLANK(CUR)) {
7077 NEXT;
7078 } else
7079 xmlParseComment(ctxt);
7080 }
7081}
7082
7083/**
7084 * xmlParseDocument:
7085 * @ctxt: an XML parser context
7086 *
7087 * parse an XML document (and build a tree if using the standard SAX
7088 * interface).
7089 *
7090 * [1] document ::= prolog element Misc*
7091 *
7092 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7093 *
7094 * Returns 0, -1 in case of error. the parser context is augmented
7095 * as a result of the parsing.
7096 */
7097
7098int
7099xmlParseDocument(xmlParserCtxtPtr ctxt) {
7100 xmlChar start[4];
7101 xmlCharEncoding enc;
7102
7103 xmlInitParser();
7104
7105 GROW;
7106
7107 /*
7108 * SAX: beginning of the document processing.
7109 */
7110 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7111 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7112
7113 /*
7114 * Get the 4 first bytes and decode the charset
7115 * if enc != XML_CHAR_ENCODING_NONE
7116 * plug some encoding conversion routines.
7117 */
7118 start[0] = RAW;
7119 start[1] = NXT(1);
7120 start[2] = NXT(2);
7121 start[3] = NXT(3);
7122 enc = xmlDetectCharEncoding(start, 4);
7123 if (enc != XML_CHAR_ENCODING_NONE) {
7124 xmlSwitchEncoding(ctxt, enc);
7125 }
7126
7127
7128 if (CUR == 0) {
7129 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7131 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7132 ctxt->wellFormed = 0;
7133 ctxt->disableSAX = 1;
7134 }
7135
7136 /*
7137 * Check for the XMLDecl in the Prolog.
7138 */
7139 GROW;
7140 if ((RAW == '<') && (NXT(1) == '?') &&
7141 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7142 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7143
7144 /*
7145 * Note that we will switch encoding on the fly.
7146 */
7147 xmlParseXMLDecl(ctxt);
7148 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7149 /*
7150 * The XML REC instructs us to stop parsing right here
7151 */
7152 return(-1);
7153 }
7154 ctxt->standalone = ctxt->input->standalone;
7155 SKIP_BLANKS;
7156 } else {
7157 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7158 }
7159 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7160 ctxt->sax->startDocument(ctxt->userData);
7161
7162 /*
7163 * The Misc part of the Prolog
7164 */
7165 GROW;
7166 xmlParseMisc(ctxt);
7167
7168 /*
7169 * Then possibly doc type declaration(s) and more Misc
7170 * (doctypedecl Misc*)?
7171 */
7172 GROW;
7173 if ((RAW == '<') && (NXT(1) == '!') &&
7174 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7175 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7176 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7177 (NXT(8) == 'E')) {
7178
7179 ctxt->inSubset = 1;
7180 xmlParseDocTypeDecl(ctxt);
7181 if (RAW == '[') {
7182 ctxt->instate = XML_PARSER_DTD;
7183 xmlParseInternalSubset(ctxt);
7184 }
7185
7186 /*
7187 * Create and update the external subset.
7188 */
7189 ctxt->inSubset = 2;
7190 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7191 (!ctxt->disableSAX))
7192 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7193 ctxt->extSubSystem, ctxt->extSubURI);
7194 ctxt->inSubset = 0;
7195
7196
7197 ctxt->instate = XML_PARSER_PROLOG;
7198 xmlParseMisc(ctxt);
7199 }
7200
7201 /*
7202 * Time to start parsing the tree itself
7203 */
7204 GROW;
7205 if (RAW != '<') {
7206 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7208 ctxt->sax->error(ctxt->userData,
7209 "Start tag expected, '<' not found\n");
7210 ctxt->wellFormed = 0;
7211 ctxt->disableSAX = 1;
7212 ctxt->instate = XML_PARSER_EOF;
7213 } else {
7214 ctxt->instate = XML_PARSER_CONTENT;
7215 xmlParseElement(ctxt);
7216 ctxt->instate = XML_PARSER_EPILOG;
7217
7218
7219 /*
7220 * The Misc part at the end
7221 */
7222 xmlParseMisc(ctxt);
7223
7224 if (RAW != 0) {
7225 ctxt->errNo = XML_ERR_DOCUMENT_END;
7226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7227 ctxt->sax->error(ctxt->userData,
7228 "Extra content at the end of the document\n");
7229 ctxt->wellFormed = 0;
7230 ctxt->disableSAX = 1;
7231 }
7232 ctxt->instate = XML_PARSER_EOF;
7233 }
7234
7235 /*
7236 * SAX: end of the document processing.
7237 */
7238 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7239 (!ctxt->disableSAX))
7240 ctxt->sax->endDocument(ctxt->userData);
7241
7242 if (! ctxt->wellFormed) return(-1);
7243 return(0);
7244}
7245
7246/**
7247 * xmlParseExtParsedEnt:
7248 * @ctxt: an XML parser context
7249 *
7250 * parse a genreral parsed entity
7251 * An external general parsed entity is well-formed if it matches the
7252 * production labeled extParsedEnt.
7253 *
7254 * [78] extParsedEnt ::= TextDecl? content
7255 *
7256 * Returns 0, -1 in case of error. the parser context is augmented
7257 * as a result of the parsing.
7258 */
7259
7260int
7261xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7262 xmlChar start[4];
7263 xmlCharEncoding enc;
7264
7265 xmlDefaultSAXHandlerInit();
7266
7267 GROW;
7268
7269 /*
7270 * SAX: beginning of the document processing.
7271 */
7272 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7273 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7274
7275 /*
7276 * Get the 4 first bytes and decode the charset
7277 * if enc != XML_CHAR_ENCODING_NONE
7278 * plug some encoding conversion routines.
7279 */
7280 start[0] = RAW;
7281 start[1] = NXT(1);
7282 start[2] = NXT(2);
7283 start[3] = NXT(3);
7284 enc = xmlDetectCharEncoding(start, 4);
7285 if (enc != XML_CHAR_ENCODING_NONE) {
7286 xmlSwitchEncoding(ctxt, enc);
7287 }
7288
7289
7290 if (CUR == 0) {
7291 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7293 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7294 ctxt->wellFormed = 0;
7295 ctxt->disableSAX = 1;
7296 }
7297
7298 /*
7299 * Check for the XMLDecl in the Prolog.
7300 */
7301 GROW;
7302 if ((RAW == '<') && (NXT(1) == '?') &&
7303 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7304 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7305
7306 /*
7307 * Note that we will switch encoding on the fly.
7308 */
7309 xmlParseXMLDecl(ctxt);
7310 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7311 /*
7312 * The XML REC instructs us to stop parsing right here
7313 */
7314 return(-1);
7315 }
7316 SKIP_BLANKS;
7317 } else {
7318 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7319 }
7320 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7321 ctxt->sax->startDocument(ctxt->userData);
7322
7323 /*
7324 * Doing validity checking on chunk doesn't make sense
7325 */
7326 ctxt->instate = XML_PARSER_CONTENT;
7327 ctxt->validate = 0;
7328 ctxt->loadsubset = 0;
7329 ctxt->depth = 0;
7330
7331 xmlParseContent(ctxt);
7332
7333 if ((RAW == '<') && (NXT(1) == '/')) {
7334 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7336 ctxt->sax->error(ctxt->userData,
7337 "chunk is not well balanced\n");
7338 ctxt->wellFormed = 0;
7339 ctxt->disableSAX = 1;
7340 } else if (RAW != 0) {
7341 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7343 ctxt->sax->error(ctxt->userData,
7344 "extra content at the end of well balanced chunk\n");
7345 ctxt->wellFormed = 0;
7346 ctxt->disableSAX = 1;
7347 }
7348
7349 /*
7350 * SAX: end of the document processing.
7351 */
7352 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7353 (!ctxt->disableSAX))
7354 ctxt->sax->endDocument(ctxt->userData);
7355
7356 if (! ctxt->wellFormed) return(-1);
7357 return(0);
7358}
7359
7360/************************************************************************
7361 * *
7362 * Progressive parsing interfaces *
7363 * *
7364 ************************************************************************/
7365
7366/**
7367 * xmlParseLookupSequence:
7368 * @ctxt: an XML parser context
7369 * @first: the first char to lookup
7370 * @next: the next char to lookup or zero
7371 * @third: the next char to lookup or zero
7372 *
7373 * Try to find if a sequence (first, next, third) or just (first next) or
7374 * (first) is available in the input stream.
7375 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7376 * to avoid rescanning sequences of bytes, it DOES change the state of the
7377 * parser, do not use liberally.
7378 *
7379 * Returns the index to the current parsing point if the full sequence
7380 * is available, -1 otherwise.
7381 */
7382int
7383xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7384 xmlChar next, xmlChar third) {
7385 int base, len;
7386 xmlParserInputPtr in;
7387 const xmlChar *buf;
7388
7389 in = ctxt->input;
7390 if (in == NULL) return(-1);
7391 base = in->cur - in->base;
7392 if (base < 0) return(-1);
7393 if (ctxt->checkIndex > base)
7394 base = ctxt->checkIndex;
7395 if (in->buf == NULL) {
7396 buf = in->base;
7397 len = in->length;
7398 } else {
7399 buf = in->buf->buffer->content;
7400 len = in->buf->buffer->use;
7401 }
7402 /* take into account the sequence length */
7403 if (third) len -= 2;
7404 else if (next) len --;
7405 for (;base < len;base++) {
7406 if (buf[base] == first) {
7407 if (third != 0) {
7408 if ((buf[base + 1] != next) ||
7409 (buf[base + 2] != third)) continue;
7410 } else if (next != 0) {
7411 if (buf[base + 1] != next) continue;
7412 }
7413 ctxt->checkIndex = 0;
7414#ifdef DEBUG_PUSH
7415 if (next == 0)
7416 xmlGenericError(xmlGenericErrorContext,
7417 "PP: lookup '%c' found at %d\n",
7418 first, base);
7419 else if (third == 0)
7420 xmlGenericError(xmlGenericErrorContext,
7421 "PP: lookup '%c%c' found at %d\n",
7422 first, next, base);
7423 else
7424 xmlGenericError(xmlGenericErrorContext,
7425 "PP: lookup '%c%c%c' found at %d\n",
7426 first, next, third, base);
7427#endif
7428 return(base - (in->cur - in->base));
7429 }
7430 }
7431 ctxt->checkIndex = base;
7432#ifdef DEBUG_PUSH
7433 if (next == 0)
7434 xmlGenericError(xmlGenericErrorContext,
7435 "PP: lookup '%c' failed\n", first);
7436 else if (third == 0)
7437 xmlGenericError(xmlGenericErrorContext,
7438 "PP: lookup '%c%c' failed\n", first, next);
7439 else
7440 xmlGenericError(xmlGenericErrorContext,
7441 "PP: lookup '%c%c%c' failed\n", first, next, third);
7442#endif
7443 return(-1);
7444}
7445
7446/**
7447 * xmlParseTryOrFinish:
7448 * @ctxt: an XML parser context
7449 * @terminate: last chunk indicator
7450 *
7451 * Try to progress on parsing
7452 *
7453 * Returns zero if no parsing was possible
7454 */
7455int
7456xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7457 int ret = 0;
7458 int avail;
7459 xmlChar cur, next;
7460
7461#ifdef DEBUG_PUSH
7462 switch (ctxt->instate) {
7463 case XML_PARSER_EOF:
7464 xmlGenericError(xmlGenericErrorContext,
7465 "PP: try EOF\n"); break;
7466 case XML_PARSER_START:
7467 xmlGenericError(xmlGenericErrorContext,
7468 "PP: try START\n"); break;
7469 case XML_PARSER_MISC:
7470 xmlGenericError(xmlGenericErrorContext,
7471 "PP: try MISC\n");break;
7472 case XML_PARSER_COMMENT:
7473 xmlGenericError(xmlGenericErrorContext,
7474 "PP: try COMMENT\n");break;
7475 case XML_PARSER_PROLOG:
7476 xmlGenericError(xmlGenericErrorContext,
7477 "PP: try PROLOG\n");break;
7478 case XML_PARSER_START_TAG:
7479 xmlGenericError(xmlGenericErrorContext,
7480 "PP: try START_TAG\n");break;
7481 case XML_PARSER_CONTENT:
7482 xmlGenericError(xmlGenericErrorContext,
7483 "PP: try CONTENT\n");break;
7484 case XML_PARSER_CDATA_SECTION:
7485 xmlGenericError(xmlGenericErrorContext,
7486 "PP: try CDATA_SECTION\n");break;
7487 case XML_PARSER_END_TAG:
7488 xmlGenericError(xmlGenericErrorContext,
7489 "PP: try END_TAG\n");break;
7490 case XML_PARSER_ENTITY_DECL:
7491 xmlGenericError(xmlGenericErrorContext,
7492 "PP: try ENTITY_DECL\n");break;
7493 case XML_PARSER_ENTITY_VALUE:
7494 xmlGenericError(xmlGenericErrorContext,
7495 "PP: try ENTITY_VALUE\n");break;
7496 case XML_PARSER_ATTRIBUTE_VALUE:
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: try ATTRIBUTE_VALUE\n");break;
7499 case XML_PARSER_DTD:
7500 xmlGenericError(xmlGenericErrorContext,
7501 "PP: try DTD\n");break;
7502 case XML_PARSER_EPILOG:
7503 xmlGenericError(xmlGenericErrorContext,
7504 "PP: try EPILOG\n");break;
7505 case XML_PARSER_PI:
7506 xmlGenericError(xmlGenericErrorContext,
7507 "PP: try PI\n");break;
7508 case XML_PARSER_IGNORE:
7509 xmlGenericError(xmlGenericErrorContext,
7510 "PP: try IGNORE\n");break;
7511 }
7512#endif
7513
7514 while (1) {
7515 /*
7516 * Pop-up of finished entities.
7517 */
7518 while ((RAW == 0) && (ctxt->inputNr > 1))
7519 xmlPopInput(ctxt);
7520
7521 if (ctxt->input ==NULL) break;
7522 if (ctxt->input->buf == NULL)
7523 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7524 else
7525 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7526 if (avail < 1)
7527 goto done;
7528 switch (ctxt->instate) {
7529 case XML_PARSER_EOF:
7530 /*
7531 * Document parsing is done !
7532 */
7533 goto done;
7534 case XML_PARSER_START:
7535 /*
7536 * Very first chars read from the document flow.
7537 */
Owen Taylor3473f882001-02-23 17:55:21 +00007538 if (avail < 2)
7539 goto done;
7540
7541 cur = ctxt->input->cur[0];
7542 next = ctxt->input->cur[1];
7543 if (cur == 0) {
7544 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7545 ctxt->sax->setDocumentLocator(ctxt->userData,
7546 &xmlDefaultSAXLocator);
7547 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7549 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7550 ctxt->wellFormed = 0;
7551 ctxt->disableSAX = 1;
7552 ctxt->instate = XML_PARSER_EOF;
7553#ifdef DEBUG_PUSH
7554 xmlGenericError(xmlGenericErrorContext,
7555 "PP: entering EOF\n");
7556#endif
7557 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7558 ctxt->sax->endDocument(ctxt->userData);
7559 goto done;
7560 }
7561 if ((cur == '<') && (next == '?')) {
7562 /* PI or XML decl */
7563 if (avail < 5) return(ret);
7564 if ((!terminate) &&
7565 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7566 return(ret);
7567 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7568 ctxt->sax->setDocumentLocator(ctxt->userData,
7569 &xmlDefaultSAXLocator);
7570 if ((ctxt->input->cur[2] == 'x') &&
7571 (ctxt->input->cur[3] == 'm') &&
7572 (ctxt->input->cur[4] == 'l') &&
7573 (IS_BLANK(ctxt->input->cur[5]))) {
7574 ret += 5;
7575#ifdef DEBUG_PUSH
7576 xmlGenericError(xmlGenericErrorContext,
7577 "PP: Parsing XML Decl\n");
7578#endif
7579 xmlParseXMLDecl(ctxt);
7580 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7581 /*
7582 * The XML REC instructs us to stop parsing right
7583 * here
7584 */
7585 ctxt->instate = XML_PARSER_EOF;
7586 return(0);
7587 }
7588 ctxt->standalone = ctxt->input->standalone;
7589 if ((ctxt->encoding == NULL) &&
7590 (ctxt->input->encoding != NULL))
7591 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7592 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7593 (!ctxt->disableSAX))
7594 ctxt->sax->startDocument(ctxt->userData);
7595 ctxt->instate = XML_PARSER_MISC;
7596#ifdef DEBUG_PUSH
7597 xmlGenericError(xmlGenericErrorContext,
7598 "PP: entering MISC\n");
7599#endif
7600 } else {
7601 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7602 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7603 (!ctxt->disableSAX))
7604 ctxt->sax->startDocument(ctxt->userData);
7605 ctxt->instate = XML_PARSER_MISC;
7606#ifdef DEBUG_PUSH
7607 xmlGenericError(xmlGenericErrorContext,
7608 "PP: entering MISC\n");
7609#endif
7610 }
7611 } else {
7612 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7613 ctxt->sax->setDocumentLocator(ctxt->userData,
7614 &xmlDefaultSAXLocator);
7615 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7616 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7617 (!ctxt->disableSAX))
7618 ctxt->sax->startDocument(ctxt->userData);
7619 ctxt->instate = XML_PARSER_MISC;
7620#ifdef DEBUG_PUSH
7621 xmlGenericError(xmlGenericErrorContext,
7622 "PP: entering MISC\n");
7623#endif
7624 }
7625 break;
7626 case XML_PARSER_MISC:
7627 SKIP_BLANKS;
7628 if (ctxt->input->buf == NULL)
7629 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7630 else
7631 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7632 if (avail < 2)
7633 goto done;
7634 cur = ctxt->input->cur[0];
7635 next = ctxt->input->cur[1];
7636 if ((cur == '<') && (next == '?')) {
7637 if ((!terminate) &&
7638 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7639 goto done;
7640#ifdef DEBUG_PUSH
7641 xmlGenericError(xmlGenericErrorContext,
7642 "PP: Parsing PI\n");
7643#endif
7644 xmlParsePI(ctxt);
7645 } else if ((cur == '<') && (next == '!') &&
7646 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7647 if ((!terminate) &&
7648 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7649 goto done;
7650#ifdef DEBUG_PUSH
7651 xmlGenericError(xmlGenericErrorContext,
7652 "PP: Parsing Comment\n");
7653#endif
7654 xmlParseComment(ctxt);
7655 ctxt->instate = XML_PARSER_MISC;
7656 } else if ((cur == '<') && (next == '!') &&
7657 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7658 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7659 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7660 (ctxt->input->cur[8] == 'E')) {
7661 if ((!terminate) &&
7662 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7663 goto done;
7664#ifdef DEBUG_PUSH
7665 xmlGenericError(xmlGenericErrorContext,
7666 "PP: Parsing internal subset\n");
7667#endif
7668 ctxt->inSubset = 1;
7669 xmlParseDocTypeDecl(ctxt);
7670 if (RAW == '[') {
7671 ctxt->instate = XML_PARSER_DTD;
7672#ifdef DEBUG_PUSH
7673 xmlGenericError(xmlGenericErrorContext,
7674 "PP: entering DTD\n");
7675#endif
7676 } else {
7677 /*
7678 * Create and update the external subset.
7679 */
7680 ctxt->inSubset = 2;
7681 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7682 (ctxt->sax->externalSubset != NULL))
7683 ctxt->sax->externalSubset(ctxt->userData,
7684 ctxt->intSubName, ctxt->extSubSystem,
7685 ctxt->extSubURI);
7686 ctxt->inSubset = 0;
7687 ctxt->instate = XML_PARSER_PROLOG;
7688#ifdef DEBUG_PUSH
7689 xmlGenericError(xmlGenericErrorContext,
7690 "PP: entering PROLOG\n");
7691#endif
7692 }
7693 } else if ((cur == '<') && (next == '!') &&
7694 (avail < 9)) {
7695 goto done;
7696 } else {
7697 ctxt->instate = XML_PARSER_START_TAG;
7698#ifdef DEBUG_PUSH
7699 xmlGenericError(xmlGenericErrorContext,
7700 "PP: entering START_TAG\n");
7701#endif
7702 }
7703 break;
7704 case XML_PARSER_IGNORE:
7705 xmlGenericError(xmlGenericErrorContext,
7706 "PP: internal error, state == IGNORE");
7707 ctxt->instate = XML_PARSER_DTD;
7708#ifdef DEBUG_PUSH
7709 xmlGenericError(xmlGenericErrorContext,
7710 "PP: entering DTD\n");
7711#endif
7712 break;
7713 case XML_PARSER_PROLOG:
7714 SKIP_BLANKS;
7715 if (ctxt->input->buf == NULL)
7716 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7717 else
7718 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7719 if (avail < 2)
7720 goto done;
7721 cur = ctxt->input->cur[0];
7722 next = ctxt->input->cur[1];
7723 if ((cur == '<') && (next == '?')) {
7724 if ((!terminate) &&
7725 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7726 goto done;
7727#ifdef DEBUG_PUSH
7728 xmlGenericError(xmlGenericErrorContext,
7729 "PP: Parsing PI\n");
7730#endif
7731 xmlParsePI(ctxt);
7732 } else if ((cur == '<') && (next == '!') &&
7733 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7734 if ((!terminate) &&
7735 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7736 goto done;
7737#ifdef DEBUG_PUSH
7738 xmlGenericError(xmlGenericErrorContext,
7739 "PP: Parsing Comment\n");
7740#endif
7741 xmlParseComment(ctxt);
7742 ctxt->instate = XML_PARSER_PROLOG;
7743 } else if ((cur == '<') && (next == '!') &&
7744 (avail < 4)) {
7745 goto done;
7746 } else {
7747 ctxt->instate = XML_PARSER_START_TAG;
7748#ifdef DEBUG_PUSH
7749 xmlGenericError(xmlGenericErrorContext,
7750 "PP: entering START_TAG\n");
7751#endif
7752 }
7753 break;
7754 case XML_PARSER_EPILOG:
7755 SKIP_BLANKS;
7756 if (ctxt->input->buf == NULL)
7757 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7758 else
7759 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7760 if (avail < 2)
7761 goto done;
7762 cur = ctxt->input->cur[0];
7763 next = ctxt->input->cur[1];
7764 if ((cur == '<') && (next == '?')) {
7765 if ((!terminate) &&
7766 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7767 goto done;
7768#ifdef DEBUG_PUSH
7769 xmlGenericError(xmlGenericErrorContext,
7770 "PP: Parsing PI\n");
7771#endif
7772 xmlParsePI(ctxt);
7773 ctxt->instate = XML_PARSER_EPILOG;
7774 } else if ((cur == '<') && (next == '!') &&
7775 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7776 if ((!terminate) &&
7777 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7778 goto done;
7779#ifdef DEBUG_PUSH
7780 xmlGenericError(xmlGenericErrorContext,
7781 "PP: Parsing Comment\n");
7782#endif
7783 xmlParseComment(ctxt);
7784 ctxt->instate = XML_PARSER_EPILOG;
7785 } else if ((cur == '<') && (next == '!') &&
7786 (avail < 4)) {
7787 goto done;
7788 } else {
7789 ctxt->errNo = XML_ERR_DOCUMENT_END;
7790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7791 ctxt->sax->error(ctxt->userData,
7792 "Extra content at the end of the document\n");
7793 ctxt->wellFormed = 0;
7794 ctxt->disableSAX = 1;
7795 ctxt->instate = XML_PARSER_EOF;
7796#ifdef DEBUG_PUSH
7797 xmlGenericError(xmlGenericErrorContext,
7798 "PP: entering EOF\n");
7799#endif
7800 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7801 (!ctxt->disableSAX))
7802 ctxt->sax->endDocument(ctxt->userData);
7803 goto done;
7804 }
7805 break;
7806 case XML_PARSER_START_TAG: {
7807 xmlChar *name, *oldname;
7808
7809 if ((avail < 2) && (ctxt->inputNr == 1))
7810 goto done;
7811 cur = ctxt->input->cur[0];
7812 if (cur != '<') {
7813 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7815 ctxt->sax->error(ctxt->userData,
7816 "Start tag expect, '<' not found\n");
7817 ctxt->wellFormed = 0;
7818 ctxt->disableSAX = 1;
7819 ctxt->instate = XML_PARSER_EOF;
7820#ifdef DEBUG_PUSH
7821 xmlGenericError(xmlGenericErrorContext,
7822 "PP: entering EOF\n");
7823#endif
7824 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7825 (!ctxt->disableSAX))
7826 ctxt->sax->endDocument(ctxt->userData);
7827 goto done;
7828 }
7829 if ((!terminate) &&
7830 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7831 goto done;
7832 if (ctxt->spaceNr == 0)
7833 spacePush(ctxt, -1);
7834 else
7835 spacePush(ctxt, *ctxt->space);
7836 name = xmlParseStartTag(ctxt);
7837 if (name == NULL) {
7838 spacePop(ctxt);
7839 ctxt->instate = XML_PARSER_EOF;
7840#ifdef DEBUG_PUSH
7841 xmlGenericError(xmlGenericErrorContext,
7842 "PP: entering EOF\n");
7843#endif
7844 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7845 (!ctxt->disableSAX))
7846 ctxt->sax->endDocument(ctxt->userData);
7847 goto done;
7848 }
7849 namePush(ctxt, xmlStrdup(name));
7850
7851 /*
7852 * [ VC: Root Element Type ]
7853 * The Name in the document type declaration must match
7854 * the element type of the root element.
7855 */
7856 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7857 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7858 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7859
7860 /*
7861 * Check for an Empty Element.
7862 */
7863 if ((RAW == '/') && (NXT(1) == '>')) {
7864 SKIP(2);
7865 if ((ctxt->sax != NULL) &&
7866 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7867 ctxt->sax->endElement(ctxt->userData, name);
7868 xmlFree(name);
7869 oldname = namePop(ctxt);
7870 spacePop(ctxt);
7871 if (oldname != NULL) {
7872#ifdef DEBUG_STACK
7873 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7874#endif
7875 xmlFree(oldname);
7876 }
7877 if (ctxt->name == NULL) {
7878 ctxt->instate = XML_PARSER_EPILOG;
7879#ifdef DEBUG_PUSH
7880 xmlGenericError(xmlGenericErrorContext,
7881 "PP: entering EPILOG\n");
7882#endif
7883 } else {
7884 ctxt->instate = XML_PARSER_CONTENT;
7885#ifdef DEBUG_PUSH
7886 xmlGenericError(xmlGenericErrorContext,
7887 "PP: entering CONTENT\n");
7888#endif
7889 }
7890 break;
7891 }
7892 if (RAW == '>') {
7893 NEXT;
7894 } else {
7895 ctxt->errNo = XML_ERR_GT_REQUIRED;
7896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7897 ctxt->sax->error(ctxt->userData,
7898 "Couldn't find end of Start Tag %s\n",
7899 name);
7900 ctxt->wellFormed = 0;
7901 ctxt->disableSAX = 1;
7902
7903 /*
7904 * end of parsing of this node.
7905 */
7906 nodePop(ctxt);
7907 oldname = namePop(ctxt);
7908 spacePop(ctxt);
7909 if (oldname != NULL) {
7910#ifdef DEBUG_STACK
7911 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7912#endif
7913 xmlFree(oldname);
7914 }
7915 }
7916 xmlFree(name);
7917 ctxt->instate = XML_PARSER_CONTENT;
7918#ifdef DEBUG_PUSH
7919 xmlGenericError(xmlGenericErrorContext,
7920 "PP: entering CONTENT\n");
7921#endif
7922 break;
7923 }
7924 case XML_PARSER_CONTENT: {
7925 const xmlChar *test;
7926 int cons;
7927 xmlChar tok;
7928
7929 /*
7930 * Handle preparsed entities and charRef
7931 */
7932 if (ctxt->token != 0) {
7933 xmlChar cur[2] = { 0 , 0 } ;
7934
7935 cur[0] = (xmlChar) ctxt->token;
7936 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7937 (ctxt->sax->characters != NULL))
7938 ctxt->sax->characters(ctxt->userData, cur, 1);
7939 ctxt->token = 0;
7940 }
7941 if ((avail < 2) && (ctxt->inputNr == 1))
7942 goto done;
7943 cur = ctxt->input->cur[0];
7944 next = ctxt->input->cur[1];
7945
7946 test = CUR_PTR;
7947 cons = ctxt->input->consumed;
7948 tok = ctxt->token;
7949 if ((cur == '<') && (next == '?')) {
7950 if ((!terminate) &&
7951 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7952 goto done;
7953#ifdef DEBUG_PUSH
7954 xmlGenericError(xmlGenericErrorContext,
7955 "PP: Parsing PI\n");
7956#endif
7957 xmlParsePI(ctxt);
7958 } else if ((cur == '<') && (next == '!') &&
7959 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7960 if ((!terminate) &&
7961 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7962 goto done;
7963#ifdef DEBUG_PUSH
7964 xmlGenericError(xmlGenericErrorContext,
7965 "PP: Parsing Comment\n");
7966#endif
7967 xmlParseComment(ctxt);
7968 ctxt->instate = XML_PARSER_CONTENT;
7969 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7970 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7971 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7972 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7973 (ctxt->input->cur[8] == '[')) {
7974 SKIP(9);
7975 ctxt->instate = XML_PARSER_CDATA_SECTION;
7976#ifdef DEBUG_PUSH
7977 xmlGenericError(xmlGenericErrorContext,
7978 "PP: entering CDATA_SECTION\n");
7979#endif
7980 break;
7981 } else if ((cur == '<') && (next == '!') &&
7982 (avail < 9)) {
7983 goto done;
7984 } else if ((cur == '<') && (next == '/')) {
7985 ctxt->instate = XML_PARSER_END_TAG;
7986#ifdef DEBUG_PUSH
7987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: entering END_TAG\n");
7989#endif
7990 break;
7991 } else if (cur == '<') {
7992 ctxt->instate = XML_PARSER_START_TAG;
7993#ifdef DEBUG_PUSH
7994 xmlGenericError(xmlGenericErrorContext,
7995 "PP: entering START_TAG\n");
7996#endif
7997 break;
7998 } else if (cur == '&') {
7999 if ((!terminate) &&
8000 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8001 goto done;
8002#ifdef DEBUG_PUSH
8003 xmlGenericError(xmlGenericErrorContext,
8004 "PP: Parsing Reference\n");
8005#endif
8006 xmlParseReference(ctxt);
8007 } else {
8008 /* TODO Avoid the extra copy, handle directly !!! */
8009 /*
8010 * Goal of the following test is:
8011 * - minimize calls to the SAX 'character' callback
8012 * when they are mergeable
8013 * - handle an problem for isBlank when we only parse
8014 * a sequence of blank chars and the next one is
8015 * not available to check against '<' presence.
8016 * - tries to homogenize the differences in SAX
8017 * callbacks beween the push and pull versions
8018 * of the parser.
8019 */
8020 if ((ctxt->inputNr == 1) &&
8021 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8022 if ((!terminate) &&
8023 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8024 goto done;
8025 }
8026 ctxt->checkIndex = 0;
8027#ifdef DEBUG_PUSH
8028 xmlGenericError(xmlGenericErrorContext,
8029 "PP: Parsing char data\n");
8030#endif
8031 xmlParseCharData(ctxt, 0);
8032 }
8033 /*
8034 * Pop-up of finished entities.
8035 */
8036 while ((RAW == 0) && (ctxt->inputNr > 1))
8037 xmlPopInput(ctxt);
8038 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8039 (tok == ctxt->token)) {
8040 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8042 ctxt->sax->error(ctxt->userData,
8043 "detected an error in element content\n");
8044 ctxt->wellFormed = 0;
8045 ctxt->disableSAX = 1;
8046 ctxt->instate = XML_PARSER_EOF;
8047 break;
8048 }
8049 break;
8050 }
8051 case XML_PARSER_CDATA_SECTION: {
8052 /*
8053 * The Push mode need to have the SAX callback for
8054 * cdataBlock merge back contiguous callbacks.
8055 */
8056 int base;
8057
8058 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8059 if (base < 0) {
8060 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8061 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8062 if (ctxt->sax->cdataBlock != NULL)
8063 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8064 XML_PARSER_BIG_BUFFER_SIZE);
8065 }
8066 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8067 ctxt->checkIndex = 0;
8068 }
8069 goto done;
8070 } else {
8071 if ((ctxt->sax != NULL) && (base > 0) &&
8072 (!ctxt->disableSAX)) {
8073 if (ctxt->sax->cdataBlock != NULL)
8074 ctxt->sax->cdataBlock(ctxt->userData,
8075 ctxt->input->cur, base);
8076 }
8077 SKIP(base + 3);
8078 ctxt->checkIndex = 0;
8079 ctxt->instate = XML_PARSER_CONTENT;
8080#ifdef DEBUG_PUSH
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: entering CONTENT\n");
8083#endif
8084 }
8085 break;
8086 }
8087 case XML_PARSER_END_TAG:
8088 if (avail < 2)
8089 goto done;
8090 if ((!terminate) &&
8091 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8092 goto done;
8093 xmlParseEndTag(ctxt);
8094 if (ctxt->name == NULL) {
8095 ctxt->instate = XML_PARSER_EPILOG;
8096#ifdef DEBUG_PUSH
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: entering EPILOG\n");
8099#endif
8100 } else {
8101 ctxt->instate = XML_PARSER_CONTENT;
8102#ifdef DEBUG_PUSH
8103 xmlGenericError(xmlGenericErrorContext,
8104 "PP: entering CONTENT\n");
8105#endif
8106 }
8107 break;
8108 case XML_PARSER_DTD: {
8109 /*
8110 * Sorry but progressive parsing of the internal subset
8111 * is not expected to be supported. We first check that
8112 * the full content of the internal subset is available and
8113 * the parsing is launched only at that point.
8114 * Internal subset ends up with "']' S? '>'" in an unescaped
8115 * section and not in a ']]>' sequence which are conditional
8116 * sections (whoever argued to keep that crap in XML deserve
8117 * a place in hell !).
8118 */
8119 int base, i;
8120 xmlChar *buf;
8121 xmlChar quote = 0;
8122
8123 base = ctxt->input->cur - ctxt->input->base;
8124 if (base < 0) return(0);
8125 if (ctxt->checkIndex > base)
8126 base = ctxt->checkIndex;
8127 buf = ctxt->input->buf->buffer->content;
8128 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8129 base++) {
8130 if (quote != 0) {
8131 if (buf[base] == quote)
8132 quote = 0;
8133 continue;
8134 }
8135 if (buf[base] == '"') {
8136 quote = '"';
8137 continue;
8138 }
8139 if (buf[base] == '\'') {
8140 quote = '\'';
8141 continue;
8142 }
8143 if (buf[base] == ']') {
8144 if ((unsigned int) base +1 >=
8145 ctxt->input->buf->buffer->use)
8146 break;
8147 if (buf[base + 1] == ']') {
8148 /* conditional crap, skip both ']' ! */
8149 base++;
8150 continue;
8151 }
8152 for (i = 0;
8153 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8154 i++) {
8155 if (buf[base + i] == '>')
8156 goto found_end_int_subset;
8157 }
8158 break;
8159 }
8160 }
8161 /*
8162 * We didn't found the end of the Internal subset
8163 */
8164 if (quote == 0)
8165 ctxt->checkIndex = base;
8166#ifdef DEBUG_PUSH
8167 if (next == 0)
8168 xmlGenericError(xmlGenericErrorContext,
8169 "PP: lookup of int subset end filed\n");
8170#endif
8171 goto done;
8172
8173found_end_int_subset:
8174 xmlParseInternalSubset(ctxt);
8175 ctxt->inSubset = 2;
8176 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8177 (ctxt->sax->externalSubset != NULL))
8178 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8179 ctxt->extSubSystem, ctxt->extSubURI);
8180 ctxt->inSubset = 0;
8181 ctxt->instate = XML_PARSER_PROLOG;
8182 ctxt->checkIndex = 0;
8183#ifdef DEBUG_PUSH
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: entering PROLOG\n");
8186#endif
8187 break;
8188 }
8189 case XML_PARSER_COMMENT:
8190 xmlGenericError(xmlGenericErrorContext,
8191 "PP: internal error, state == COMMENT\n");
8192 ctxt->instate = XML_PARSER_CONTENT;
8193#ifdef DEBUG_PUSH
8194 xmlGenericError(xmlGenericErrorContext,
8195 "PP: entering CONTENT\n");
8196#endif
8197 break;
8198 case XML_PARSER_PI:
8199 xmlGenericError(xmlGenericErrorContext,
8200 "PP: internal error, state == PI\n");
8201 ctxt->instate = XML_PARSER_CONTENT;
8202#ifdef DEBUG_PUSH
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: entering CONTENT\n");
8205#endif
8206 break;
8207 case XML_PARSER_ENTITY_DECL:
8208 xmlGenericError(xmlGenericErrorContext,
8209 "PP: internal error, state == ENTITY_DECL\n");
8210 ctxt->instate = XML_PARSER_DTD;
8211#ifdef DEBUG_PUSH
8212 xmlGenericError(xmlGenericErrorContext,
8213 "PP: entering DTD\n");
8214#endif
8215 break;
8216 case XML_PARSER_ENTITY_VALUE:
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: internal error, state == ENTITY_VALUE\n");
8219 ctxt->instate = XML_PARSER_CONTENT;
8220#ifdef DEBUG_PUSH
8221 xmlGenericError(xmlGenericErrorContext,
8222 "PP: entering DTD\n");
8223#endif
8224 break;
8225 case XML_PARSER_ATTRIBUTE_VALUE:
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8228 ctxt->instate = XML_PARSER_START_TAG;
8229#ifdef DEBUG_PUSH
8230 xmlGenericError(xmlGenericErrorContext,
8231 "PP: entering START_TAG\n");
8232#endif
8233 break;
8234 case XML_PARSER_SYSTEM_LITERAL:
8235 xmlGenericError(xmlGenericErrorContext,
8236 "PP: internal error, state == SYSTEM_LITERAL\n");
8237 ctxt->instate = XML_PARSER_START_TAG;
8238#ifdef DEBUG_PUSH
8239 xmlGenericError(xmlGenericErrorContext,
8240 "PP: entering START_TAG\n");
8241#endif
8242 break;
8243 }
8244 }
8245done:
8246#ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8248#endif
8249 return(ret);
8250}
8251
8252/**
8253 * xmlParseTry:
8254 * @ctxt: an XML parser context
8255 *
8256 * Try to progress on parsing
8257 *
8258 * Returns zero if no parsing was possible
8259 */
8260int
8261xmlParseTry(xmlParserCtxtPtr ctxt) {
8262 return(xmlParseTryOrFinish(ctxt, 0));
8263}
8264
8265/**
8266 * xmlParseChunk:
8267 * @ctxt: an XML parser context
8268 * @chunk: an char array
8269 * @size: the size in byte of the chunk
8270 * @terminate: last chunk indicator
8271 *
8272 * Parse a Chunk of memory
8273 *
8274 * Returns zero if no error, the xmlParserErrors otherwise.
8275 */
8276int
8277xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8278 int terminate) {
8279 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8280 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8281 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8282 int cur = ctxt->input->cur - ctxt->input->base;
8283
8284 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8285 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8286 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008287 ctxt->input->end =
8288 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008289#ifdef DEBUG_PUSH
8290 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8291#endif
8292
8293 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8294 xmlParseTryOrFinish(ctxt, terminate);
8295 } else if (ctxt->instate != XML_PARSER_EOF) {
8296 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8297 xmlParserInputBufferPtr in = ctxt->input->buf;
8298 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8299 (in->raw != NULL)) {
8300 int nbchars;
8301
8302 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8303 if (nbchars < 0) {
8304 xmlGenericError(xmlGenericErrorContext,
8305 "xmlParseChunk: encoder error\n");
8306 return(XML_ERR_INVALID_ENCODING);
8307 }
8308 }
8309 }
8310 }
8311 xmlParseTryOrFinish(ctxt, terminate);
8312 if (terminate) {
8313 /*
8314 * Check for termination
8315 */
8316 if ((ctxt->instate != XML_PARSER_EOF) &&
8317 (ctxt->instate != XML_PARSER_EPILOG)) {
8318 ctxt->errNo = XML_ERR_DOCUMENT_END;
8319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8320 ctxt->sax->error(ctxt->userData,
8321 "Extra content at the end of the document\n");
8322 ctxt->wellFormed = 0;
8323 ctxt->disableSAX = 1;
8324 }
8325 if (ctxt->instate != XML_PARSER_EOF) {
8326 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8327 (!ctxt->disableSAX))
8328 ctxt->sax->endDocument(ctxt->userData);
8329 }
8330 ctxt->instate = XML_PARSER_EOF;
8331 }
8332 return((xmlParserErrors) ctxt->errNo);
8333}
8334
8335/************************************************************************
8336 * *
8337 * I/O front end functions to the parser *
8338 * *
8339 ************************************************************************/
8340
8341/**
8342 * xmlStopParser:
8343 * @ctxt: an XML parser context
8344 *
8345 * Blocks further parser processing
8346 */
8347void
8348xmlStopParser(xmlParserCtxtPtr ctxt) {
8349 ctxt->instate = XML_PARSER_EOF;
8350 if (ctxt->input != NULL)
8351 ctxt->input->cur = BAD_CAST"";
8352}
8353
8354/**
8355 * xmlCreatePushParserCtxt:
8356 * @sax: a SAX handler
8357 * @user_data: The user data returned on SAX callbacks
8358 * @chunk: a pointer to an array of chars
8359 * @size: number of chars in the array
8360 * @filename: an optional file name or URI
8361 *
8362 * Create a parser context for using the XML parser in push mode
8363 * To allow content encoding detection, @size should be >= 4
8364 * The value of @filename is used for fetching external entities
8365 * and error/warning reports.
8366 *
8367 * Returns the new parser context or NULL
8368 */
8369xmlParserCtxtPtr
8370xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8371 const char *chunk, int size, const char *filename) {
8372 xmlParserCtxtPtr ctxt;
8373 xmlParserInputPtr inputStream;
8374 xmlParserInputBufferPtr buf;
8375 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8376
8377 /*
8378 * plug some encoding conversion routines
8379 */
8380 if ((chunk != NULL) && (size >= 4))
8381 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8382
8383 buf = xmlAllocParserInputBuffer(enc);
8384 if (buf == NULL) return(NULL);
8385
8386 ctxt = xmlNewParserCtxt();
8387 if (ctxt == NULL) {
8388 xmlFree(buf);
8389 return(NULL);
8390 }
8391 if (sax != NULL) {
8392 if (ctxt->sax != &xmlDefaultSAXHandler)
8393 xmlFree(ctxt->sax);
8394 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8395 if (ctxt->sax == NULL) {
8396 xmlFree(buf);
8397 xmlFree(ctxt);
8398 return(NULL);
8399 }
8400 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8401 if (user_data != NULL)
8402 ctxt->userData = user_data;
8403 }
8404 if (filename == NULL) {
8405 ctxt->directory = NULL;
8406 } else {
8407 ctxt->directory = xmlParserGetDirectory(filename);
8408 }
8409
8410 inputStream = xmlNewInputStream(ctxt);
8411 if (inputStream == NULL) {
8412 xmlFreeParserCtxt(ctxt);
8413 return(NULL);
8414 }
8415
8416 if (filename == NULL)
8417 inputStream->filename = NULL;
8418 else
8419 inputStream->filename = xmlMemStrdup(filename);
8420 inputStream->buf = buf;
8421 inputStream->base = inputStream->buf->buffer->content;
8422 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008423 inputStream->end =
8424 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008425 if (enc != XML_CHAR_ENCODING_NONE) {
8426 xmlSwitchEncoding(ctxt, enc);
8427 }
8428
8429 inputPush(ctxt, inputStream);
8430
8431 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8432 (ctxt->input->buf != NULL)) {
8433 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8434#ifdef DEBUG_PUSH
8435 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8436#endif
8437 }
8438
8439 return(ctxt);
8440}
8441
8442/**
8443 * xmlCreateIOParserCtxt:
8444 * @sax: a SAX handler
8445 * @user_data: The user data returned on SAX callbacks
8446 * @ioread: an I/O read function
8447 * @ioclose: an I/O close function
8448 * @ioctx: an I/O handler
8449 * @enc: the charset encoding if known
8450 *
8451 * Create a parser context for using the XML parser with an existing
8452 * I/O stream
8453 *
8454 * Returns the new parser context or NULL
8455 */
8456xmlParserCtxtPtr
8457xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8458 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8459 void *ioctx, xmlCharEncoding enc) {
8460 xmlParserCtxtPtr ctxt;
8461 xmlParserInputPtr inputStream;
8462 xmlParserInputBufferPtr buf;
8463
8464 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8465 if (buf == NULL) return(NULL);
8466
8467 ctxt = xmlNewParserCtxt();
8468 if (ctxt == NULL) {
8469 xmlFree(buf);
8470 return(NULL);
8471 }
8472 if (sax != NULL) {
8473 if (ctxt->sax != &xmlDefaultSAXHandler)
8474 xmlFree(ctxt->sax);
8475 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8476 if (ctxt->sax == NULL) {
8477 xmlFree(buf);
8478 xmlFree(ctxt);
8479 return(NULL);
8480 }
8481 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8482 if (user_data != NULL)
8483 ctxt->userData = user_data;
8484 }
8485
8486 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8487 if (inputStream == NULL) {
8488 xmlFreeParserCtxt(ctxt);
8489 return(NULL);
8490 }
8491 inputPush(ctxt, inputStream);
8492
8493 return(ctxt);
8494}
8495
8496/************************************************************************
8497 * *
8498 * Front ends when parsing a Dtd *
8499 * *
8500 ************************************************************************/
8501
8502/**
8503 * xmlIOParseDTD:
8504 * @sax: the SAX handler block or NULL
8505 * @input: an Input Buffer
8506 * @enc: the charset encoding if known
8507 *
8508 * Load and parse a DTD
8509 *
8510 * Returns the resulting xmlDtdPtr or NULL in case of error.
8511 * @input will be freed at parsing end.
8512 */
8513
8514xmlDtdPtr
8515xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8516 xmlCharEncoding enc) {
8517 xmlDtdPtr ret = NULL;
8518 xmlParserCtxtPtr ctxt;
8519 xmlParserInputPtr pinput = NULL;
8520
8521 if (input == NULL)
8522 return(NULL);
8523
8524 ctxt = xmlNewParserCtxt();
8525 if (ctxt == NULL) {
8526 return(NULL);
8527 }
8528
8529 /*
8530 * Set-up the SAX context
8531 */
8532 if (sax != NULL) {
8533 if (ctxt->sax != NULL)
8534 xmlFree(ctxt->sax);
8535 ctxt->sax = sax;
8536 ctxt->userData = NULL;
8537 }
8538
8539 /*
8540 * generate a parser input from the I/O handler
8541 */
8542
8543 pinput = xmlNewIOInputStream(ctxt, input, enc);
8544 if (pinput == NULL) {
8545 if (sax != NULL) ctxt->sax = NULL;
8546 xmlFreeParserCtxt(ctxt);
8547 return(NULL);
8548 }
8549
8550 /*
8551 * plug some encoding conversion routines here.
8552 */
8553 xmlPushInput(ctxt, pinput);
8554
8555 pinput->filename = NULL;
8556 pinput->line = 1;
8557 pinput->col = 1;
8558 pinput->base = ctxt->input->cur;
8559 pinput->cur = ctxt->input->cur;
8560 pinput->free = NULL;
8561
8562 /*
8563 * let's parse that entity knowing it's an external subset.
8564 */
8565 ctxt->inSubset = 2;
8566 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8567 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8568 BAD_CAST "none", BAD_CAST "none");
8569 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8570
8571 if (ctxt->myDoc != NULL) {
8572 if (ctxt->wellFormed) {
8573 ret = ctxt->myDoc->extSubset;
8574 ctxt->myDoc->extSubset = NULL;
8575 } else {
8576 ret = NULL;
8577 }
8578 xmlFreeDoc(ctxt->myDoc);
8579 ctxt->myDoc = NULL;
8580 }
8581 if (sax != NULL) ctxt->sax = NULL;
8582 xmlFreeParserCtxt(ctxt);
8583
8584 return(ret);
8585}
8586
8587/**
8588 * xmlSAXParseDTD:
8589 * @sax: the SAX handler block
8590 * @ExternalID: a NAME* containing the External ID of the DTD
8591 * @SystemID: a NAME* containing the URL to the DTD
8592 *
8593 * Load and parse an external subset.
8594 *
8595 * Returns the resulting xmlDtdPtr or NULL in case of error.
8596 */
8597
8598xmlDtdPtr
8599xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8600 const xmlChar *SystemID) {
8601 xmlDtdPtr ret = NULL;
8602 xmlParserCtxtPtr ctxt;
8603 xmlParserInputPtr input = NULL;
8604 xmlCharEncoding enc;
8605
8606 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8607
8608 ctxt = xmlNewParserCtxt();
8609 if (ctxt == NULL) {
8610 return(NULL);
8611 }
8612
8613 /*
8614 * Set-up the SAX context
8615 */
8616 if (sax != NULL) {
8617 if (ctxt->sax != NULL)
8618 xmlFree(ctxt->sax);
8619 ctxt->sax = sax;
8620 ctxt->userData = NULL;
8621 }
8622
8623 /*
8624 * Ask the Entity resolver to load the damn thing
8625 */
8626
8627 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8628 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8629 if (input == NULL) {
8630 if (sax != NULL) ctxt->sax = NULL;
8631 xmlFreeParserCtxt(ctxt);
8632 return(NULL);
8633 }
8634
8635 /*
8636 * plug some encoding conversion routines here.
8637 */
8638 xmlPushInput(ctxt, input);
8639 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8640 xmlSwitchEncoding(ctxt, enc);
8641
8642 if (input->filename == NULL)
8643 input->filename = (char *) xmlStrdup(SystemID);
8644 input->line = 1;
8645 input->col = 1;
8646 input->base = ctxt->input->cur;
8647 input->cur = ctxt->input->cur;
8648 input->free = NULL;
8649
8650 /*
8651 * let's parse that entity knowing it's an external subset.
8652 */
8653 ctxt->inSubset = 2;
8654 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8655 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8656 ExternalID, SystemID);
8657 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8658
8659 if (ctxt->myDoc != NULL) {
8660 if (ctxt->wellFormed) {
8661 ret = ctxt->myDoc->extSubset;
8662 ctxt->myDoc->extSubset = NULL;
8663 } else {
8664 ret = NULL;
8665 }
8666 xmlFreeDoc(ctxt->myDoc);
8667 ctxt->myDoc = NULL;
8668 }
8669 if (sax != NULL) ctxt->sax = NULL;
8670 xmlFreeParserCtxt(ctxt);
8671
8672 return(ret);
8673}
8674
8675/**
8676 * xmlParseDTD:
8677 * @ExternalID: a NAME* containing the External ID of the DTD
8678 * @SystemID: a NAME* containing the URL to the DTD
8679 *
8680 * Load and parse an external subset.
8681 *
8682 * Returns the resulting xmlDtdPtr or NULL in case of error.
8683 */
8684
8685xmlDtdPtr
8686xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8687 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8688}
8689
8690/************************************************************************
8691 * *
8692 * Front ends when parsing an Entity *
8693 * *
8694 ************************************************************************/
8695
8696/**
8697 * xmlSAXParseBalancedChunk:
8698 * @ctx: an XML parser context (possibly NULL)
8699 * @sax: the SAX handler bloc (possibly NULL)
8700 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8701 * @input: a parser input stream
8702 * @enc: the encoding
8703 *
8704 * Parse a well-balanced chunk of an XML document
8705 * The user has to provide SAX callback block whose routines will be
8706 * called by the parser
8707 * The allowed sequence for the Well Balanced Chunk is the one defined by
8708 * the content production in the XML grammar:
8709 *
8710 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8711 *
8712 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8713 * the error code otherwise
8714 */
8715
8716int
8717xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8718 void *user_data, xmlParserInputPtr input,
8719 xmlCharEncoding enc) {
8720 xmlParserCtxtPtr ctxt;
8721 int ret;
8722
8723 if (input == NULL) return(-1);
8724
8725 if (ctx != NULL)
8726 ctxt = ctx;
8727 else {
8728 ctxt = xmlNewParserCtxt();
8729 if (ctxt == NULL)
8730 return(-1);
8731 if (sax == NULL)
8732 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8733 }
8734
8735 /*
8736 * Set-up the SAX context
8737 */
8738 if (sax != NULL) {
8739 if (ctxt->sax != NULL)
8740 xmlFree(ctxt->sax);
8741 ctxt->sax = sax;
8742 ctxt->userData = user_data;
8743 }
8744
8745 /*
8746 * plug some encoding conversion routines here.
8747 */
8748 xmlPushInput(ctxt, input);
8749 if (enc != XML_CHAR_ENCODING_NONE)
8750 xmlSwitchEncoding(ctxt, enc);
8751
8752 /*
8753 * let's parse that entity knowing it's an external subset.
8754 */
8755 xmlParseContent(ctxt);
8756 ret = ctxt->errNo;
8757
8758 if (ctx == NULL) {
8759 if (sax != NULL)
8760 ctxt->sax = NULL;
8761 else
8762 xmlFreeDoc(ctxt->myDoc);
8763 xmlFreeParserCtxt(ctxt);
8764 }
8765 return(ret);
8766}
8767
8768/**
8769 * xmlParseCtxtExternalEntity:
8770 * @ctx: the existing parsing context
8771 * @URL: the URL for the entity to load
8772 * @ID: the System ID for the entity to load
8773 * @list: the return value for the set of parsed nodes
8774 *
8775 * Parse an external general entity within an existing parsing context
8776 * An external general parsed entity is well-formed if it matches the
8777 * production labeled extParsedEnt.
8778 *
8779 * [78] extParsedEnt ::= TextDecl? content
8780 *
8781 * Returns 0 if the entity is well formed, -1 in case of args problem and
8782 * the parser error code otherwise
8783 */
8784
8785int
8786xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8787 const xmlChar *ID, xmlNodePtr *list) {
8788 xmlParserCtxtPtr ctxt;
8789 xmlDocPtr newDoc;
8790 xmlSAXHandlerPtr oldsax = NULL;
8791 int ret = 0;
8792
8793 if (ctx->depth > 40) {
8794 return(XML_ERR_ENTITY_LOOP);
8795 }
8796
8797 if (list != NULL)
8798 *list = NULL;
8799 if ((URL == NULL) && (ID == NULL))
8800 return(-1);
8801 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8802 return(-1);
8803
8804
8805 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8806 if (ctxt == NULL) return(-1);
8807 ctxt->userData = ctxt;
8808 oldsax = ctxt->sax;
8809 ctxt->sax = ctx->sax;
8810 newDoc = xmlNewDoc(BAD_CAST "1.0");
8811 if (newDoc == NULL) {
8812 xmlFreeParserCtxt(ctxt);
8813 return(-1);
8814 }
8815 if (ctx->myDoc != NULL) {
8816 newDoc->intSubset = ctx->myDoc->intSubset;
8817 newDoc->extSubset = ctx->myDoc->extSubset;
8818 }
8819 if (ctx->myDoc->URL != NULL) {
8820 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8821 }
8822 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8823 if (newDoc->children == NULL) {
8824 ctxt->sax = oldsax;
8825 xmlFreeParserCtxt(ctxt);
8826 newDoc->intSubset = NULL;
8827 newDoc->extSubset = NULL;
8828 xmlFreeDoc(newDoc);
8829 return(-1);
8830 }
8831 nodePush(ctxt, newDoc->children);
8832 if (ctx->myDoc == NULL) {
8833 ctxt->myDoc = newDoc;
8834 } else {
8835 ctxt->myDoc = ctx->myDoc;
8836 newDoc->children->doc = ctx->myDoc;
8837 }
8838
8839 /*
8840 * Parse a possible text declaration first
8841 */
8842 GROW;
8843 if ((RAW == '<') && (NXT(1) == '?') &&
8844 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8845 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8846 xmlParseTextDecl(ctxt);
8847 }
8848
8849 /*
8850 * Doing validity checking on chunk doesn't make sense
8851 */
8852 ctxt->instate = XML_PARSER_CONTENT;
8853 ctxt->validate = ctx->validate;
8854 ctxt->loadsubset = ctx->loadsubset;
8855 ctxt->depth = ctx->depth + 1;
8856 ctxt->replaceEntities = ctx->replaceEntities;
8857 if (ctxt->validate) {
8858 ctxt->vctxt.error = ctx->vctxt.error;
8859 ctxt->vctxt.warning = ctx->vctxt.warning;
8860 /* Allocate the Node stack */
8861 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8862 if (ctxt->vctxt.nodeTab == NULL) {
8863 xmlGenericError(xmlGenericErrorContext,
8864 "xmlParseCtxtExternalEntity: out of memory\n");
8865 ctxt->validate = 0;
8866 ctxt->vctxt.error = NULL;
8867 ctxt->vctxt.warning = NULL;
8868 } else {
8869 ctxt->vctxt.nodeNr = 0;
8870 ctxt->vctxt.nodeMax = 4;
8871 ctxt->vctxt.node = NULL;
8872 }
8873 } else {
8874 ctxt->vctxt.error = NULL;
8875 ctxt->vctxt.warning = NULL;
8876 }
8877
8878 xmlParseContent(ctxt);
8879
8880 if ((RAW == '<') && (NXT(1) == '/')) {
8881 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8883 ctxt->sax->error(ctxt->userData,
8884 "chunk is not well balanced\n");
8885 ctxt->wellFormed = 0;
8886 ctxt->disableSAX = 1;
8887 } else if (RAW != 0) {
8888 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8890 ctxt->sax->error(ctxt->userData,
8891 "extra content at the end of well balanced chunk\n");
8892 ctxt->wellFormed = 0;
8893 ctxt->disableSAX = 1;
8894 }
8895 if (ctxt->node != newDoc->children) {
8896 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8898 ctxt->sax->error(ctxt->userData,
8899 "chunk is not well balanced\n");
8900 ctxt->wellFormed = 0;
8901 ctxt->disableSAX = 1;
8902 }
8903
8904 if (!ctxt->wellFormed) {
8905 if (ctxt->errNo == 0)
8906 ret = 1;
8907 else
8908 ret = ctxt->errNo;
8909 } else {
8910 if (list != NULL) {
8911 xmlNodePtr cur;
8912
8913 /*
8914 * Return the newly created nodeset after unlinking it from
8915 * they pseudo parent.
8916 */
8917 cur = newDoc->children->children;
8918 *list = cur;
8919 while (cur != NULL) {
8920 cur->parent = NULL;
8921 cur = cur->next;
8922 }
8923 newDoc->children->children = NULL;
8924 }
8925 ret = 0;
8926 }
8927 ctxt->sax = oldsax;
8928 xmlFreeParserCtxt(ctxt);
8929 newDoc->intSubset = NULL;
8930 newDoc->extSubset = NULL;
8931 xmlFreeDoc(newDoc);
8932
8933 return(ret);
8934}
8935
8936/**
8937 * xmlParseExternalEntity:
8938 * @doc: the document the chunk pertains to
8939 * @sax: the SAX handler bloc (possibly NULL)
8940 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8941 * @depth: Used for loop detection, use 0
8942 * @URL: the URL for the entity to load
8943 * @ID: the System ID for the entity to load
8944 * @list: the return value for the set of parsed nodes
8945 *
8946 * Parse an external general entity
8947 * An external general parsed entity is well-formed if it matches the
8948 * production labeled extParsedEnt.
8949 *
8950 * [78] extParsedEnt ::= TextDecl? content
8951 *
8952 * Returns 0 if the entity is well formed, -1 in case of args problem and
8953 * the parser error code otherwise
8954 */
8955
8956int
8957xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8958 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8959 xmlParserCtxtPtr ctxt;
8960 xmlDocPtr newDoc;
8961 xmlSAXHandlerPtr oldsax = NULL;
8962 int ret = 0;
8963
8964 if (depth > 40) {
8965 return(XML_ERR_ENTITY_LOOP);
8966 }
8967
8968
8969
8970 if (list != NULL)
8971 *list = NULL;
8972 if ((URL == NULL) && (ID == NULL))
8973 return(-1);
8974 if (doc == NULL) /* @@ relax but check for dereferences */
8975 return(-1);
8976
8977
8978 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8979 if (ctxt == NULL) return(-1);
8980 ctxt->userData = ctxt;
8981 if (sax != NULL) {
8982 oldsax = ctxt->sax;
8983 ctxt->sax = sax;
8984 if (user_data != NULL)
8985 ctxt->userData = user_data;
8986 }
8987 newDoc = xmlNewDoc(BAD_CAST "1.0");
8988 if (newDoc == NULL) {
8989 xmlFreeParserCtxt(ctxt);
8990 return(-1);
8991 }
8992 if (doc != NULL) {
8993 newDoc->intSubset = doc->intSubset;
8994 newDoc->extSubset = doc->extSubset;
8995 }
8996 if (doc->URL != NULL) {
8997 newDoc->URL = xmlStrdup(doc->URL);
8998 }
8999 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9000 if (newDoc->children == NULL) {
9001 if (sax != NULL)
9002 ctxt->sax = oldsax;
9003 xmlFreeParserCtxt(ctxt);
9004 newDoc->intSubset = NULL;
9005 newDoc->extSubset = NULL;
9006 xmlFreeDoc(newDoc);
9007 return(-1);
9008 }
9009 nodePush(ctxt, newDoc->children);
9010 if (doc == NULL) {
9011 ctxt->myDoc = newDoc;
9012 } else {
9013 ctxt->myDoc = doc;
9014 newDoc->children->doc = doc;
9015 }
9016
9017 /*
9018 * Parse a possible text declaration first
9019 */
9020 GROW;
9021 if ((RAW == '<') && (NXT(1) == '?') &&
9022 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9023 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9024 xmlParseTextDecl(ctxt);
9025 }
9026
9027 /*
9028 * Doing validity checking on chunk doesn't make sense
9029 */
9030 ctxt->instate = XML_PARSER_CONTENT;
9031 ctxt->validate = 0;
9032 ctxt->loadsubset = 0;
9033 ctxt->depth = depth;
9034
9035 xmlParseContent(ctxt);
9036
9037 if ((RAW == '<') && (NXT(1) == '/')) {
9038 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9040 ctxt->sax->error(ctxt->userData,
9041 "chunk is not well balanced\n");
9042 ctxt->wellFormed = 0;
9043 ctxt->disableSAX = 1;
9044 } else if (RAW != 0) {
9045 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9047 ctxt->sax->error(ctxt->userData,
9048 "extra content at the end of well balanced chunk\n");
9049 ctxt->wellFormed = 0;
9050 ctxt->disableSAX = 1;
9051 }
9052 if (ctxt->node != newDoc->children) {
9053 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9055 ctxt->sax->error(ctxt->userData,
9056 "chunk is not well balanced\n");
9057 ctxt->wellFormed = 0;
9058 ctxt->disableSAX = 1;
9059 }
9060
9061 if (!ctxt->wellFormed) {
9062 if (ctxt->errNo == 0)
9063 ret = 1;
9064 else
9065 ret = ctxt->errNo;
9066 } else {
9067 if (list != NULL) {
9068 xmlNodePtr cur;
9069
9070 /*
9071 * Return the newly created nodeset after unlinking it from
9072 * they pseudo parent.
9073 */
9074 cur = newDoc->children->children;
9075 *list = cur;
9076 while (cur != NULL) {
9077 cur->parent = NULL;
9078 cur = cur->next;
9079 }
9080 newDoc->children->children = NULL;
9081 }
9082 ret = 0;
9083 }
9084 if (sax != NULL)
9085 ctxt->sax = oldsax;
9086 xmlFreeParserCtxt(ctxt);
9087 newDoc->intSubset = NULL;
9088 newDoc->extSubset = NULL;
9089 xmlFreeDoc(newDoc);
9090
9091 return(ret);
9092}
9093
9094/**
9095 * xmlParseBalancedChunk:
9096 * @doc: the document the chunk pertains to
9097 * @sax: the SAX handler bloc (possibly NULL)
9098 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9099 * @depth: Used for loop detection, use 0
9100 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9101 * @list: the return value for the set of parsed nodes
9102 *
9103 * Parse a well-balanced chunk of an XML document
9104 * called by the parser
9105 * The allowed sequence for the Well Balanced Chunk is the one defined by
9106 * the content production in the XML grammar:
9107 *
9108 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9109 *
9110 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9111 * the parser error code otherwise
9112 */
9113
9114int
9115xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9116 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9117 xmlParserCtxtPtr ctxt;
9118 xmlDocPtr newDoc;
9119 xmlSAXHandlerPtr oldsax = NULL;
9120 int size;
9121 int ret = 0;
9122
9123 if (depth > 40) {
9124 return(XML_ERR_ENTITY_LOOP);
9125 }
9126
9127
9128 if (list != NULL)
9129 *list = NULL;
9130 if (string == NULL)
9131 return(-1);
9132
9133 size = xmlStrlen(string);
9134
9135 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9136 if (ctxt == NULL) return(-1);
9137 ctxt->userData = ctxt;
9138 if (sax != NULL) {
9139 oldsax = ctxt->sax;
9140 ctxt->sax = sax;
9141 if (user_data != NULL)
9142 ctxt->userData = user_data;
9143 }
9144 newDoc = xmlNewDoc(BAD_CAST "1.0");
9145 if (newDoc == NULL) {
9146 xmlFreeParserCtxt(ctxt);
9147 return(-1);
9148 }
9149 if (doc != NULL) {
9150 newDoc->intSubset = doc->intSubset;
9151 newDoc->extSubset = doc->extSubset;
9152 }
9153 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9154 if (newDoc->children == NULL) {
9155 if (sax != NULL)
9156 ctxt->sax = oldsax;
9157 xmlFreeParserCtxt(ctxt);
9158 newDoc->intSubset = NULL;
9159 newDoc->extSubset = NULL;
9160 xmlFreeDoc(newDoc);
9161 return(-1);
9162 }
9163 nodePush(ctxt, newDoc->children);
9164 if (doc == NULL) {
9165 ctxt->myDoc = newDoc;
9166 } else {
9167 ctxt->myDoc = doc;
9168 newDoc->children->doc = doc;
9169 }
9170 ctxt->instate = XML_PARSER_CONTENT;
9171 ctxt->depth = depth;
9172
9173 /*
9174 * Doing validity checking on chunk doesn't make sense
9175 */
9176 ctxt->validate = 0;
9177 ctxt->loadsubset = 0;
9178
9179 xmlParseContent(ctxt);
9180
9181 if ((RAW == '<') && (NXT(1) == '/')) {
9182 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9184 ctxt->sax->error(ctxt->userData,
9185 "chunk is not well balanced\n");
9186 ctxt->wellFormed = 0;
9187 ctxt->disableSAX = 1;
9188 } else if (RAW != 0) {
9189 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9191 ctxt->sax->error(ctxt->userData,
9192 "extra content at the end of well balanced chunk\n");
9193 ctxt->wellFormed = 0;
9194 ctxt->disableSAX = 1;
9195 }
9196 if (ctxt->node != newDoc->children) {
9197 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9199 ctxt->sax->error(ctxt->userData,
9200 "chunk is not well balanced\n");
9201 ctxt->wellFormed = 0;
9202 ctxt->disableSAX = 1;
9203 }
9204
9205 if (!ctxt->wellFormed) {
9206 if (ctxt->errNo == 0)
9207 ret = 1;
9208 else
9209 ret = ctxt->errNo;
9210 } else {
9211 if (list != NULL) {
9212 xmlNodePtr cur;
9213
9214 /*
9215 * Return the newly created nodeset after unlinking it from
9216 * they pseudo parent.
9217 */
9218 cur = newDoc->children->children;
9219 *list = cur;
9220 while (cur != NULL) {
9221 cur->parent = NULL;
9222 cur = cur->next;
9223 }
9224 newDoc->children->children = NULL;
9225 }
9226 ret = 0;
9227 }
9228 if (sax != NULL)
9229 ctxt->sax = oldsax;
9230 xmlFreeParserCtxt(ctxt);
9231 newDoc->intSubset = NULL;
9232 newDoc->extSubset = NULL;
9233 xmlFreeDoc(newDoc);
9234
9235 return(ret);
9236}
9237
9238/**
9239 * xmlSAXParseEntity:
9240 * @sax: the SAX handler block
9241 * @filename: the filename
9242 *
9243 * parse an XML external entity out of context and build a tree.
9244 * It use the given SAX function block to handle the parsing callback.
9245 * If sax is NULL, fallback to the default DOM tree building routines.
9246 *
9247 * [78] extParsedEnt ::= TextDecl? content
9248 *
9249 * This correspond to a "Well Balanced" chunk
9250 *
9251 * Returns the resulting document tree
9252 */
9253
9254xmlDocPtr
9255xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9256 xmlDocPtr ret;
9257 xmlParserCtxtPtr ctxt;
9258 char *directory = NULL;
9259
9260 ctxt = xmlCreateFileParserCtxt(filename);
9261 if (ctxt == NULL) {
9262 return(NULL);
9263 }
9264 if (sax != NULL) {
9265 if (ctxt->sax != NULL)
9266 xmlFree(ctxt->sax);
9267 ctxt->sax = sax;
9268 ctxt->userData = NULL;
9269 }
9270
9271 if ((ctxt->directory == NULL) && (directory == NULL))
9272 directory = xmlParserGetDirectory(filename);
9273
9274 xmlParseExtParsedEnt(ctxt);
9275
9276 if (ctxt->wellFormed)
9277 ret = ctxt->myDoc;
9278 else {
9279 ret = NULL;
9280 xmlFreeDoc(ctxt->myDoc);
9281 ctxt->myDoc = NULL;
9282 }
9283 if (sax != NULL)
9284 ctxt->sax = NULL;
9285 xmlFreeParserCtxt(ctxt);
9286
9287 return(ret);
9288}
9289
9290/**
9291 * xmlParseEntity:
9292 * @filename: the filename
9293 *
9294 * parse an XML external entity out of context and build a tree.
9295 *
9296 * [78] extParsedEnt ::= TextDecl? content
9297 *
9298 * This correspond to a "Well Balanced" chunk
9299 *
9300 * Returns the resulting document tree
9301 */
9302
9303xmlDocPtr
9304xmlParseEntity(const char *filename) {
9305 return(xmlSAXParseEntity(NULL, filename));
9306}
9307
9308/**
9309 * xmlCreateEntityParserCtxt:
9310 * @URL: the entity URL
9311 * @ID: the entity PUBLIC ID
9312 * @base: a posible base for the target URI
9313 *
9314 * Create a parser context for an external entity
9315 * Automatic support for ZLIB/Compress compressed document is provided
9316 * by default if found at compile-time.
9317 *
9318 * Returns the new parser context or NULL
9319 */
9320xmlParserCtxtPtr
9321xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9322 const xmlChar *base) {
9323 xmlParserCtxtPtr ctxt;
9324 xmlParserInputPtr inputStream;
9325 char *directory = NULL;
9326 xmlChar *uri;
9327
9328 ctxt = xmlNewParserCtxt();
9329 if (ctxt == NULL) {
9330 return(NULL);
9331 }
9332
9333 uri = xmlBuildURI(URL, base);
9334
9335 if (uri == NULL) {
9336 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9337 if (inputStream == NULL) {
9338 xmlFreeParserCtxt(ctxt);
9339 return(NULL);
9340 }
9341
9342 inputPush(ctxt, inputStream);
9343
9344 if ((ctxt->directory == NULL) && (directory == NULL))
9345 directory = xmlParserGetDirectory((char *)URL);
9346 if ((ctxt->directory == NULL) && (directory != NULL))
9347 ctxt->directory = directory;
9348 } else {
9349 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9350 if (inputStream == NULL) {
9351 xmlFree(uri);
9352 xmlFreeParserCtxt(ctxt);
9353 return(NULL);
9354 }
9355
9356 inputPush(ctxt, inputStream);
9357
9358 if ((ctxt->directory == NULL) && (directory == NULL))
9359 directory = xmlParserGetDirectory((char *)uri);
9360 if ((ctxt->directory == NULL) && (directory != NULL))
9361 ctxt->directory = directory;
9362 xmlFree(uri);
9363 }
9364
9365 return(ctxt);
9366}
9367
9368/************************************************************************
9369 * *
9370 * Front ends when parsing from a file *
9371 * *
9372 ************************************************************************/
9373
9374/**
9375 * xmlCreateFileParserCtxt:
9376 * @filename: the filename
9377 *
9378 * Create a parser context for a file content.
9379 * Automatic support for ZLIB/Compress compressed document is provided
9380 * by default if found at compile-time.
9381 *
9382 * Returns the new parser context or NULL
9383 */
9384xmlParserCtxtPtr
9385xmlCreateFileParserCtxt(const char *filename)
9386{
9387 xmlParserCtxtPtr ctxt;
9388 xmlParserInputPtr inputStream;
9389 xmlParserInputBufferPtr buf;
9390 char *directory = NULL;
9391
9392 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9393 if (buf == NULL) {
9394 return(NULL);
9395 }
9396
9397 ctxt = xmlNewParserCtxt();
9398 if (ctxt == NULL) {
9399 if (xmlDefaultSAXHandler.error != NULL) {
9400 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9401 }
9402 return(NULL);
9403 }
9404
9405 inputStream = xmlNewInputStream(ctxt);
9406 if (inputStream == NULL) {
9407 xmlFreeParserCtxt(ctxt);
9408 return(NULL);
9409 }
9410
9411 inputStream->filename = xmlMemStrdup(filename);
9412 inputStream->buf = buf;
9413 inputStream->base = inputStream->buf->buffer->content;
9414 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009415 inputStream->end =
9416 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009417
9418 inputPush(ctxt, inputStream);
9419 if ((ctxt->directory == NULL) && (directory == NULL))
9420 directory = xmlParserGetDirectory(filename);
9421 if ((ctxt->directory == NULL) && (directory != NULL))
9422 ctxt->directory = directory;
9423
9424 return(ctxt);
9425}
9426
9427/**
9428 * xmlSAXParseFile:
9429 * @sax: the SAX handler block
9430 * @filename: the filename
9431 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9432 * documents
9433 *
9434 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9435 * compressed document is provided by default if found at compile-time.
9436 * It use the given SAX function block to handle the parsing callback.
9437 * If sax is NULL, fallback to the default DOM tree building routines.
9438 *
9439 * Returns the resulting document tree
9440 */
9441
9442xmlDocPtr
9443xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9444 int recovery) {
9445 xmlDocPtr ret;
9446 xmlParserCtxtPtr ctxt;
9447 char *directory = NULL;
9448
9449 ctxt = xmlCreateFileParserCtxt(filename);
9450 if (ctxt == NULL) {
9451 return(NULL);
9452 }
9453 if (sax != NULL) {
9454 if (ctxt->sax != NULL)
9455 xmlFree(ctxt->sax);
9456 ctxt->sax = sax;
9457 ctxt->userData = NULL;
9458 }
9459
9460 if ((ctxt->directory == NULL) && (directory == NULL))
9461 directory = xmlParserGetDirectory(filename);
9462 if ((ctxt->directory == NULL) && (directory != NULL))
9463 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9464
9465 xmlParseDocument(ctxt);
9466
9467 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9468 else {
9469 ret = NULL;
9470 xmlFreeDoc(ctxt->myDoc);
9471 ctxt->myDoc = NULL;
9472 }
9473 if (sax != NULL)
9474 ctxt->sax = NULL;
9475 xmlFreeParserCtxt(ctxt);
9476
9477 return(ret);
9478}
9479
9480/**
9481 * xmlRecoverDoc:
9482 * @cur: a pointer to an array of xmlChar
9483 *
9484 * parse an XML in-memory document and build a tree.
9485 * In the case the document is not Well Formed, a tree is built anyway
9486 *
9487 * Returns the resulting document tree
9488 */
9489
9490xmlDocPtr
9491xmlRecoverDoc(xmlChar *cur) {
9492 return(xmlSAXParseDoc(NULL, cur, 1));
9493}
9494
9495/**
9496 * xmlParseFile:
9497 * @filename: the filename
9498 *
9499 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9500 * compressed document is provided by default if found at compile-time.
9501 *
9502 * Returns the resulting document tree
9503 */
9504
9505xmlDocPtr
9506xmlParseFile(const char *filename) {
9507 return(xmlSAXParseFile(NULL, filename, 0));
9508}
9509
9510/**
9511 * xmlRecoverFile:
9512 * @filename: the filename
9513 *
9514 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9515 * compressed document is provided by default if found at compile-time.
9516 * In the case the document is not Well Formed, a tree is built anyway
9517 *
9518 * Returns the resulting document tree
9519 */
9520
9521xmlDocPtr
9522xmlRecoverFile(const char *filename) {
9523 return(xmlSAXParseFile(NULL, filename, 1));
9524}
9525
9526
9527/**
9528 * xmlSetupParserForBuffer:
9529 * @ctxt: an XML parser context
9530 * @buffer: a xmlChar * buffer
9531 * @filename: a file name
9532 *
9533 * Setup the parser context to parse a new buffer; Clears any prior
9534 * contents from the parser context. The buffer parameter must not be
9535 * NULL, but the filename parameter can be
9536 */
9537void
9538xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9539 const char* filename)
9540{
9541 xmlParserInputPtr input;
9542
9543 input = xmlNewInputStream(ctxt);
9544 if (input == NULL) {
9545 perror("malloc");
9546 xmlFree(ctxt);
9547 return;
9548 }
9549
9550 xmlClearParserCtxt(ctxt);
9551 if (filename != NULL)
9552 input->filename = xmlMemStrdup(filename);
9553 input->base = buffer;
9554 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009555 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009556 inputPush(ctxt, input);
9557}
9558
9559/**
9560 * xmlSAXUserParseFile:
9561 * @sax: a SAX handler
9562 * @user_data: The user data returned on SAX callbacks
9563 * @filename: a file name
9564 *
9565 * parse an XML file and call the given SAX handler routines.
9566 * Automatic support for ZLIB/Compress compressed document is provided
9567 *
9568 * Returns 0 in case of success or a error number otherwise
9569 */
9570int
9571xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9572 const char *filename) {
9573 int ret = 0;
9574 xmlParserCtxtPtr ctxt;
9575
9576 ctxt = xmlCreateFileParserCtxt(filename);
9577 if (ctxt == NULL) return -1;
9578 if (ctxt->sax != &xmlDefaultSAXHandler)
9579 xmlFree(ctxt->sax);
9580 ctxt->sax = sax;
9581 if (user_data != NULL)
9582 ctxt->userData = user_data;
9583
9584 xmlParseDocument(ctxt);
9585
9586 if (ctxt->wellFormed)
9587 ret = 0;
9588 else {
9589 if (ctxt->errNo != 0)
9590 ret = ctxt->errNo;
9591 else
9592 ret = -1;
9593 }
9594 if (sax != NULL)
9595 ctxt->sax = NULL;
9596 xmlFreeParserCtxt(ctxt);
9597
9598 return ret;
9599}
9600
9601/************************************************************************
9602 * *
9603 * Front ends when parsing from memory *
9604 * *
9605 ************************************************************************/
9606
9607/**
9608 * xmlCreateMemoryParserCtxt:
9609 * @buffer: a pointer to a char array
9610 * @size: the size of the array
9611 *
9612 * Create a parser context for an XML in-memory document.
9613 *
9614 * Returns the new parser context or NULL
9615 */
9616xmlParserCtxtPtr
9617xmlCreateMemoryParserCtxt(char *buffer, int size) {
9618 xmlParserCtxtPtr ctxt;
9619 xmlParserInputPtr input;
9620 xmlParserInputBufferPtr buf;
9621
9622 if (buffer == NULL)
9623 return(NULL);
9624 if (size <= 0)
9625 return(NULL);
9626
9627 ctxt = xmlNewParserCtxt();
9628 if (ctxt == NULL)
9629 return(NULL);
9630
9631 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9632 if (buf == NULL) return(NULL);
9633
9634 input = xmlNewInputStream(ctxt);
9635 if (input == NULL) {
9636 xmlFreeParserCtxt(ctxt);
9637 return(NULL);
9638 }
9639
9640 input->filename = NULL;
9641 input->buf = buf;
9642 input->base = input->buf->buffer->content;
9643 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009644 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009645
9646 inputPush(ctxt, input);
9647 return(ctxt);
9648}
9649
9650/**
9651 * xmlSAXParseMemory:
9652 * @sax: the SAX handler block
9653 * @buffer: an pointer to a char array
9654 * @size: the size of the array
9655 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9656 * documents
9657 *
9658 * parse an XML in-memory block and use the given SAX function block
9659 * to handle the parsing callback. If sax is NULL, fallback to the default
9660 * DOM tree building routines.
9661 *
9662 * Returns the resulting document tree
9663 */
9664xmlDocPtr
9665xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9666 xmlDocPtr ret;
9667 xmlParserCtxtPtr ctxt;
9668
9669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9670 if (ctxt == NULL) return(NULL);
9671 if (sax != NULL) {
9672 ctxt->sax = sax;
9673 ctxt->userData = NULL;
9674 }
9675
9676 xmlParseDocument(ctxt);
9677
9678 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9679 else {
9680 ret = NULL;
9681 xmlFreeDoc(ctxt->myDoc);
9682 ctxt->myDoc = NULL;
9683 }
9684 if (sax != NULL)
9685 ctxt->sax = NULL;
9686 xmlFreeParserCtxt(ctxt);
9687
9688 return(ret);
9689}
9690
9691/**
9692 * xmlParseMemory:
9693 * @buffer: an pointer to a char array
9694 * @size: the size of the array
9695 *
9696 * parse an XML in-memory block and build a tree.
9697 *
9698 * Returns the resulting document tree
9699 */
9700
9701xmlDocPtr xmlParseMemory(char *buffer, int size) {
9702 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9703}
9704
9705/**
9706 * xmlRecoverMemory:
9707 * @buffer: an pointer to a char array
9708 * @size: the size of the array
9709 *
9710 * parse an XML in-memory block and build a tree.
9711 * In the case the document is not Well Formed, a tree is built anyway
9712 *
9713 * Returns the resulting document tree
9714 */
9715
9716xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9717 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9718}
9719
9720/**
9721 * xmlSAXUserParseMemory:
9722 * @sax: a SAX handler
9723 * @user_data: The user data returned on SAX callbacks
9724 * @buffer: an in-memory XML document input
9725 * @size: the length of the XML document in bytes
9726 *
9727 * A better SAX parsing routine.
9728 * parse an XML in-memory buffer and call the given SAX handler routines.
9729 *
9730 * Returns 0 in case of success or a error number otherwise
9731 */
9732int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9733 char *buffer, int size) {
9734 int ret = 0;
9735 xmlParserCtxtPtr ctxt;
9736 xmlSAXHandlerPtr oldsax = NULL;
9737
9738 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9739 if (ctxt == NULL) return -1;
9740 if (sax != NULL) {
9741 oldsax = ctxt->sax;
9742 ctxt->sax = sax;
9743 }
9744 ctxt->userData = user_data;
9745
9746 xmlParseDocument(ctxt);
9747
9748 if (ctxt->wellFormed)
9749 ret = 0;
9750 else {
9751 if (ctxt->errNo != 0)
9752 ret = ctxt->errNo;
9753 else
9754 ret = -1;
9755 }
9756 if (sax != NULL) {
9757 ctxt->sax = oldsax;
9758 }
9759 xmlFreeParserCtxt(ctxt);
9760
9761 return ret;
9762}
9763
9764/**
9765 * xmlCreateDocParserCtxt:
9766 * @cur: a pointer to an array of xmlChar
9767 *
9768 * Creates a parser context for an XML in-memory document.
9769 *
9770 * Returns the new parser context or NULL
9771 */
9772xmlParserCtxtPtr
9773xmlCreateDocParserCtxt(xmlChar *cur) {
9774 int len;
9775
9776 if (cur == NULL)
9777 return(NULL);
9778 len = xmlStrlen(cur);
9779 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9780}
9781
9782/**
9783 * xmlSAXParseDoc:
9784 * @sax: the SAX handler block
9785 * @cur: a pointer to an array of xmlChar
9786 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9787 * documents
9788 *
9789 * parse an XML in-memory document and build a tree.
9790 * It use the given SAX function block to handle the parsing callback.
9791 * If sax is NULL, fallback to the default DOM tree building routines.
9792 *
9793 * Returns the resulting document tree
9794 */
9795
9796xmlDocPtr
9797xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9798 xmlDocPtr ret;
9799 xmlParserCtxtPtr ctxt;
9800
9801 if (cur == NULL) return(NULL);
9802
9803
9804 ctxt = xmlCreateDocParserCtxt(cur);
9805 if (ctxt == NULL) return(NULL);
9806 if (sax != NULL) {
9807 ctxt->sax = sax;
9808 ctxt->userData = NULL;
9809 }
9810
9811 xmlParseDocument(ctxt);
9812 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9813 else {
9814 ret = NULL;
9815 xmlFreeDoc(ctxt->myDoc);
9816 ctxt->myDoc = NULL;
9817 }
9818 if (sax != NULL)
9819 ctxt->sax = NULL;
9820 xmlFreeParserCtxt(ctxt);
9821
9822 return(ret);
9823}
9824
9825/**
9826 * xmlParseDoc:
9827 * @cur: a pointer to an array of xmlChar
9828 *
9829 * parse an XML in-memory document and build a tree.
9830 *
9831 * Returns the resulting document tree
9832 */
9833
9834xmlDocPtr
9835xmlParseDoc(xmlChar *cur) {
9836 return(xmlSAXParseDoc(NULL, cur, 0));
9837}
9838
9839
9840/************************************************************************
9841 * *
9842 * Miscellaneous *
9843 * *
9844 ************************************************************************/
9845
9846#ifdef LIBXML_XPATH_ENABLED
9847#include <libxml/xpath.h>
9848#endif
9849
/*
 * Set to 1 once xmlInitParser() has completed, reset to 0 by
 * xmlCleanupParser().
 */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * Registers the encoding handlers, the predefined entities, the default
 * SAX handler and the default input/output callbacks, plus the HTML and
 * XPath parts when they are compiled in. Calls made after a completed
 * initialization return immediately.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */
void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* flagged only once every step above has run */
        xmlParserInitialized = 1;
    }
}
9878
9879/**
9880 * xmlCleanupParser:
9881 *
9882 * Cleanup function for the XML parser. It tries to reclaim all
9883 * parsing related global memory allocated for the parser processing.
9884 * It doesn't deallocate any document related memory. Calling this
9885 * function should not prevent reusing the parser.
9886 */
9887
9888void
9889xmlCleanupParser(void) {
9890 xmlParserInitialized = 0;
9891 xmlCleanupCharEncodingHandlers();
9892 xmlCleanupPredefinedEntities();
9893}
9894
9895/**
9896 * xmlPedanticParserDefault:
9897 * @val: int 0 or 1
9898 *
9899 * Set and return the previous value for enabling pedantic warnings.
9900 *
9901 * Returns the last value for 0 for no substitution, 1 for substitution.
9902 */
9903
9904int
9905xmlPedanticParserDefault(int val) {
9906 int old = xmlPedanticParserDefaultValue;
9907
9908 xmlPedanticParserDefaultValue = val;
9909 return(old);
9910}
9911
9912/**
9913 * xmlSubstituteEntitiesDefault:
9914 * @val: int 0 or 1
9915 *
9916 * Set and return the previous value for default entity support.
9917 * Initially the parser always keep entity references instead of substituting
9918 * entity values in the output. This function has to be used to change the
9919 * default parser behaviour
9920 * SAX::subtituteEntities() has to be used for changing that on a file by
9921 * file basis.
9922 *
9923 * Returns the last value for 0 for no substitution, 1 for substitution.
9924 */
9925
9926int
9927xmlSubstituteEntitiesDefault(int val) {
9928 int old = xmlSubstituteEntitiesDefaultValue;
9929
9930 xmlSubstituteEntitiesDefaultValue = val;
9931 return(old);
9932}
9933
9934/**
9935 * xmlKeepBlanksDefault:
9936 * @val: int 0 or 1
9937 *
9938 * Set and return the previous value for default blanks text nodes support.
9939 * The 1.x version of the parser used an heuristic to try to detect
9940 * ignorable white spaces. As a result the SAX callback was generating
9941 * ignorableWhitespace() callbacks instead of characters() one, and when
9942 * using the DOM output text nodes containing those blanks were not generated.
9943 * The 2.x and later version will switch to the XML standard way and
9944 * ignorableWhitespace() are only generated when running the parser in
9945 * validating mode and when the current element doesn't allow CDATA or
9946 * mixed content.
9947 * This function is provided as a way to force the standard behaviour
9948 * on 1.X libs and to switch back to the old mode for compatibility when
9949 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9950 * by using xmlIsBlankNode() commodity function to detect the "empty"
9951 * nodes generated.
9952 * This value also affect autogeneration of indentation when saving code
9953 * if blanks sections are kept, indentation is not generated.
9954 *
9955 * Returns the last value for 0 for no substitution, 1 for substitution.
9956 */
9957
9958int
9959xmlKeepBlanksDefault(int val) {
9960 int old = xmlKeepBlanksDefaultValue;
9961
9962 xmlKeepBlanksDefaultValue = val;
9963 xmlIndentTreeOutput = !val;
9964 return(old);
9965}
9966