blob: faa28536a9411fcaa296673e5e4bc11fb36bcdd9 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser; namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay, this is a big file: the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 *                  and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * allocated by xmlStringDecodeEntities(); XML_PARSER_BUFFER_SIZE is the
 * safety margin kept free at the end of that buffer before it is grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing.
 *
 * On VMS the two longest identifiers are defined under truncated names and
 * a macro maps the full name onto the truncated one.
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;	/* non-zero: trace entity push/pop/decoding */
#ifdef VMS
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specs.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
112
113/************************************************************************
114 * *
115 * Parser stacks related functions and macros *
116 * *
117 ************************************************************************/
118
119xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
120 const xmlChar ** str);
121
122/*
123 * Generic function for accessing stacks in the Parser Context
124 */
125
126#define PUSH_AND_POP(scope, type, name) \
127scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
128 if (ctxt->name##Nr >= ctxt->name##Max) { \
129 ctxt->name##Max *= 2; \
130 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
131 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
132 if (ctxt->name##Tab == NULL) { \
133 xmlGenericError(xmlGenericErrorContext, \
134 "realloc failed !\n"); \
135 return(0); \
136 } \
137 } \
138 ctxt->name##Tab[ctxt->name##Nr] = value; \
139 ctxt->name = value; \
140 return(ctxt->name##Nr++); \
141} \
142scope type name##Pop(xmlParserCtxtPtr ctxt) { \
143 type ret; \
144 if (ctxt->name##Nr <= 0) return(0); \
145 ctxt->name##Nr--; \
146 if (ctxt->name##Nr > 0) \
147 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
148 else \
149 ctxt->name = NULL; \
150 ret = ctxt->name##Tab[ctxt->name##Nr]; \
151 ctxt->name##Tab[ctxt->name##Nr] = 0; \
152 return(ret); \
153} \
154
155/*
156 * Those macros actually generate the functions
157 */
158PUSH_AND_POP(extern, xmlParserInputPtr, input)
159PUSH_AND_POP(extern, xmlNodePtr, node)
160PUSH_AND_POP(extern, xmlChar*, name)
161
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000162static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000163 if (ctxt->spaceNr >= ctxt->spaceMax) {
164 ctxt->spaceMax *= 2;
165 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
166 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
167 if (ctxt->spaceTab == NULL) {
168 xmlGenericError(xmlGenericErrorContext,
169 "realloc failed !\n");
170 return(0);
171 }
172 }
173 ctxt->spaceTab[ctxt->spaceNr] = val;
174 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
175 return(ctxt->spaceNr++);
176}
177
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000178static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000179 int ret;
180 if (ctxt->spaceNr <= 0) return(0);
181 ctxt->spaceNr--;
182 if (ctxt->spaceNr > 0)
183 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
184 else
185 ctxt->space = NULL;
186 ret = ctxt->spaceTab[ctxt->spaceNr];
187 ctxt->spaceTab[ctxt->spaceNr] = -1;
188 return(ret);
189}
190
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only looks at the input buffer: it yields -1
 *           while a token is pending, so it never matches a character
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up finished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar and try to grow the input if its end
 *           was reached
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * Macro arguments are parenthesized in the expansions so that passing an
 * expression instead of a plain identifier cannot change the meaning.
 * SHRINK, GROW, NEXT1 and COPY_BUF are not wrapped in do { } while (0);
 * take care when using them as the body of an if/else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 { \
    ctxt->input->cur++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }

#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000277
278/**
279 * xmlSkipBlankChars:
280 * @ctxt: the XML parser context
281 *
282 * skip all blanks character found at that point in the input streams.
283 * It pops up finished entities in the process if allowable at that point.
284 *
285 * Returns the number of space chars skipped
286 */
287
288int
289xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000290 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000291
Daniel Veillard02141ea2001-04-30 11:46:40 +0000292 if (ctxt->token != 0) {
293 if (!IS_BLANK(ctxt->token))
294 return(0);
295 ctxt->token = 0;
296 res++;
297 }
Owen Taylor3473f882001-02-23 17:55:21 +0000298 /*
299 * It's Okay to use CUR/NEXT here since all the blanks are on
300 * the ASCII range.
301 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000302 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
303 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000304 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000305 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000306 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000307 cur = ctxt->input->cur;
308 while (IS_BLANK(*cur)) {
309 if (*cur == '\n') {
310 ctxt->input->line++; ctxt->input->col = 1;
311 }
312 cur++;
313 res++;
314 if (*cur == 0) {
315 ctxt->input->cur = cur;
316 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
317 cur = ctxt->input->cur;
318 }
319 }
320 ctxt->input->cur = cur;
321 } else {
322 int cur;
323 do {
324 cur = CUR;
325 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
326 NEXT;
327 cur = CUR;
328 res++;
329 }
330 while ((cur == 0) && (ctxt->inputNr > 1) &&
331 (ctxt->instate != XML_PARSER_COMMENT)) {
332 xmlPopInput(ctxt);
333 cur = CUR;
334 }
335 /*
336 * Need to handle support of entities branching here
337 */
338 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
339 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 return(res);
342}
343
344/************************************************************************
345 * *
346 * Commodity functions to handle entities *
347 * *
348 ************************************************************************/
349
350/**
351 * xmlPopInput:
352 * @ctxt: an XML parser context
353 *
354 * xmlPopInput: the current input pointed by ctxt->input came to an end
355 * pop it and return the next char.
356 *
357 * Returns the current xmlChar in the parser context
358 */
359xmlChar
360xmlPopInput(xmlParserCtxtPtr ctxt) {
361 if (ctxt->inputNr == 1) return(0); /* End of main Input */
362 if (xmlParserDebugEntities)
363 xmlGenericError(xmlGenericErrorContext,
364 "Popping input %d\n", ctxt->inputNr);
365 xmlFreeInputStream(inputPop(ctxt));
366 if ((*ctxt->input->cur == 0) &&
367 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
368 return(xmlPopInput(ctxt));
369 return(CUR);
370}
371
372/**
373 * xmlPushInput:
374 * @ctxt: an XML parser context
375 * @input: an XML parser input fragment (entity, XML fragment ...).
376 *
377 * xmlPushInput: switch to a new input stream which is stacked on top
378 * of the previous one(s).
379 */
380void
381xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
382 if (input == NULL) return;
383
384 if (xmlParserDebugEntities) {
385 if ((ctxt->input != NULL) && (ctxt->input->filename))
386 xmlGenericError(xmlGenericErrorContext,
387 "%s(%d): ", ctxt->input->filename,
388 ctxt->input->line);
389 xmlGenericError(xmlGenericErrorContext,
390 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
391 }
392 inputPush(ctxt, input);
393 GROW;
394}
395
396/**
397 * xmlParseCharRef:
398 * @ctxt: an XML parser context
399 *
400 * parse Reference declarations
401 *
402 * [66] CharRef ::= '&#' [0-9]+ ';' |
403 * '&#x' [0-9a-fA-F]+ ';'
404 *
405 * [ WFC: Legal Character ]
406 * Characters referred to using character references must match the
407 * production for Char.
408 *
409 * Returns the value parsed (as an int), 0 in case of error
410 */
411int
412xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000413 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000414 int count = 0;
415
416 if (ctxt->token != 0) {
417 val = ctxt->token;
418 ctxt->token = 0;
419 return(val);
420 }
421 /*
422 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
423 */
424 if ((RAW == '&') && (NXT(1) == '#') &&
425 (NXT(2) == 'x')) {
426 SKIP(3);
427 GROW;
428 while (RAW != ';') { /* loop blocked by count */
429 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
430 val = val * 16 + (CUR - '0');
431 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
432 val = val * 16 + (CUR - 'a') + 10;
433 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
434 val = val * 16 + (CUR - 'A') + 10;
435 else {
436 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
438 ctxt->sax->error(ctxt->userData,
439 "xmlParseCharRef: invalid hexadecimal value\n");
440 ctxt->wellFormed = 0;
441 ctxt->disableSAX = 1;
442 val = 0;
443 break;
444 }
445 NEXT;
446 count++;
447 }
448 if (RAW == ';') {
449 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
450 ctxt->nbChars ++;
451 ctxt->input->cur++;
452 }
453 } else if ((RAW == '&') && (NXT(1) == '#')) {
454 SKIP(2);
455 GROW;
456 while (RAW != ';') { /* loop blocked by count */
457 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
458 val = val * 10 + (CUR - '0');
459 else {
460 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
462 ctxt->sax->error(ctxt->userData,
463 "xmlParseCharRef: invalid decimal value\n");
464 ctxt->wellFormed = 0;
465 ctxt->disableSAX = 1;
466 val = 0;
467 break;
468 }
469 NEXT;
470 count++;
471 }
472 if (RAW == ';') {
473 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
474 ctxt->nbChars ++;
475 ctxt->input->cur++;
476 }
477 } else {
478 ctxt->errNo = XML_ERR_INVALID_CHARREF;
479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
480 ctxt->sax->error(ctxt->userData,
481 "xmlParseCharRef: invalid value\n");
482 ctxt->wellFormed = 0;
483 ctxt->disableSAX = 1;
484 }
485
486 /*
487 * [ WFC: Legal Character ]
488 * Characters referred to using character references must match the
489 * production for Char.
490 */
491 if (IS_CHAR(val)) {
492 return(val);
493 } else {
494 ctxt->errNo = XML_ERR_INVALID_CHAR;
495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
496 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
497 val);
498 ctxt->wellFormed = 0;
499 ctxt->disableSAX = 1;
500 }
501 return(0);
502}
503
504/**
505 * xmlParseStringCharRef:
506 * @ctxt: an XML parser context
507 * @str: a pointer to an index in the string
508 *
509 * parse Reference declarations, variant parsing from a string rather
510 * than an an input flow.
511 *
512 * [66] CharRef ::= '&#' [0-9]+ ';' |
513 * '&#x' [0-9a-fA-F]+ ';'
514 *
515 * [ WFC: Legal Character ]
516 * Characters referred to using character references must match the
517 * production for Char.
518 *
519 * Returns the value parsed (as an int), 0 in case of error, str will be
520 * updated to the current value of the index
521 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000522static int
Owen Taylor3473f882001-02-23 17:55:21 +0000523xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
524 const xmlChar *ptr;
525 xmlChar cur;
526 int val = 0;
527
528 if ((str == NULL) || (*str == NULL)) return(0);
529 ptr = *str;
530 cur = *ptr;
531 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
532 ptr += 3;
533 cur = *ptr;
534 while (cur != ';') { /* Non input consuming loop */
535 if ((cur >= '0') && (cur <= '9'))
536 val = val * 16 + (cur - '0');
537 else if ((cur >= 'a') && (cur <= 'f'))
538 val = val * 16 + (cur - 'a') + 10;
539 else if ((cur >= 'A') && (cur <= 'F'))
540 val = val * 16 + (cur - 'A') + 10;
541 else {
542 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
544 ctxt->sax->error(ctxt->userData,
545 "xmlParseStringCharRef: invalid hexadecimal value\n");
546 ctxt->wellFormed = 0;
547 ctxt->disableSAX = 1;
548 val = 0;
549 break;
550 }
551 ptr++;
552 cur = *ptr;
553 }
554 if (cur == ';')
555 ptr++;
556 } else if ((cur == '&') && (ptr[1] == '#')){
557 ptr += 2;
558 cur = *ptr;
559 while (cur != ';') { /* Non input consuming loops */
560 if ((cur >= '0') && (cur <= '9'))
561 val = val * 10 + (cur - '0');
562 else {
563 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
565 ctxt->sax->error(ctxt->userData,
566 "xmlParseStringCharRef: invalid decimal value\n");
567 ctxt->wellFormed = 0;
568 ctxt->disableSAX = 1;
569 val = 0;
570 break;
571 }
572 ptr++;
573 cur = *ptr;
574 }
575 if (cur == ';')
576 ptr++;
577 } else {
578 ctxt->errNo = XML_ERR_INVALID_CHARREF;
579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
580 ctxt->sax->error(ctxt->userData,
581 "xmlParseCharRef: invalid value\n");
582 ctxt->wellFormed = 0;
583 ctxt->disableSAX = 1;
584 return(0);
585 }
586 *str = ptr;
587
588 /*
589 * [ WFC: Legal Character ]
590 * Characters referred to using character references must match the
591 * production for Char.
592 */
593 if (IS_CHAR(val)) {
594 return(val);
595 } else {
596 ctxt->errNo = XML_ERR_INVALID_CHAR;
597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
598 ctxt->sax->error(ctxt->userData,
599 "CharRef: invalid xmlChar value %d\n", val);
600 ctxt->wellFormed = 0;
601 ctxt->disableSAX = 1;
602 }
603 return(0);
604}
605
606/**
607 * xmlParserHandlePEReference:
608 * @ctxt: the parser context
609 *
610 * [69] PEReference ::= '%' Name ';'
611 *
612 * [ WFC: No Recursion ]
613 * A parsed entity must not contain a recursive
614 * reference to itself, either directly or indirectly.
615 *
616 * [ WFC: Entity Declared ]
617 * In a document without any DTD, a document with only an internal DTD
618 * subset which contains no parameter entity references, or a document
619 * with "standalone='yes'", ... ... The declaration of a parameter
620 * entity must precede any reference to it...
621 *
622 * [ VC: Entity Declared ]
623 * In a document with an external subset or external parameter entities
624 * with "standalone='no'", ... ... The declaration of a parameter entity
625 * must precede any reference to it...
626 *
627 * [ WFC: In DTD ]
628 * Parameter-entity references may only appear in the DTD.
629 * NOTE: misleading but this is handled.
630 *
631 * A PEReference may have been detected in the current input stream
632 * the handling is done accordingly to
633 * http://www.w3.org/TR/REC-xml#entproc
634 * i.e.
635 * - Included in literal in entity values
636 * - Included as Paraemeter Entity reference within DTDs
637 */
638void
639xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
640 xmlChar *name;
641 xmlEntityPtr entity = NULL;
642 xmlParserInputPtr input;
643
644 if (ctxt->token != 0) {
645 return;
646 }
647 if (RAW != '%') return;
648 switch(ctxt->instate) {
649 case XML_PARSER_CDATA_SECTION:
650 return;
651 case XML_PARSER_COMMENT:
652 return;
653 case XML_PARSER_START_TAG:
654 return;
655 case XML_PARSER_END_TAG:
656 return;
657 case XML_PARSER_EOF:
658 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
661 ctxt->wellFormed = 0;
662 ctxt->disableSAX = 1;
663 return;
664 case XML_PARSER_PROLOG:
665 case XML_PARSER_START:
666 case XML_PARSER_MISC:
667 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
669 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
670 ctxt->wellFormed = 0;
671 ctxt->disableSAX = 1;
672 return;
673 case XML_PARSER_ENTITY_DECL:
674 case XML_PARSER_CONTENT:
675 case XML_PARSER_ATTRIBUTE_VALUE:
676 case XML_PARSER_PI:
677 case XML_PARSER_SYSTEM_LITERAL:
678 /* we just ignore it there */
679 return;
680 case XML_PARSER_EPILOG:
681 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
683 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
684 ctxt->wellFormed = 0;
685 ctxt->disableSAX = 1;
686 return;
687 case XML_PARSER_ENTITY_VALUE:
688 /*
689 * NOTE: in the case of entity values, we don't do the
690 * substitution here since we need the literal
691 * entity value to be able to save the internal
692 * subset of the document.
693 * This will be handled by xmlStringDecodeEntities
694 */
695 return;
696 case XML_PARSER_DTD:
697 /*
698 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
699 * In the internal DTD subset, parameter-entity references
700 * can occur only where markup declarations can occur, not
701 * within markup declarations.
702 * In that case this is handled in xmlParseMarkupDecl
703 */
704 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
705 return;
706 break;
707 case XML_PARSER_IGNORE:
708 return;
709 }
710
711 NEXT;
712 name = xmlParseName(ctxt);
713 if (xmlParserDebugEntities)
714 xmlGenericError(xmlGenericErrorContext,
715 "PE Reference: %s\n", name);
716 if (name == NULL) {
717 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
719 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
720 ctxt->wellFormed = 0;
721 ctxt->disableSAX = 1;
722 } else {
723 if (RAW == ';') {
724 NEXT;
725 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
726 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
727 if (entity == NULL) {
728
729 /*
730 * [ WFC: Entity Declared ]
731 * In a document without any DTD, a document with only an
732 * internal DTD subset which contains no parameter entity
733 * references, or a document with "standalone='yes'", ...
734 * ... The declaration of a parameter entity must precede
735 * any reference to it...
736 */
737 if ((ctxt->standalone == 1) ||
738 ((ctxt->hasExternalSubset == 0) &&
739 (ctxt->hasPErefs == 0))) {
740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741 ctxt->sax->error(ctxt->userData,
742 "PEReference: %%%s; not found\n", name);
743 ctxt->wellFormed = 0;
744 ctxt->disableSAX = 1;
745 } else {
746 /*
747 * [ VC: Entity Declared ]
748 * In a document with an external subset or external
749 * parameter entities with "standalone='no'", ...
750 * ... The declaration of a parameter entity must precede
751 * any reference to it...
752 */
753 if ((!ctxt->disableSAX) &&
754 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
755 ctxt->vctxt.error(ctxt->vctxt.userData,
756 "PEReference: %%%s; not found\n", name);
757 } else if ((!ctxt->disableSAX) &&
758 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
759 ctxt->sax->warning(ctxt->userData,
760 "PEReference: %%%s; not found\n", name);
761 ctxt->valid = 0;
762 }
763 } else {
764 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
765 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
766 /*
767 * handle the extra spaces added before and after
768 * c.f. http://www.w3.org/TR/REC-xml#as-PE
769 * this is done independantly.
770 */
771 input = xmlNewEntityInputStream(ctxt, entity);
772 xmlPushInput(ctxt, input);
773 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
774 (RAW == '<') && (NXT(1) == '?') &&
775 (NXT(2) == 'x') && (NXT(3) == 'm') &&
776 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
777 xmlParseTextDecl(ctxt);
778 }
779 if (ctxt->token == 0)
780 ctxt->token = ' ';
781 } else {
782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
783 ctxt->sax->error(ctxt->userData,
784 "xmlHandlePEReference: %s is not a parameter entity\n",
785 name);
786 ctxt->wellFormed = 0;
787 ctxt->disableSAX = 1;
788 }
789 }
790 } else {
791 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
793 ctxt->sax->error(ctxt->userData,
794 "xmlHandlePEReference: expecting ';'\n");
795 ctxt->wellFormed = 0;
796 ctxt->disableSAX = 1;
797 }
798 xmlFree(name);
799 }
800}
801
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer (an
 * xmlChar * variable) accordingly. It must be used in a function returning
 * a pointer: if the reallocation fails the error is reported, the old
 * buffer is released (the reallocation result is kept in a temporary so
 * that it is not lost), buffer is set to NULL and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) { \
    xmlChar *growBuffer_tmp; \
    buffer##_size *= 2; \
    growBuffer_tmp = (xmlChar *) \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (growBuffer_tmp == NULL) { \
        perror("realloc failed"); \
        xmlFree(buffer); \
        buffer = NULL; \
        return(NULL); \
    } \
    buffer = growBuffer_tmp; \
}
814
815/**
816 * xmlStringDecodeEntities:
817 * @ctxt: the parser context
818 * @str: the input string
819 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
820 * @end: an end marker xmlChar, 0 if none
821 * @end2: an end marker xmlChar, 0 if none
822 * @end3: an end marker xmlChar, 0 if none
823 *
824 * Takes a entity string content and process to do the adequate subtitutions.
825 *
826 * [67] Reference ::= EntityRef | CharRef
827 *
828 * [69] PEReference ::= '%' Name ';'
829 *
830 * Returns A newly allocated string with the substitution done. The caller
831 * must deallocate it !
832 */
833xmlChar *
834xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
835 xmlChar end, xmlChar end2, xmlChar end3) {
836 xmlChar *buffer = NULL;
837 int buffer_size = 0;
838
839 xmlChar *current = NULL;
840 xmlEntityPtr ent;
841 int c,l;
842 int nbchars = 0;
843
844 if (str == NULL)
845 return(NULL);
846
847 if (ctxt->depth > 40) {
848 ctxt->errNo = XML_ERR_ENTITY_LOOP;
849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
850 ctxt->sax->error(ctxt->userData,
851 "Detected entity reference loop\n");
852 ctxt->wellFormed = 0;
853 ctxt->disableSAX = 1;
854 return(NULL);
855 }
856
857 /*
858 * allocate a translation buffer.
859 */
860 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
861 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
862 if (buffer == NULL) {
863 perror("xmlDecodeEntities: malloc failed");
864 return(NULL);
865 }
866
867 /*
868 * Ok loop until we reach one of the ending char or a size limit.
869 * we are operating on already parsed values.
870 */
871 c = CUR_SCHAR(str, l);
872 while ((c != 0) && (c != end) && /* non input consuming loop */
873 (c != end2) && (c != end3)) {
874
875 if (c == 0) break;
876 if ((c == '&') && (str[1] == '#')) {
877 int val = xmlParseStringCharRef(ctxt, &str);
878 if (val != 0) {
879 COPY_BUF(0,buffer,nbchars,val);
880 }
881 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
882 if (xmlParserDebugEntities)
883 xmlGenericError(xmlGenericErrorContext,
884 "String decoding Entity Reference: %.30s\n",
885 str);
886 ent = xmlParseStringEntityRef(ctxt, &str);
887 if ((ent != NULL) &&
888 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
889 if (ent->content != NULL) {
890 COPY_BUF(0,buffer,nbchars,ent->content[0]);
891 } else {
892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
893 ctxt->sax->error(ctxt->userData,
894 "internal error entity has no content\n");
895 }
896 } else if ((ent != NULL) && (ent->content != NULL)) {
897 xmlChar *rep;
898
899 ctxt->depth++;
900 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
901 0, 0, 0);
902 ctxt->depth--;
903 if (rep != NULL) {
904 current = rep;
905 while (*current != 0) { /* non input consuming loop */
906 buffer[nbchars++] = *current++;
907 if (nbchars >
908 buffer_size - XML_PARSER_BUFFER_SIZE) {
909 growBuffer(buffer);
910 }
911 }
912 xmlFree(rep);
913 }
914 } else if (ent != NULL) {
915 int i = xmlStrlen(ent->name);
916 const xmlChar *cur = ent->name;
917
918 buffer[nbchars++] = '&';
919 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
920 growBuffer(buffer);
921 }
922 for (;i > 0;i--)
923 buffer[nbchars++] = *cur++;
924 buffer[nbchars++] = ';';
925 }
926 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
927 if (xmlParserDebugEntities)
928 xmlGenericError(xmlGenericErrorContext,
929 "String decoding PE Reference: %.30s\n", str);
930 ent = xmlParseStringPEReference(ctxt, &str);
931 if (ent != NULL) {
932 xmlChar *rep;
933
934 ctxt->depth++;
935 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
936 0, 0, 0);
937 ctxt->depth--;
938 if (rep != NULL) {
939 current = rep;
940 while (*current != 0) { /* non input consuming loop */
941 buffer[nbchars++] = *current++;
942 if (nbchars >
943 buffer_size - XML_PARSER_BUFFER_SIZE) {
944 growBuffer(buffer);
945 }
946 }
947 xmlFree(rep);
948 }
949 }
950 } else {
951 COPY_BUF(l,buffer,nbchars,c);
952 str += l;
953 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
954 growBuffer(buffer);
955 }
956 }
957 c = CUR_SCHAR(str, l);
958 }
959 buffer[nbchars++] = 0;
960 return(buffer);
961}
962
963
964/************************************************************************
965 * *
966 * Commodity functions to handle xmlChars *
967 * *
968 ************************************************************************/
969
970/**
971 * xmlStrndup:
972 * @cur: the input xmlChar *
973 * @len: the len of @cur
974 *
975 * a strndup for array of xmlChar's
976 *
977 * Returns a new xmlChar * or NULL
978 */
979xmlChar *
980xmlStrndup(const xmlChar *cur, int len) {
981 xmlChar *ret;
982
983 if ((cur == NULL) || (len < 0)) return(NULL);
984 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
985 if (ret == NULL) {
986 xmlGenericError(xmlGenericErrorContext,
987 "malloc of %ld byte failed\n",
988 (len + 1) * (long)sizeof(xmlChar));
989 return(NULL);
990 }
991 memcpy(ret, cur, len * sizeof(xmlChar));
992 ret[len] = 0;
993 return(ret);
994}
995
996/**
997 * xmlStrdup:
998 * @cur: the input xmlChar *
999 *
1000 * a strdup for array of xmlChar's. Since they are supposed to be
1001 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1002 * a termination mark of '0'.
1003 *
1004 * Returns a new xmlChar * or NULL
1005 */
1006xmlChar *
1007xmlStrdup(const xmlChar *cur) {
1008 const xmlChar *p = cur;
1009
1010 if (cur == NULL) return(NULL);
1011 while (*p != 0) p++; /* non input consuming */
1012 return(xmlStrndup(cur, p - cur));
1013}
1014
1015/**
1016 * xmlCharStrndup:
1017 * @cur: the input char *
1018 * @len: the len of @cur
1019 *
1020 * a strndup for char's to xmlChar's
1021 *
1022 * Returns a new xmlChar * or NULL
1023 */
1024
1025xmlChar *
1026xmlCharStrndup(const char *cur, int len) {
1027 int i;
1028 xmlChar *ret;
1029
1030 if ((cur == NULL) || (len < 0)) return(NULL);
1031 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1032 if (ret == NULL) {
1033 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1034 (len + 1) * (long)sizeof(xmlChar));
1035 return(NULL);
1036 }
1037 for (i = 0;i < len;i++)
1038 ret[i] = (xmlChar) cur[i];
1039 ret[len] = 0;
1040 return(ret);
1041}
1042
1043/**
1044 * xmlCharStrdup:
1045 * @cur: the input char *
1046 * @len: the len of @cur
1047 *
1048 * a strdup for char's to xmlChar's
1049 *
1050 * Returns a new xmlChar * or NULL
1051 */
1052
1053xmlChar *
1054xmlCharStrdup(const char *cur) {
1055 const char *p = cur;
1056
1057 if (cur == NULL) return(NULL);
1058 while (*p != '\0') p++; /* non input consuming */
1059 return(xmlCharStrndup(cur, p - cur));
1060}
1061
1062/**
1063 * xmlStrcmp:
1064 * @str1: the first xmlChar *
1065 * @str2: the second xmlChar *
1066 *
1067 * a strcmp for xmlChar's
1068 *
1069 * Returns the integer result of the comparison
1070 */
1071
1072int
1073xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1074 register int tmp;
1075
1076 if (str1 == str2) return(0);
1077 if (str1 == NULL) return(-1);
1078 if (str2 == NULL) return(1);
1079 do {
1080 tmp = *str1++ - *str2;
1081 if (tmp != 0) return(tmp);
1082 } while (*str2++ != 0);
1083 return 0;
1084}
1085
1086/**
1087 * xmlStrEqual:
1088 * @str1: the first xmlChar *
1089 * @str2: the second xmlChar *
1090 *
1091 * Check if both string are equal of have same content
1092 * Should be a bit more readable and faster than xmlStrEqual()
1093 *
1094 * Returns 1 if they are equal, 0 if they are different
1095 */
1096
1097int
1098xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1099 if (str1 == str2) return(1);
1100 if (str1 == NULL) return(0);
1101 if (str2 == NULL) return(0);
1102 do {
1103 if (*str1++ != *str2) return(0);
1104 } while (*str2++);
1105 return(1);
1106}
1107
1108/**
1109 * xmlStrncmp:
1110 * @str1: the first xmlChar *
1111 * @str2: the second xmlChar *
1112 * @len: the max comparison length
1113 *
1114 * a strncmp for xmlChar's
1115 *
1116 * Returns the integer result of the comparison
1117 */
1118
1119int
1120xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1121 register int tmp;
1122
1123 if (len <= 0) return(0);
1124 if (str1 == str2) return(0);
1125 if (str1 == NULL) return(-1);
1126 if (str2 == NULL) return(1);
1127 do {
1128 tmp = *str1++ - *str2;
1129 if (tmp != 0 || --len == 0) return(tmp);
1130 } while (*str2++ != 0);
1131 return 0;
1132}
1133
1134static xmlChar casemap[256] = {
1135 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1136 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1137 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1138 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1139 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1140 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1141 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1142 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1143 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1144 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1145 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1146 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1147 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1148 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1149 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1150 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1151 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1152 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1153 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1154 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1155 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1156 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1157 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1158 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1159 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1160 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1161 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1162 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1163 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1164 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1165 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1166 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1167};
1168
1169/**
1170 * xmlStrcasecmp:
1171 * @str1: the first xmlChar *
1172 * @str2: the second xmlChar *
1173 *
1174 * a strcasecmp for xmlChar's
1175 *
1176 * Returns the integer result of the comparison
1177 */
1178
1179int
1180xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1181 register int tmp;
1182
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrncasecmp:
1195 * @str1: the first xmlChar *
1196 * @str2: the second xmlChar *
1197 * @len: the max comparison length
1198 *
1199 * a strncasecmp for xmlChar's
1200 *
1201 * Returns the integer result of the comparison
1202 */
1203
1204int
1205xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1206 register int tmp;
1207
1208 if (len <= 0) return(0);
1209 if (str1 == str2) return(0);
1210 if (str1 == NULL) return(-1);
1211 if (str2 == NULL) return(1);
1212 do {
1213 tmp = casemap[*str1++] - casemap[*str2];
1214 if (tmp != 0 || --len == 0) return(tmp);
1215 } while (*str2++ != 0);
1216 return 0;
1217}
1218
1219/**
1220 * xmlStrchr:
1221 * @str: the xmlChar * array
1222 * @val: the xmlChar to search
1223 *
1224 * a strchr for xmlChar's
1225 *
1226 * Returns the xmlChar * for the first occurence or NULL.
1227 */
1228
1229const xmlChar *
1230xmlStrchr(const xmlChar *str, xmlChar val) {
1231 if (str == NULL) return(NULL);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == val) return((xmlChar *) str);
1234 str++;
1235 }
1236 return(NULL);
1237}
1238
1239/**
1240 * xmlStrstr:
1241 * @str: the xmlChar * array (haystack)
1242 * @val: the xmlChar to search (needle)
1243 *
1244 * a strstr for xmlChar's
1245 *
1246 * Returns the xmlChar * for the first occurence or NULL.
1247 */
1248
1249const xmlChar *
1250xmlStrstr(const xmlChar *str, xmlChar *val) {
1251 int n;
1252
1253 if (str == NULL) return(NULL);
1254 if (val == NULL) return(NULL);
1255 n = xmlStrlen(val);
1256
1257 if (n == 0) return(str);
1258 while (*str != 0) { /* non input consuming */
1259 if (*str == *val) {
1260 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1261 }
1262 str++;
1263 }
1264 return(NULL);
1265}
1266
1267/**
1268 * xmlStrcasestr:
1269 * @str: the xmlChar * array (haystack)
1270 * @val: the xmlChar to search (needle)
1271 *
1272 * a case-ignoring strstr for xmlChar's
1273 *
1274 * Returns the xmlChar * for the first occurence or NULL.
1275 */
1276
1277const xmlChar *
1278xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1279 int n;
1280
1281 if (str == NULL) return(NULL);
1282 if (val == NULL) return(NULL);
1283 n = xmlStrlen(val);
1284
1285 if (n == 0) return(str);
1286 while (*str != 0) { /* non input consuming */
1287 if (casemap[*str] == casemap[*val])
1288 if (!xmlStrncasecmp(str, val, n)) return(str);
1289 str++;
1290 }
1291 return(NULL);
1292}
1293
1294/**
1295 * xmlStrsub:
1296 * @str: the xmlChar * array (haystack)
1297 * @start: the index of the first char (zero based)
1298 * @len: the length of the substring
1299 *
1300 * Extract a substring of a given string
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305xmlChar *
1306xmlStrsub(const xmlChar *str, int start, int len) {
1307 int i;
1308
1309 if (str == NULL) return(NULL);
1310 if (start < 0) return(NULL);
1311 if (len < 0) return(NULL);
1312
1313 for (i = 0;i < start;i++) {
1314 if (*str == 0) return(NULL);
1315 str++;
1316 }
1317 if (*str == 0) return(NULL);
1318 return(xmlStrndup(str, len));
1319}
1320
1321/**
1322 * xmlStrlen:
1323 * @str: the xmlChar * array
1324 *
1325 * length of a xmlChar's string
1326 *
1327 * Returns the number of xmlChar contained in the ARRAY.
1328 */
1329
1330int
1331xmlStrlen(const xmlChar *str) {
1332 int len = 0;
1333
1334 if (str == NULL) return(0);
1335 while (*str != 0) { /* non input consuming */
1336 str++;
1337 len++;
1338 }
1339 return(len);
1340}
1341
1342/**
1343 * xmlStrncat:
1344 * @cur: the original xmlChar * array
1345 * @add: the xmlChar * array added
1346 * @len: the length of @add
1347 *
1348 * a strncat for array of xmlChar's, it will extend cur with the len
1349 * first bytes of @add.
1350 *
1351 * Returns a new xmlChar *, the original @cur is reallocated if needed
1352 * and should not be freed
1353 */
1354
1355xmlChar *
1356xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1357 int size;
1358 xmlChar *ret;
1359
1360 if ((add == NULL) || (len == 0))
1361 return(cur);
1362 if (cur == NULL)
1363 return(xmlStrndup(add, len));
1364
1365 size = xmlStrlen(cur);
1366 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1367 if (ret == NULL) {
1368 xmlGenericError(xmlGenericErrorContext,
1369 "xmlStrncat: realloc of %ld byte failed\n",
1370 (size + len + 1) * (long)sizeof(xmlChar));
1371 return(cur);
1372 }
1373 memcpy(&ret[size], add, len * sizeof(xmlChar));
1374 ret[size + len] = 0;
1375 return(ret);
1376}
1377
1378/**
1379 * xmlStrcat:
1380 * @cur: the original xmlChar * array
1381 * @add: the xmlChar * array added
1382 *
1383 * a strcat for array of xmlChar's. Since they are supposed to be
1384 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1385 * a termination mark of '0'.
1386 *
1387 * Returns a new xmlChar * containing the concatenated string.
1388 */
1389xmlChar *
1390xmlStrcat(xmlChar *cur, const xmlChar *add) {
1391 const xmlChar *p = add;
1392
1393 if (add == NULL) return(cur);
1394 if (cur == NULL)
1395 return(xmlStrdup(add));
1396
1397 while (*p != 0) p++; /* non input consuming */
1398 return(xmlStrncat(cur, add, p - add));
1399}
1400
1401/************************************************************************
1402 * *
1403 * Commodity functions, cleanup needed ? *
1404 * *
1405 ************************************************************************/
1406
1407/**
1408 * areBlanks:
1409 * @ctxt: an XML parser context
1410 * @str: a xmlChar *
1411 * @len: the size of @str
1412 *
1413 * Is this a sequence of blank chars that one can ignore ?
1414 *
1415 * Returns 1 if ignorable 0 otherwise.
1416 */
1417
1418static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1419 int i, ret;
1420 xmlNodePtr lastChild;
1421
Daniel Veillard2f362242001-03-02 17:36:21 +00001422 if (ctxt->keepBlanks)
1423 return(0);
1424
Owen Taylor3473f882001-02-23 17:55:21 +00001425 /*
1426 * Check for xml:space value.
1427 */
1428 if (*(ctxt->space) == 1)
1429 return(0);
1430
1431 /*
1432 * Check that the string is made of blanks
1433 */
1434 for (i = 0;i < len;i++)
1435 if (!(IS_BLANK(str[i]))) return(0);
1436
1437 /*
1438 * Look if the element is mixed content in the Dtd if available
1439 */
1440 if (ctxt->myDoc != NULL) {
1441 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1442 if (ret == 0) return(1);
1443 if (ret == 1) return(0);
1444 }
1445
1446 /*
1447 * Otherwise, heuristic :-\
1448 */
Owen Taylor3473f882001-02-23 17:55:21 +00001449 if (RAW != '<') return(0);
1450 if (ctxt->node == NULL) return(0);
1451 if ((ctxt->node->children == NULL) &&
1452 (RAW == '<') && (NXT(1) == '/')) return(0);
1453
1454 lastChild = xmlGetLastChild(ctxt->node);
1455 if (lastChild == NULL) {
1456 if (ctxt->node->content != NULL) return(0);
1457 } else if (xmlNodeIsText(lastChild))
1458 return(0);
1459 else if ((ctxt->node->children != NULL) &&
1460 (xmlNodeIsText(ctxt->node->children)))
1461 return(0);
1462 return(1);
1463}
1464
1465/*
1466 * Forward declarations for recursive behaviour.
1467 */
1468void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1469void xmlParseReference(xmlParserCtxtPtr ctxt);
1470
1471/************************************************************************
1472 * *
1473 * Extra stuff for namespace support *
1474 * Relates to http://www.w3.org/TR/WD-xml-names *
1475 * *
1476 ************************************************************************/
1477
1478/**
1479 * xmlSplitQName:
1480 * @ctxt: an XML parser context
1481 * @name: an XML parser context
1482 * @prefix: a xmlChar **
1483 *
1484 * parse an UTF8 encoded XML qualified name string
1485 *
1486 * [NS 5] QName ::= (Prefix ':')? LocalPart
1487 *
1488 * [NS 6] Prefix ::= NCName
1489 *
1490 * [NS 7] LocalPart ::= NCName
1491 *
1492 * Returns the local part, and prefix is updated
1493 * to get the Prefix if any.
1494 */
1495
1496xmlChar *
1497xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1498 xmlChar buf[XML_MAX_NAMELEN + 5];
1499 xmlChar *buffer = NULL;
1500 int len = 0;
1501 int max = XML_MAX_NAMELEN;
1502 xmlChar *ret = NULL;
1503 const xmlChar *cur = name;
1504 int c;
1505
1506 *prefix = NULL;
1507
1508#ifndef XML_XML_NAMESPACE
1509 /* xml: prefix is not really a namespace */
1510 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1511 (cur[2] == 'l') && (cur[3] == ':'))
1512 return(xmlStrdup(name));
1513#endif
1514
1515 /* nasty but valid */
1516 if (cur[0] == ':')
1517 return(xmlStrdup(name));
1518
1519 c = *cur++;
1520 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1521 buf[len++] = c;
1522 c = *cur++;
1523 }
1524 if (len >= max) {
1525 /*
1526 * Okay someone managed to make a huge name, so he's ready to pay
1527 * for the processing speed.
1528 */
1529 max = len * 2;
1530
1531 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1532 if (buffer == NULL) {
1533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1534 ctxt->sax->error(ctxt->userData,
1535 "xmlSplitQName: out of memory\n");
1536 return(NULL);
1537 }
1538 memcpy(buffer, buf, len);
1539 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1540 if (len + 10 > max) {
1541 max *= 2;
1542 buffer = (xmlChar *) xmlRealloc(buffer,
1543 max * sizeof(xmlChar));
1544 if (buffer == NULL) {
1545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1546 ctxt->sax->error(ctxt->userData,
1547 "xmlSplitQName: out of memory\n");
1548 return(NULL);
1549 }
1550 }
1551 buffer[len++] = c;
1552 c = *cur++;
1553 }
1554 buffer[len] = 0;
1555 }
1556
1557 if (buffer == NULL)
1558 ret = xmlStrndup(buf, len);
1559 else {
1560 ret = buffer;
1561 buffer = NULL;
1562 max = XML_MAX_NAMELEN;
1563 }
1564
1565
1566 if (c == ':') {
1567 c = *cur++;
1568 if (c == 0) return(ret);
1569 *prefix = ret;
1570 len = 0;
1571
1572 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1573 buf[len++] = c;
1574 c = *cur++;
1575 }
1576 if (len >= max) {
1577 /*
1578 * Okay someone managed to make a huge name, so he's ready to pay
1579 * for the processing speed.
1580 */
1581 max = len * 2;
1582
1583 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1584 if (buffer == NULL) {
1585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1586 ctxt->sax->error(ctxt->userData,
1587 "xmlSplitQName: out of memory\n");
1588 return(NULL);
1589 }
1590 memcpy(buffer, buf, len);
1591 while (c != 0) { /* tested bigname2.xml */
1592 if (len + 10 > max) {
1593 max *= 2;
1594 buffer = (xmlChar *) xmlRealloc(buffer,
1595 max * sizeof(xmlChar));
1596 if (buffer == NULL) {
1597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1598 ctxt->sax->error(ctxt->userData,
1599 "xmlSplitQName: out of memory\n");
1600 return(NULL);
1601 }
1602 }
1603 buffer[len++] = c;
1604 c = *cur++;
1605 }
1606 buffer[len] = 0;
1607 }
1608
1609 if (buffer == NULL)
1610 ret = xmlStrndup(buf, len);
1611 else {
1612 ret = buffer;
1613 }
1614 }
1615
1616 return(ret);
1617}
1618
1619/************************************************************************
1620 * *
1621 * The parser itself *
1622 * Relates to http://www.w3.org/TR/REC-xml *
1623 * *
1624 ************************************************************************/
1625
Daniel Veillard21a0f912001-02-25 19:54:14 +00001626xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001627/**
1628 * xmlParseName:
1629 * @ctxt: an XML parser context
1630 *
1631 * parse an XML name.
1632 *
1633 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1634 * CombiningChar | Extender
1635 *
1636 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1637 *
1638 * [6] Names ::= Name (S Name)*
1639 *
1640 * Returns the Name parsed or NULL
1641 */
1642
1643xmlChar *
1644xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001645 const xmlChar *in;
1646 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001647 int count = 0;
1648
1649 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001650
1651 /*
1652 * Accelerator for simple ASCII names
1653 */
1654 in = ctxt->input->cur;
1655 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1656 ((*in >= 0x41) && (*in <= 0x5A)) ||
1657 (*in == '_') || (*in == ':')) {
1658 in++;
1659 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1660 ((*in >= 0x41) && (*in <= 0x5A)) ||
1661 ((*in >= 0x30) && (*in <= 0x39)) ||
1662 (*in == '_') || (*in == ':'))
1663 in++;
1664 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1665 count = in - ctxt->input->cur;
1666 ret = xmlStrndup(ctxt->input->cur, count);
1667 ctxt->input->cur = in;
1668 return(ret);
1669 }
1670 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001671 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001672}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001673
Daniel Veillard21a0f912001-02-25 19:54:14 +00001674xmlChar *
1675xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1676 xmlChar buf[XML_MAX_NAMELEN + 5];
1677 int len = 0, l;
1678 int c;
1679 int count = 0;
1680
1681 /*
1682 * Handler for more complex cases
1683 */
1684 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001685 c = CUR_CHAR(l);
1686 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1687 (!IS_LETTER(c) && (c != '_') &&
1688 (c != ':'))) {
1689 return(NULL);
1690 }
1691
1692 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1693 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1694 (c == '.') || (c == '-') ||
1695 (c == '_') || (c == ':') ||
1696 (IS_COMBINING(c)) ||
1697 (IS_EXTENDER(c)))) {
1698 if (count++ > 100) {
1699 count = 0;
1700 GROW;
1701 }
1702 COPY_BUF(l,buf,len,c);
1703 NEXTL(l);
1704 c = CUR_CHAR(l);
1705 if (len >= XML_MAX_NAMELEN) {
1706 /*
1707 * Okay someone managed to make a huge name, so he's ready to pay
1708 * for the processing speed.
1709 */
1710 xmlChar *buffer;
1711 int max = len * 2;
1712
1713 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1714 if (buffer == NULL) {
1715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1716 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001717 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001718 return(NULL);
1719 }
1720 memcpy(buffer, buf, len);
1721 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1722 (c == '.') || (c == '-') ||
1723 (c == '_') || (c == ':') ||
1724 (IS_COMBINING(c)) ||
1725 (IS_EXTENDER(c))) {
1726 if (count++ > 100) {
1727 count = 0;
1728 GROW;
1729 }
1730 if (len + 10 > max) {
1731 max *= 2;
1732 buffer = (xmlChar *) xmlRealloc(buffer,
1733 max * sizeof(xmlChar));
1734 if (buffer == NULL) {
1735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1736 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001737 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001738 return(NULL);
1739 }
1740 }
1741 COPY_BUF(l,buffer,len,c);
1742 NEXTL(l);
1743 c = CUR_CHAR(l);
1744 }
1745 buffer[len] = 0;
1746 return(buffer);
1747 }
1748 }
1749 return(xmlStrndup(buf, len));
1750}
1751
1752/**
1753 * xmlParseStringName:
1754 * @ctxt: an XML parser context
1755 * @str: a pointer to the string pointer (IN/OUT)
1756 *
1757 * parse an XML name.
1758 *
1759 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1760 * CombiningChar | Extender
1761 *
1762 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1763 *
1764 * [6] Names ::= Name (S Name)*
1765 *
1766 * Returns the Name parsed or NULL. The str pointer
1767 * is updated to the current location in the string.
1768 */
1769
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001770static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001771xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1772 xmlChar buf[XML_MAX_NAMELEN + 5];
1773 const xmlChar *cur = *str;
1774 int len = 0, l;
1775 int c;
1776
1777 c = CUR_SCHAR(cur, l);
1778 if (!IS_LETTER(c) && (c != '_') &&
1779 (c != ':')) {
1780 return(NULL);
1781 }
1782
1783 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1784 (c == '.') || (c == '-') ||
1785 (c == '_') || (c == ':') ||
1786 (IS_COMBINING(c)) ||
1787 (IS_EXTENDER(c))) {
1788 COPY_BUF(l,buf,len,c);
1789 cur += l;
1790 c = CUR_SCHAR(cur, l);
1791 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1792 /*
1793 * Okay someone managed to make a huge name, so he's ready to pay
1794 * for the processing speed.
1795 */
1796 xmlChar *buffer;
1797 int max = len * 2;
1798
1799 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1800 if (buffer == NULL) {
1801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1802 ctxt->sax->error(ctxt->userData,
1803 "xmlParseStringName: out of memory\n");
1804 return(NULL);
1805 }
1806 memcpy(buffer, buf, len);
1807 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1808 (c == '.') || (c == '-') ||
1809 (c == '_') || (c == ':') ||
1810 (IS_COMBINING(c)) ||
1811 (IS_EXTENDER(c))) {
1812 if (len + 10 > max) {
1813 max *= 2;
1814 buffer = (xmlChar *) xmlRealloc(buffer,
1815 max * sizeof(xmlChar));
1816 if (buffer == NULL) {
1817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1818 ctxt->sax->error(ctxt->userData,
1819 "xmlParseStringName: out of memory\n");
1820 return(NULL);
1821 }
1822 }
1823 COPY_BUF(l,buffer,len,c);
1824 cur += l;
1825 c = CUR_SCHAR(cur, l);
1826 }
1827 buffer[len] = 0;
1828 *str = cur;
1829 return(buffer);
1830 }
1831 }
1832 *str = cur;
1833 return(xmlStrndup(buf, len));
1834}
1835
1836/**
1837 * xmlParseNmtoken:
1838 * @ctxt: an XML parser context
1839 *
1840 * parse an XML Nmtoken.
1841 *
1842 * [7] Nmtoken ::= (NameChar)+
1843 *
1844 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1845 *
1846 * Returns the Nmtoken parsed or NULL
1847 */
1848
1849xmlChar *
1850xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1851 xmlChar buf[XML_MAX_NAMELEN + 5];
1852 int len = 0, l;
1853 int c;
1854 int count = 0;
1855
1856 GROW;
1857 c = CUR_CHAR(l);
1858
1859 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1860 (c == '.') || (c == '-') ||
1861 (c == '_') || (c == ':') ||
1862 (IS_COMBINING(c)) ||
1863 (IS_EXTENDER(c))) {
1864 if (count++ > 100) {
1865 count = 0;
1866 GROW;
1867 }
1868 COPY_BUF(l,buf,len,c);
1869 NEXTL(l);
1870 c = CUR_CHAR(l);
1871 if (len >= XML_MAX_NAMELEN) {
1872 /*
1873 * Okay someone managed to make a huge token, so he's ready to pay
1874 * for the processing speed.
1875 */
1876 xmlChar *buffer;
1877 int max = len * 2;
1878
1879 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1880 if (buffer == NULL) {
1881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1882 ctxt->sax->error(ctxt->userData,
1883 "xmlParseNmtoken: out of memory\n");
1884 return(NULL);
1885 }
1886 memcpy(buffer, buf, len);
1887 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1888 (c == '.') || (c == '-') ||
1889 (c == '_') || (c == ':') ||
1890 (IS_COMBINING(c)) ||
1891 (IS_EXTENDER(c))) {
1892 if (count++ > 100) {
1893 count = 0;
1894 GROW;
1895 }
1896 if (len + 10 > max) {
1897 max *= 2;
1898 buffer = (xmlChar *) xmlRealloc(buffer,
1899 max * sizeof(xmlChar));
1900 if (buffer == NULL) {
1901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1902 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001903 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001904 return(NULL);
1905 }
1906 }
1907 COPY_BUF(l,buffer,len,c);
1908 NEXTL(l);
1909 c = CUR_CHAR(l);
1910 }
1911 buffer[len] = 0;
1912 return(buffer);
1913 }
1914 }
1915 if (len == 0)
1916 return(NULL);
1917 return(xmlStrndup(buf, len));
1918}
1919
1920/**
1921 * xmlParseEntityValue:
1922 * @ctxt: an XML parser context
1923 * @orig: if non-NULL store a copy of the original entity value
1924 *
1925 * parse a value for ENTITY declarations
1926 *
1927 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1928 * "'" ([^%&'] | PEReference | Reference)* "'"
1929 *
1930 * Returns the EntityValue parsed with reference substitued or NULL
1931 */
1932
1933xmlChar *
1934xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1935 xmlChar *buf = NULL;
1936 int len = 0;
1937 int size = XML_PARSER_BUFFER_SIZE;
1938 int c, l;
1939 xmlChar stop;
1940 xmlChar *ret = NULL;
1941 const xmlChar *cur = NULL;
1942 xmlParserInputPtr input;
1943
1944 if (RAW == '"') stop = '"';
1945 else if (RAW == '\'') stop = '\'';
1946 else {
1947 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1949 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1950 ctxt->wellFormed = 0;
1951 ctxt->disableSAX = 1;
1952 return(NULL);
1953 }
1954 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1955 if (buf == NULL) {
1956 xmlGenericError(xmlGenericErrorContext,
1957 "malloc of %d byte failed\n", size);
1958 return(NULL);
1959 }
1960
1961 /*
1962 * The content of the entity definition is copied in a buffer.
1963 */
1964
1965 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1966 input = ctxt->input;
1967 GROW;
1968 NEXT;
1969 c = CUR_CHAR(l);
1970 /*
1971 * NOTE: 4.4.5 Included in Literal
1972 * When a parameter entity reference appears in a literal entity
1973 * value, ... a single or double quote character in the replacement
1974 * text is always treated as a normal data character and will not
1975 * terminate the literal.
1976 * In practice it means we stop the loop only when back at parsing
1977 * the initial entity and the quote is found
1978 */
1979 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1980 (ctxt->input != input))) {
1981 if (len + 5 >= size) {
1982 size *= 2;
1983 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1984 if (buf == NULL) {
1985 xmlGenericError(xmlGenericErrorContext,
1986 "realloc of %d byte failed\n", size);
1987 return(NULL);
1988 }
1989 }
1990 COPY_BUF(l,buf,len,c);
1991 NEXTL(l);
1992 /*
1993 * Pop-up of finished entities.
1994 */
1995 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1996 xmlPopInput(ctxt);
1997
1998 GROW;
1999 c = CUR_CHAR(l);
2000 if (c == 0) {
2001 GROW;
2002 c = CUR_CHAR(l);
2003 }
2004 }
2005 buf[len] = 0;
2006
2007 /*
2008 * Raise problem w.r.t. '&' and '%' being used in non-entities
2009 * reference constructs. Note Charref will be handled in
2010 * xmlStringDecodeEntities()
2011 */
2012 cur = buf;
2013 while (*cur != 0) { /* non input consuming */
2014 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2015 xmlChar *name;
2016 xmlChar tmp = *cur;
2017
2018 cur++;
2019 name = xmlParseStringName(ctxt, &cur);
2020 if ((name == NULL) || (*cur != ';')) {
2021 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData,
2024 "EntityValue: '%c' forbidden except for entities references\n",
2025 tmp);
2026 ctxt->wellFormed = 0;
2027 ctxt->disableSAX = 1;
2028 }
2029 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2030 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032 ctxt->sax->error(ctxt->userData,
2033 "EntityValue: PEReferences forbidden in internal subset\n",
2034 tmp);
2035 ctxt->wellFormed = 0;
2036 ctxt->disableSAX = 1;
2037 }
2038 if (name != NULL)
2039 xmlFree(name);
2040 }
2041 cur++;
2042 }
2043
2044 /*
2045 * Then PEReference entities are substituted.
2046 */
2047 if (c != stop) {
2048 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2050 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2051 ctxt->wellFormed = 0;
2052 ctxt->disableSAX = 1;
2053 xmlFree(buf);
2054 } else {
2055 NEXT;
2056 /*
2057 * NOTE: 4.4.7 Bypassed
2058 * When a general entity reference appears in the EntityValue in
2059 * an entity declaration, it is bypassed and left as is.
2060 * so XML_SUBSTITUTE_REF is not set here.
2061 */
2062 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2063 0, 0, 0);
2064 if (orig != NULL)
2065 *orig = buf;
2066 else
2067 xmlFree(buf);
2068 }
2069
2070 return(ret);
2071}
2072
2073/**
2074 * xmlParseAttValue:
2075 * @ctxt: an XML parser context
2076 *
2077 * parse a value for an attribute
2078 * Note: the parser won't do substitution of entities here, this
2079 * will be handled later in xmlStringGetNodeList
2080 *
2081 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2082 * "'" ([^<&'] | Reference)* "'"
2083 *
2084 * 3.3.3 Attribute-Value Normalization:
2085 * Before the value of an attribute is passed to the application or
2086 * checked for validity, the XML processor must normalize it as follows:
2087 * - a character reference is processed by appending the referenced
2088 * character to the attribute value
2089 * - an entity reference is processed by recursively processing the
2090 * replacement text of the entity
2091 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2092 * appending #x20 to the normalized value, except that only a single
2093 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2094 * parsed entity or the literal entity value of an internal parsed entity
2095 * - other characters are processed by appending them to the normalized value
2096 * If the declared value is not CDATA, then the XML processor must further
2097 * process the normalized attribute value by discarding any leading and
2098 * trailing space (#x20) characters, and by replacing sequences of space
2099 * (#x20) characters by a single space (#x20) character.
2100 * All attributes for which no declaration has been read should be treated
2101 * by a non-validating parser as if declared CDATA.
2102 *
2103 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2104 */
2105
2106xmlChar *
2107xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2108 xmlChar limit = 0;
2109 xmlChar *buf = NULL;
2110 int len = 0;
2111 int buf_size = 0;
2112 int c, l;
2113 xmlChar *current = NULL;
2114 xmlEntityPtr ent;
2115
2116
2117 SHRINK;
2118 if (NXT(0) == '"') {
2119 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2120 limit = '"';
2121 NEXT;
2122 } else if (NXT(0) == '\'') {
2123 limit = '\'';
2124 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2125 NEXT;
2126 } else {
2127 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2129 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2130 ctxt->wellFormed = 0;
2131 ctxt->disableSAX = 1;
2132 return(NULL);
2133 }
2134
2135 /*
2136 * allocate a translation buffer.
2137 */
2138 buf_size = XML_PARSER_BUFFER_SIZE;
2139 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2140 if (buf == NULL) {
2141 perror("xmlParseAttValue: malloc failed");
2142 return(NULL);
2143 }
2144
2145 /*
2146 * Ok loop until we reach one of the ending char or a size limit.
2147 */
2148 c = CUR_CHAR(l);
2149 while (((NXT(0) != limit) && /* checked */
2150 (c != '<')) || (ctxt->token != 0)) {
2151 if (c == 0) break;
2152 if (ctxt->token == '&') {
2153 /*
2154 * The reparsing will be done in xmlStringGetNodeList()
2155 * called by the attribute() function in SAX.c
2156 */
2157 static xmlChar buffer[6] = "&#38;";
2158
2159 if (len > buf_size - 10) {
2160 growBuffer(buf);
2161 }
2162 current = &buffer[0];
2163 while (*current != 0) { /* non input consuming */
2164 buf[len++] = *current++;
2165 }
2166 ctxt->token = 0;
2167 } else if (c == '&') {
2168 if (NXT(1) == '#') {
2169 int val = xmlParseCharRef(ctxt);
2170 if (val == '&') {
2171 /*
2172 * The reparsing will be done in xmlStringGetNodeList()
2173 * called by the attribute() function in SAX.c
2174 */
2175 static xmlChar buffer[6] = "&#38;";
2176
2177 if (len > buf_size - 10) {
2178 growBuffer(buf);
2179 }
2180 current = &buffer[0];
2181 while (*current != 0) { /* non input consuming */
2182 buf[len++] = *current++;
2183 }
2184 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002185 if (len > buf_size - 10) {
2186 growBuffer(buf);
2187 }
Owen Taylor3473f882001-02-23 17:55:21 +00002188 len += xmlCopyChar(0, &buf[len], val);
2189 }
2190 } else {
2191 ent = xmlParseEntityRef(ctxt);
2192 if ((ent != NULL) &&
2193 (ctxt->replaceEntities != 0)) {
2194 xmlChar *rep;
2195
2196 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2197 rep = xmlStringDecodeEntities(ctxt, ent->content,
2198 XML_SUBSTITUTE_REF, 0, 0, 0);
2199 if (rep != NULL) {
2200 current = rep;
2201 while (*current != 0) { /* non input consuming */
2202 buf[len++] = *current++;
2203 if (len > buf_size - 10) {
2204 growBuffer(buf);
2205 }
2206 }
2207 xmlFree(rep);
2208 }
2209 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002210 if (len > buf_size - 10) {
2211 growBuffer(buf);
2212 }
Owen Taylor3473f882001-02-23 17:55:21 +00002213 if (ent->content != NULL)
2214 buf[len++] = ent->content[0];
2215 }
2216 } else if (ent != NULL) {
2217 int i = xmlStrlen(ent->name);
2218 const xmlChar *cur = ent->name;
2219
2220 /*
2221 * This may look absurd but is needed to detect
2222 * entities problems
2223 */
2224 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2225 (ent->content != NULL)) {
2226 xmlChar *rep;
2227 rep = xmlStringDecodeEntities(ctxt, ent->content,
2228 XML_SUBSTITUTE_REF, 0, 0, 0);
2229 if (rep != NULL)
2230 xmlFree(rep);
2231 }
2232
2233 /*
2234 * Just output the reference
2235 */
2236 buf[len++] = '&';
2237 if (len > buf_size - i - 10) {
2238 growBuffer(buf);
2239 }
2240 for (;i > 0;i--)
2241 buf[len++] = *cur++;
2242 buf[len++] = ';';
2243 }
2244 }
2245 } else {
2246 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2247 COPY_BUF(l,buf,len,0x20);
2248 if (len > buf_size - 10) {
2249 growBuffer(buf);
2250 }
2251 } else {
2252 COPY_BUF(l,buf,len,c);
2253 if (len > buf_size - 10) {
2254 growBuffer(buf);
2255 }
2256 }
2257 NEXTL(l);
2258 }
2259 GROW;
2260 c = CUR_CHAR(l);
2261 }
2262 buf[len++] = 0;
2263 if (RAW == '<') {
2264 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2266 ctxt->sax->error(ctxt->userData,
2267 "Unescaped '<' not allowed in attributes values\n");
2268 ctxt->wellFormed = 0;
2269 ctxt->disableSAX = 1;
2270 } else if (RAW != limit) {
2271 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2273 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2274 ctxt->wellFormed = 0;
2275 ctxt->disableSAX = 1;
2276 } else
2277 NEXT;
2278 return(buf);
2279}
2280
2281/**
2282 * xmlParseSystemLiteral:
2283 * @ctxt: an XML parser context
2284 *
2285 * parse an XML Literal
2286 *
2287 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2288 *
2289 * Returns the SystemLiteral parsed or NULL
2290 */
2291
2292xmlChar *
2293xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2294 xmlChar *buf = NULL;
2295 int len = 0;
2296 int size = XML_PARSER_BUFFER_SIZE;
2297 int cur, l;
2298 xmlChar stop;
2299 int state = ctxt->instate;
2300 int count = 0;
2301
2302 SHRINK;
2303 if (RAW == '"') {
2304 NEXT;
2305 stop = '"';
2306 } else if (RAW == '\'') {
2307 NEXT;
2308 stop = '\'';
2309 } else {
2310 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2312 ctxt->sax->error(ctxt->userData,
2313 "SystemLiteral \" or ' expected\n");
2314 ctxt->wellFormed = 0;
2315 ctxt->disableSAX = 1;
2316 return(NULL);
2317 }
2318
2319 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2320 if (buf == NULL) {
2321 xmlGenericError(xmlGenericErrorContext,
2322 "malloc of %d byte failed\n", size);
2323 return(NULL);
2324 }
2325 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2326 cur = CUR_CHAR(l);
2327 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2328 if (len + 5 >= size) {
2329 size *= 2;
2330 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2331 if (buf == NULL) {
2332 xmlGenericError(xmlGenericErrorContext,
2333 "realloc of %d byte failed\n", size);
2334 ctxt->instate = (xmlParserInputState) state;
2335 return(NULL);
2336 }
2337 }
2338 count++;
2339 if (count > 50) {
2340 GROW;
2341 count = 0;
2342 }
2343 COPY_BUF(l,buf,len,cur);
2344 NEXTL(l);
2345 cur = CUR_CHAR(l);
2346 if (cur == 0) {
2347 GROW;
2348 SHRINK;
2349 cur = CUR_CHAR(l);
2350 }
2351 }
2352 buf[len] = 0;
2353 ctxt->instate = (xmlParserInputState) state;
2354 if (!IS_CHAR(cur)) {
2355 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2357 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2358 ctxt->wellFormed = 0;
2359 ctxt->disableSAX = 1;
2360 } else {
2361 NEXT;
2362 }
2363 return(buf);
2364}
2365
2366/**
2367 * xmlParsePubidLiteral:
2368 * @ctxt: an XML parser context
2369 *
2370 * parse an XML public literal
2371 *
2372 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2373 *
2374 * Returns the PubidLiteral parsed or NULL.
2375 */
2376
2377xmlChar *
2378xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2379 xmlChar *buf = NULL;
2380 int len = 0;
2381 int size = XML_PARSER_BUFFER_SIZE;
2382 xmlChar cur;
2383 xmlChar stop;
2384 int count = 0;
2385
2386 SHRINK;
2387 if (RAW == '"') {
2388 NEXT;
2389 stop = '"';
2390 } else if (RAW == '\'') {
2391 NEXT;
2392 stop = '\'';
2393 } else {
2394 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2396 ctxt->sax->error(ctxt->userData,
2397 "SystemLiteral \" or ' expected\n");
2398 ctxt->wellFormed = 0;
2399 ctxt->disableSAX = 1;
2400 return(NULL);
2401 }
2402 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2403 if (buf == NULL) {
2404 xmlGenericError(xmlGenericErrorContext,
2405 "malloc of %d byte failed\n", size);
2406 return(NULL);
2407 }
2408 cur = CUR;
2409 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2410 if (len + 1 >= size) {
2411 size *= 2;
2412 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2413 if (buf == NULL) {
2414 xmlGenericError(xmlGenericErrorContext,
2415 "realloc of %d byte failed\n", size);
2416 return(NULL);
2417 }
2418 }
2419 buf[len++] = cur;
2420 count++;
2421 if (count > 50) {
2422 GROW;
2423 count = 0;
2424 }
2425 NEXT;
2426 cur = CUR;
2427 if (cur == 0) {
2428 GROW;
2429 SHRINK;
2430 cur = CUR;
2431 }
2432 }
2433 buf[len] = 0;
2434 if (cur != stop) {
2435 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2437 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2438 ctxt->wellFormed = 0;
2439 ctxt->disableSAX = 1;
2440 } else {
2441 NEXT;
2442 }
2443 return(buf);
2444}
2445
Daniel Veillard48b2f892001-02-25 16:11:03 +00002446void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002447/**
2448 * xmlParseCharData:
2449 * @ctxt: an XML parser context
2450 * @cdata: int indicating whether we are within a CDATA section
2451 *
2452 * parse a CharData section.
2453 * if we are within a CDATA section ']]>' marks an end of section.
2454 *
2455 * The right angle bracket (>) may be represented using the string "&gt;",
2456 * and must, for compatibility, be escaped using "&gt;" or a character
2457 * reference when it appears in the string "]]>" in content, when that
2458 * string is not marking the end of a CDATA section.
2459 *
2460 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2461 */
2462
2463void
2464xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002465 const xmlChar *in;
2466 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002467 int line = ctxt->input->line;
2468 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002469
2470 SHRINK;
2471 GROW;
2472 /*
2473 * Accelerated common case where input don't need to be
2474 * modified before passing it to the handler.
2475 */
2476 if ((ctxt->token == 0) && (!cdata)) {
2477 in = ctxt->input->cur;
2478 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002479get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002480 while (((*in >= 0x20) && (*in != '<') &&
2481 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2482 in++;
2483 if (*in == 0xA) {
2484 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002485 in++;
2486 while (*in == 0xA) {
2487 ctxt->input->line++;
2488 in++;
2489 }
2490 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002491 }
2492 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002493 if (nbchar > 0) {
2494 if (IS_BLANK(*ctxt->input->cur) &&
2495 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2496 if (ctxt->sax->ignorableWhitespace != NULL)
2497 ctxt->sax->ignorableWhitespace(ctxt->userData,
2498 ctxt->input->cur, nbchar);
2499 } else {
2500 if (ctxt->sax->characters != NULL)
2501 ctxt->sax->characters(ctxt->userData,
2502 ctxt->input->cur, nbchar);
2503 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002504 }
2505 ctxt->input->cur = in;
2506 if (*in == 0xD) {
2507 in++;
2508 if (*in == 0xA) {
2509 ctxt->input->cur = in;
2510 in++;
2511 ctxt->input->line++;
2512 continue; /* while */
2513 }
2514 in--;
2515 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002516 if (*in == '<') {
2517 return;
2518 }
2519 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002520 return;
2521 }
2522 SHRINK;
2523 GROW;
2524 in = ctxt->input->cur;
2525 } while ((*in >= 0x20) && (*in <= 0x7F));
2526 nbchar = 0;
2527 }
Daniel Veillard50582112001-03-26 22:52:16 +00002528 ctxt->input->line = line;
2529 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002530 xmlParseCharDataComplex(ctxt, cdata);
2531}
2532
2533void
2534xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002535 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2536 int nbchar = 0;
2537 int cur, l;
2538 int count = 0;
2539
2540 SHRINK;
2541 GROW;
2542 cur = CUR_CHAR(l);
2543 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2544 ((cur != '&') || (ctxt->token == '&')) &&
2545 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2546 if ((cur == ']') && (NXT(1) == ']') &&
2547 (NXT(2) == '>')) {
2548 if (cdata) break;
2549 else {
2550 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2552 ctxt->sax->error(ctxt->userData,
2553 "Sequence ']]>' not allowed in content\n");
2554 /* Should this be relaxed ??? I see a "must here */
2555 ctxt->wellFormed = 0;
2556 ctxt->disableSAX = 1;
2557 }
2558 }
2559 COPY_BUF(l,buf,nbchar,cur);
2560 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2561 /*
2562 * Ok the segment is to be consumed as chars.
2563 */
2564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2565 if (areBlanks(ctxt, buf, nbchar)) {
2566 if (ctxt->sax->ignorableWhitespace != NULL)
2567 ctxt->sax->ignorableWhitespace(ctxt->userData,
2568 buf, nbchar);
2569 } else {
2570 if (ctxt->sax->characters != NULL)
2571 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2572 }
2573 }
2574 nbchar = 0;
2575 }
2576 count++;
2577 if (count > 50) {
2578 GROW;
2579 count = 0;
2580 }
2581 NEXTL(l);
2582 cur = CUR_CHAR(l);
2583 }
2584 if (nbchar != 0) {
2585 /*
2586 * Ok the segment is to be consumed as chars.
2587 */
2588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2589 if (areBlanks(ctxt, buf, nbchar)) {
2590 if (ctxt->sax->ignorableWhitespace != NULL)
2591 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2592 } else {
2593 if (ctxt->sax->characters != NULL)
2594 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2595 }
2596 }
2597 }
2598}
2599
2600/**
2601 * xmlParseExternalID:
2602 * @ctxt: an XML parser context
2603 * @publicID: a xmlChar** receiving PubidLiteral
2604 * @strict: indicate whether we should restrict parsing to only
2605 * production [75], see NOTE below
2606 *
2607 * Parse an External ID or a Public ID
2608 *
2609 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2610 * 'PUBLIC' S PubidLiteral S SystemLiteral
2611 *
2612 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2613 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2614 *
2615 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2616 *
2617 * Returns the function returns SystemLiteral and in the second
2618 * case publicID receives PubidLiteral, is strict is off
2619 * it is possible to return NULL and have publicID set.
2620 */
2621
2622xmlChar *
2623xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2624 xmlChar *URI = NULL;
2625
2626 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002627
2628 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002629 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2630 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2631 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2632 SKIP(6);
2633 if (!IS_BLANK(CUR)) {
2634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2636 ctxt->sax->error(ctxt->userData,
2637 "Space required after 'SYSTEM'\n");
2638 ctxt->wellFormed = 0;
2639 ctxt->disableSAX = 1;
2640 }
2641 SKIP_BLANKS;
2642 URI = xmlParseSystemLiteral(ctxt);
2643 if (URI == NULL) {
2644 ctxt->errNo = XML_ERR_URI_REQUIRED;
2645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2646 ctxt->sax->error(ctxt->userData,
2647 "xmlParseExternalID: SYSTEM, no URI\n");
2648 ctxt->wellFormed = 0;
2649 ctxt->disableSAX = 1;
2650 }
2651 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2652 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2653 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2654 SKIP(6);
2655 if (!IS_BLANK(CUR)) {
2656 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2658 ctxt->sax->error(ctxt->userData,
2659 "Space required after 'PUBLIC'\n");
2660 ctxt->wellFormed = 0;
2661 ctxt->disableSAX = 1;
2662 }
2663 SKIP_BLANKS;
2664 *publicID = xmlParsePubidLiteral(ctxt);
2665 if (*publicID == NULL) {
2666 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2668 ctxt->sax->error(ctxt->userData,
2669 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
2672 }
2673 if (strict) {
2674 /*
2675 * We don't handle [83] so "S SystemLiteral" is required.
2676 */
2677 if (!IS_BLANK(CUR)) {
2678 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2680 ctxt->sax->error(ctxt->userData,
2681 "Space required after the Public Identifier\n");
2682 ctxt->wellFormed = 0;
2683 ctxt->disableSAX = 1;
2684 }
2685 } else {
2686 /*
2687 * We handle [83] so we return immediately, if
2688 * "S SystemLiteral" is not detected. From a purely parsing
2689 * point of view that's a nice mess.
2690 */
2691 const xmlChar *ptr;
2692 GROW;
2693
2694 ptr = CUR_PTR;
2695 if (!IS_BLANK(*ptr)) return(NULL);
2696
2697 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2698 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2699 }
2700 SKIP_BLANKS;
2701 URI = xmlParseSystemLiteral(ctxt);
2702 if (URI == NULL) {
2703 ctxt->errNo = XML_ERR_URI_REQUIRED;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "xmlParseExternalID: PUBLIC, no URI\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 }
2710 }
2711 return(URI);
2712}
2713
2714/**
2715 * xmlParseComment:
2716 * @ctxt: an XML parser context
2717 *
2718 * Skip an XML (SGML) comment <!-- .... -->
2719 * The spec says that "For compatibility, the string "--" (double-hyphen)
2720 * must not occur within comments. "
2721 *
2722 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2723 */
2724void
2725xmlParseComment(xmlParserCtxtPtr ctxt) {
2726 xmlChar *buf = NULL;
2727 int len;
2728 int size = XML_PARSER_BUFFER_SIZE;
2729 int q, ql;
2730 int r, rl;
2731 int cur, l;
2732 xmlParserInputState state;
2733 xmlParserInputPtr input = ctxt->input;
2734 int count = 0;
2735
2736 /*
2737 * Check that there is a comment right here.
2738 */
2739 if ((RAW != '<') || (NXT(1) != '!') ||
2740 (NXT(2) != '-') || (NXT(3) != '-')) return;
2741
2742 state = ctxt->instate;
2743 ctxt->instate = XML_PARSER_COMMENT;
2744 SHRINK;
2745 SKIP(4);
2746 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2747 if (buf == NULL) {
2748 xmlGenericError(xmlGenericErrorContext,
2749 "malloc of %d byte failed\n", size);
2750 ctxt->instate = state;
2751 return;
2752 }
2753 q = CUR_CHAR(ql);
2754 NEXTL(ql);
2755 r = CUR_CHAR(rl);
2756 NEXTL(rl);
2757 cur = CUR_CHAR(l);
2758 len = 0;
2759 while (IS_CHAR(cur) && /* checked */
2760 ((cur != '>') ||
2761 (r != '-') || (q != '-'))) {
2762 if ((r == '-') && (q == '-') && (len > 1)) {
2763 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2765 ctxt->sax->error(ctxt->userData,
2766 "Comment must not contain '--' (double-hyphen)`\n");
2767 ctxt->wellFormed = 0;
2768 ctxt->disableSAX = 1;
2769 }
2770 if (len + 5 >= size) {
2771 size *= 2;
2772 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2773 if (buf == NULL) {
2774 xmlGenericError(xmlGenericErrorContext,
2775 "realloc of %d byte failed\n", size);
2776 ctxt->instate = state;
2777 return;
2778 }
2779 }
2780 COPY_BUF(ql,buf,len,q);
2781 q = r;
2782 ql = rl;
2783 r = cur;
2784 rl = l;
2785
2786 count++;
2787 if (count > 50) {
2788 GROW;
2789 count = 0;
2790 }
2791 NEXTL(l);
2792 cur = CUR_CHAR(l);
2793 if (cur == 0) {
2794 SHRINK;
2795 GROW;
2796 cur = CUR_CHAR(l);
2797 }
2798 }
2799 buf[len] = 0;
2800 if (!IS_CHAR(cur)) {
2801 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2803 ctxt->sax->error(ctxt->userData,
2804 "Comment not terminated \n<!--%.50s\n", buf);
2805 ctxt->wellFormed = 0;
2806 ctxt->disableSAX = 1;
2807 xmlFree(buf);
2808 } else {
2809 if (input != ctxt->input) {
2810 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2812 ctxt->sax->error(ctxt->userData,
2813"Comment doesn't start and stop in the same entity\n");
2814 ctxt->wellFormed = 0;
2815 ctxt->disableSAX = 1;
2816 }
2817 NEXT;
2818 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2819 (!ctxt->disableSAX))
2820 ctxt->sax->comment(ctxt->userData, buf);
2821 xmlFree(buf);
2822 }
2823 ctxt->instate = state;
2824}
2825
2826/**
2827 * xmlParsePITarget:
2828 * @ctxt: an XML parser context
2829 *
2830 * parse the name of a PI
2831 *
2832 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2833 *
2834 * Returns the PITarget name or NULL
2835 */
2836
2837xmlChar *
2838xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2839 xmlChar *name;
2840
2841 name = xmlParseName(ctxt);
2842 if ((name != NULL) &&
2843 ((name[0] == 'x') || (name[0] == 'X')) &&
2844 ((name[1] == 'm') || (name[1] == 'M')) &&
2845 ((name[2] == 'l') || (name[2] == 'L'))) {
2846 int i;
2847 if ((name[0] == 'x') && (name[1] == 'm') &&
2848 (name[2] == 'l') && (name[3] == 0)) {
2849 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2851 ctxt->sax->error(ctxt->userData,
2852 "XML declaration allowed only at the start of the document\n");
2853 ctxt->wellFormed = 0;
2854 ctxt->disableSAX = 1;
2855 return(name);
2856 } else if (name[3] == 0) {
2857 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2859 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2860 ctxt->wellFormed = 0;
2861 ctxt->disableSAX = 1;
2862 return(name);
2863 }
2864 for (i = 0;;i++) {
2865 if (xmlW3CPIs[i] == NULL) break;
2866 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2867 return(name);
2868 }
2869 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2870 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2871 ctxt->sax->warning(ctxt->userData,
2872 "xmlParsePItarget: invalid name prefix 'xml'\n");
2873 }
2874 }
2875 return(name);
2876}
2877
2878/**
2879 * xmlParsePI:
2880 * @ctxt: an XML parser context
2881 *
2882 * parse an XML Processing Instruction.
2883 *
2884 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2885 *
2886 * The processing is transfered to SAX once parsed.
2887 */
2888
2889void
2890xmlParsePI(xmlParserCtxtPtr ctxt) {
2891 xmlChar *buf = NULL;
2892 int len = 0;
2893 int size = XML_PARSER_BUFFER_SIZE;
2894 int cur, l;
2895 xmlChar *target;
2896 xmlParserInputState state;
2897 int count = 0;
2898
2899 if ((RAW == '<') && (NXT(1) == '?')) {
2900 xmlParserInputPtr input = ctxt->input;
2901 state = ctxt->instate;
2902 ctxt->instate = XML_PARSER_PI;
2903 /*
2904 * this is a Processing Instruction.
2905 */
2906 SKIP(2);
2907 SHRINK;
2908
2909 /*
2910 * Parse the target name and check for special support like
2911 * namespace.
2912 */
2913 target = xmlParsePITarget(ctxt);
2914 if (target != NULL) {
2915 if ((RAW == '?') && (NXT(1) == '>')) {
2916 if (input != ctxt->input) {
2917 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2919 ctxt->sax->error(ctxt->userData,
2920 "PI declaration doesn't start and stop in the same entity\n");
2921 ctxt->wellFormed = 0;
2922 ctxt->disableSAX = 1;
2923 }
2924 SKIP(2);
2925
2926 /*
2927 * SAX: PI detected.
2928 */
2929 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2930 (ctxt->sax->processingInstruction != NULL))
2931 ctxt->sax->processingInstruction(ctxt->userData,
2932 target, NULL);
2933 ctxt->instate = state;
2934 xmlFree(target);
2935 return;
2936 }
2937 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2938 if (buf == NULL) {
2939 xmlGenericError(xmlGenericErrorContext,
2940 "malloc of %d byte failed\n", size);
2941 ctxt->instate = state;
2942 return;
2943 }
2944 cur = CUR;
2945 if (!IS_BLANK(cur)) {
2946 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2948 ctxt->sax->error(ctxt->userData,
2949 "xmlParsePI: PI %s space expected\n", target);
2950 ctxt->wellFormed = 0;
2951 ctxt->disableSAX = 1;
2952 }
2953 SKIP_BLANKS;
2954 cur = CUR_CHAR(l);
2955 while (IS_CHAR(cur) && /* checked */
2956 ((cur != '?') || (NXT(1) != '>'))) {
2957 if (len + 5 >= size) {
2958 size *= 2;
2959 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2960 if (buf == NULL) {
2961 xmlGenericError(xmlGenericErrorContext,
2962 "realloc of %d byte failed\n", size);
2963 ctxt->instate = state;
2964 return;
2965 }
2966 }
2967 count++;
2968 if (count > 50) {
2969 GROW;
2970 count = 0;
2971 }
2972 COPY_BUF(l,buf,len,cur);
2973 NEXTL(l);
2974 cur = CUR_CHAR(l);
2975 if (cur == 0) {
2976 SHRINK;
2977 GROW;
2978 cur = CUR_CHAR(l);
2979 }
2980 }
2981 buf[len] = 0;
2982 if (cur != '?') {
2983 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "xmlParsePI: PI %s never end ...\n", target);
2987 ctxt->wellFormed = 0;
2988 ctxt->disableSAX = 1;
2989 } else {
2990 if (input != ctxt->input) {
2991 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2993 ctxt->sax->error(ctxt->userData,
2994 "PI declaration doesn't start and stop in the same entity\n");
2995 ctxt->wellFormed = 0;
2996 ctxt->disableSAX = 1;
2997 }
2998 SKIP(2);
2999
3000 /*
3001 * SAX: PI detected.
3002 */
3003 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3004 (ctxt->sax->processingInstruction != NULL))
3005 ctxt->sax->processingInstruction(ctxt->userData,
3006 target, buf);
3007 }
3008 xmlFree(buf);
3009 xmlFree(target);
3010 } else {
3011 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3013 ctxt->sax->error(ctxt->userData,
3014 "xmlParsePI : no target name\n");
3015 ctxt->wellFormed = 0;
3016 ctxt->disableSAX = 1;
3017 }
3018 ctxt->instate = state;
3019 }
3020}
3021
3022/**
3023 * xmlParseNotationDecl:
3024 * @ctxt: an XML parser context
3025 *
3026 * parse a notation declaration
3027 *
3028 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3029 *
3030 * Hence there is actually 3 choices:
3031 * 'PUBLIC' S PubidLiteral
3032 * 'PUBLIC' S PubidLiteral S SystemLiteral
3033 * and 'SYSTEM' S SystemLiteral
3034 *
3035 * See the NOTE on xmlParseExternalID().
3036 */
3037
3038void
3039xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3040 xmlChar *name;
3041 xmlChar *Pubid;
3042 xmlChar *Systemid;
3043
3044 if ((RAW == '<') && (NXT(1) == '!') &&
3045 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3046 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3047 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3048 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3049 xmlParserInputPtr input = ctxt->input;
3050 SHRINK;
3051 SKIP(10);
3052 if (!IS_BLANK(CUR)) {
3053 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3055 ctxt->sax->error(ctxt->userData,
3056 "Space required after '<!NOTATION'\n");
3057 ctxt->wellFormed = 0;
3058 ctxt->disableSAX = 1;
3059 return;
3060 }
3061 SKIP_BLANKS;
3062
Daniel Veillard29631a82001-03-05 09:49:20 +00003063 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003064 if (name == NULL) {
3065 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068 "NOTATION: Name expected here\n");
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 return;
3072 }
3073 if (!IS_BLANK(CUR)) {
3074 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3076 ctxt->sax->error(ctxt->userData,
3077 "Space required after the NOTATION name'\n");
3078 ctxt->wellFormed = 0;
3079 ctxt->disableSAX = 1;
3080 return;
3081 }
3082 SKIP_BLANKS;
3083
3084 /*
3085 * Parse the IDs.
3086 */
3087 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3088 SKIP_BLANKS;
3089
3090 if (RAW == '>') {
3091 if (input != ctxt->input) {
3092 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095"Notation declaration doesn't start and stop in the same entity\n");
3096 ctxt->wellFormed = 0;
3097 ctxt->disableSAX = 1;
3098 }
3099 NEXT;
3100 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3101 (ctxt->sax->notationDecl != NULL))
3102 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3103 } else {
3104 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3106 ctxt->sax->error(ctxt->userData,
3107 "'>' required to close NOTATION declaration\n");
3108 ctxt->wellFormed = 0;
3109 ctxt->disableSAX = 1;
3110 }
3111 xmlFree(name);
3112 if (Systemid != NULL) xmlFree(Systemid);
3113 if (Pubid != NULL) xmlFree(Pubid);
3114 }
3115}
3116
3117/**
3118 * xmlParseEntityDecl:
3119 * @ctxt: an XML parser context
3120 *
3121 * parse <!ENTITY declarations
3122 *
3123 * [70] EntityDecl ::= GEDecl | PEDecl
3124 *
3125 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3126 *
3127 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3128 *
3129 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3130 *
3131 * [74] PEDef ::= EntityValue | ExternalID
3132 *
3133 * [76] NDataDecl ::= S 'NDATA' S Name
3134 *
3135 * [ VC: Notation Declared ]
3136 * The Name must match the declared name of a notation.
3137 */
3138
3139void
3140xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3141 xmlChar *name = NULL;
3142 xmlChar *value = NULL;
3143 xmlChar *URI = NULL, *literal = NULL;
3144 xmlChar *ndata = NULL;
3145 int isParameter = 0;
3146 xmlChar *orig = NULL;
3147
3148 GROW;
3149 if ((RAW == '<') && (NXT(1) == '!') &&
3150 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3151 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3152 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3153 xmlParserInputPtr input = ctxt->input;
3154 ctxt->instate = XML_PARSER_ENTITY_DECL;
3155 SHRINK;
3156 SKIP(8);
3157 if (!IS_BLANK(CUR)) {
3158 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3160 ctxt->sax->error(ctxt->userData,
3161 "Space required after '<!ENTITY'\n");
3162 ctxt->wellFormed = 0;
3163 ctxt->disableSAX = 1;
3164 }
3165 SKIP_BLANKS;
3166
3167 if (RAW == '%') {
3168 NEXT;
3169 if (!IS_BLANK(CUR)) {
3170 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3172 ctxt->sax->error(ctxt->userData,
3173 "Space required after '%'\n");
3174 ctxt->wellFormed = 0;
3175 ctxt->disableSAX = 1;
3176 }
3177 SKIP_BLANKS;
3178 isParameter = 1;
3179 }
3180
Daniel Veillard29631a82001-03-05 09:49:20 +00003181 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003182 if (name == NULL) {
3183 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3185 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 return;
3189 }
3190 if (!IS_BLANK(CUR)) {
3191 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3193 ctxt->sax->error(ctxt->userData,
3194 "Space required after the entity name\n");
3195 ctxt->wellFormed = 0;
3196 ctxt->disableSAX = 1;
3197 }
3198 SKIP_BLANKS;
3199
3200 /*
3201 * handle the various case of definitions...
3202 */
3203 if (isParameter) {
3204 if ((RAW == '"') || (RAW == '\'')) {
3205 value = xmlParseEntityValue(ctxt, &orig);
3206 if (value) {
3207 if ((ctxt->sax != NULL) &&
3208 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3209 ctxt->sax->entityDecl(ctxt->userData, name,
3210 XML_INTERNAL_PARAMETER_ENTITY,
3211 NULL, NULL, value);
3212 }
3213 } else {
3214 URI = xmlParseExternalID(ctxt, &literal, 1);
3215 if ((URI == NULL) && (literal == NULL)) {
3216 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3218 ctxt->sax->error(ctxt->userData,
3219 "Entity value required\n");
3220 ctxt->wellFormed = 0;
3221 ctxt->disableSAX = 1;
3222 }
3223 if (URI) {
3224 xmlURIPtr uri;
3225
3226 uri = xmlParseURI((const char *) URI);
3227 if (uri == NULL) {
3228 ctxt->errNo = XML_ERR_INVALID_URI;
3229 if ((ctxt->sax != NULL) &&
3230 (!ctxt->disableSAX) &&
3231 (ctxt->sax->error != NULL))
3232 ctxt->sax->error(ctxt->userData,
3233 "Invalid URI: %s\n", URI);
3234 ctxt->wellFormed = 0;
3235 } else {
3236 if (uri->fragment != NULL) {
3237 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3238 if ((ctxt->sax != NULL) &&
3239 (!ctxt->disableSAX) &&
3240 (ctxt->sax->error != NULL))
3241 ctxt->sax->error(ctxt->userData,
3242 "Fragment not allowed: %s\n", URI);
3243 ctxt->wellFormed = 0;
3244 } else {
3245 if ((ctxt->sax != NULL) &&
3246 (!ctxt->disableSAX) &&
3247 (ctxt->sax->entityDecl != NULL))
3248 ctxt->sax->entityDecl(ctxt->userData, name,
3249 XML_EXTERNAL_PARAMETER_ENTITY,
3250 literal, URI, NULL);
3251 }
3252 xmlFreeURI(uri);
3253 }
3254 }
3255 }
3256 } else {
3257 if ((RAW == '"') || (RAW == '\'')) {
3258 value = xmlParseEntityValue(ctxt, &orig);
3259 if ((ctxt->sax != NULL) &&
3260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3261 ctxt->sax->entityDecl(ctxt->userData, name,
3262 XML_INTERNAL_GENERAL_ENTITY,
3263 NULL, NULL, value);
3264 } else {
3265 URI = xmlParseExternalID(ctxt, &literal, 1);
3266 if ((URI == NULL) && (literal == NULL)) {
3267 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData,
3270 "Entity value required\n");
3271 ctxt->wellFormed = 0;
3272 ctxt->disableSAX = 1;
3273 }
3274 if (URI) {
3275 xmlURIPtr uri;
3276
3277 uri = xmlParseURI((const char *)URI);
3278 if (uri == NULL) {
3279 ctxt->errNo = XML_ERR_INVALID_URI;
3280 if ((ctxt->sax != NULL) &&
3281 (!ctxt->disableSAX) &&
3282 (ctxt->sax->error != NULL))
3283 ctxt->sax->error(ctxt->userData,
3284 "Invalid URI: %s\n", URI);
3285 ctxt->wellFormed = 0;
3286 } else {
3287 if (uri->fragment != NULL) {
3288 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3289 if ((ctxt->sax != NULL) &&
3290 (!ctxt->disableSAX) &&
3291 (ctxt->sax->error != NULL))
3292 ctxt->sax->error(ctxt->userData,
3293 "Fragment not allowed: %s\n", URI);
3294 ctxt->wellFormed = 0;
3295 }
3296 xmlFreeURI(uri);
3297 }
3298 }
3299 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3300 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3302 ctxt->sax->error(ctxt->userData,
3303 "Space required before 'NDATA'\n");
3304 ctxt->wellFormed = 0;
3305 ctxt->disableSAX = 1;
3306 }
3307 SKIP_BLANKS;
3308 if ((RAW == 'N') && (NXT(1) == 'D') &&
3309 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3310 (NXT(4) == 'A')) {
3311 SKIP(5);
3312 if (!IS_BLANK(CUR)) {
3313 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3315 ctxt->sax->error(ctxt->userData,
3316 "Space required after 'NDATA'\n");
3317 ctxt->wellFormed = 0;
3318 ctxt->disableSAX = 1;
3319 }
3320 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003321 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3323 (ctxt->sax->unparsedEntityDecl != NULL))
3324 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3325 literal, URI, ndata);
3326 } else {
3327 if ((ctxt->sax != NULL) &&
3328 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3329 ctxt->sax->entityDecl(ctxt->userData, name,
3330 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3331 literal, URI, NULL);
3332 }
3333 }
3334 }
3335 SKIP_BLANKS;
3336 if (RAW != '>') {
3337 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3339 ctxt->sax->error(ctxt->userData,
3340 "xmlParseEntityDecl: entity %s not terminated\n", name);
3341 ctxt->wellFormed = 0;
3342 ctxt->disableSAX = 1;
3343 } else {
3344 if (input != ctxt->input) {
3345 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3347 ctxt->sax->error(ctxt->userData,
3348"Entity declaration doesn't start and stop in the same entity\n");
3349 ctxt->wellFormed = 0;
3350 ctxt->disableSAX = 1;
3351 }
3352 NEXT;
3353 }
3354 if (orig != NULL) {
3355 /*
3356 * Ugly mechanism to save the raw entity value.
3357 */
3358 xmlEntityPtr cur = NULL;
3359
3360 if (isParameter) {
3361 if ((ctxt->sax != NULL) &&
3362 (ctxt->sax->getParameterEntity != NULL))
3363 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3364 } else {
3365 if ((ctxt->sax != NULL) &&
3366 (ctxt->sax->getEntity != NULL))
3367 cur = ctxt->sax->getEntity(ctxt->userData, name);
3368 }
3369 if (cur != NULL) {
3370 if (cur->orig != NULL)
3371 xmlFree(orig);
3372 else
3373 cur->orig = orig;
3374 } else
3375 xmlFree(orig);
3376 }
3377 if (name != NULL) xmlFree(name);
3378 if (value != NULL) xmlFree(value);
3379 if (URI != NULL) xmlFree(URI);
3380 if (literal != NULL) xmlFree(literal);
3381 if (ndata != NULL) xmlFree(ndata);
3382 }
3383}
3384
3385/**
3386 * xmlParseDefaultDecl:
3387 * @ctxt: an XML parser context
3388 * @value: Receive a possible fixed default value for the attribute
3389 *
3390 * Parse an attribute default declaration
3391 *
3392 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3393 *
3394 * [ VC: Required Attribute ]
3395 * if the default declaration is the keyword #REQUIRED, then the
3396 * attribute must be specified for all elements of the type in the
3397 * attribute-list declaration.
3398 *
3399 * [ VC: Attribute Default Legal ]
3400 * The declared default value must meet the lexical constraints of
3401 * the declared attribute type c.f. xmlValidateAttributeDecl()
3402 *
3403 * [ VC: Fixed Attribute Default ]
3404 * if an attribute has a default value declared with the #FIXED
3405 * keyword, instances of that attribute must match the default value.
3406 *
3407 * [ WFC: No < in Attribute Values ]
3408 * handled in xmlParseAttValue()
3409 *
3410 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3411 * or XML_ATTRIBUTE_FIXED.
3412 */
3413
3414int
3415xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3416 int val;
3417 xmlChar *ret;
3418
3419 *value = NULL;
3420 if ((RAW == '#') && (NXT(1) == 'R') &&
3421 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3422 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3423 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3424 (NXT(8) == 'D')) {
3425 SKIP(9);
3426 return(XML_ATTRIBUTE_REQUIRED);
3427 }
3428 if ((RAW == '#') && (NXT(1) == 'I') &&
3429 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3430 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3431 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3432 SKIP(8);
3433 return(XML_ATTRIBUTE_IMPLIED);
3434 }
3435 val = XML_ATTRIBUTE_NONE;
3436 if ((RAW == '#') && (NXT(1) == 'F') &&
3437 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3438 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3439 SKIP(6);
3440 val = XML_ATTRIBUTE_FIXED;
3441 if (!IS_BLANK(CUR)) {
3442 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3444 ctxt->sax->error(ctxt->userData,
3445 "Space required after '#FIXED'\n");
3446 ctxt->wellFormed = 0;
3447 ctxt->disableSAX = 1;
3448 }
3449 SKIP_BLANKS;
3450 }
3451 ret = xmlParseAttValue(ctxt);
3452 ctxt->instate = XML_PARSER_DTD;
3453 if (ret == NULL) {
3454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3455 ctxt->sax->error(ctxt->userData,
3456 "Attribute default value declaration error\n");
3457 ctxt->wellFormed = 0;
3458 ctxt->disableSAX = 1;
3459 } else
3460 *value = ret;
3461 return(val);
3462}
3463
3464/**
3465 * xmlParseNotationType:
3466 * @ctxt: an XML parser context
3467 *
3468 * parse an Notation attribute type.
3469 *
3470 * Note: the leading 'NOTATION' S part has already being parsed...
3471 *
3472 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3473 *
3474 * [ VC: Notation Attributes ]
3475 * Values of this type must match one of the notation names included
3476 * in the declaration; all notation names in the declaration must be declared.
3477 *
3478 * Returns: the notation attribute tree built while parsing
3479 */
3480
3481xmlEnumerationPtr
3482xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3483 xmlChar *name;
3484 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3485
3486 if (RAW != '(') {
3487 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3489 ctxt->sax->error(ctxt->userData,
3490 "'(' required to start 'NOTATION'\n");
3491 ctxt->wellFormed = 0;
3492 ctxt->disableSAX = 1;
3493 return(NULL);
3494 }
3495 SHRINK;
3496 do {
3497 NEXT;
3498 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003499 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003500 if (name == NULL) {
3501 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3503 ctxt->sax->error(ctxt->userData,
3504 "Name expected in NOTATION declaration\n");
3505 ctxt->wellFormed = 0;
3506 ctxt->disableSAX = 1;
3507 return(ret);
3508 }
3509 cur = xmlCreateEnumeration(name);
3510 xmlFree(name);
3511 if (cur == NULL) return(ret);
3512 if (last == NULL) ret = last = cur;
3513 else {
3514 last->next = cur;
3515 last = cur;
3516 }
3517 SKIP_BLANKS;
3518 } while (RAW == '|');
3519 if (RAW != ')') {
3520 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3522 ctxt->sax->error(ctxt->userData,
3523 "')' required to finish NOTATION declaration\n");
3524 ctxt->wellFormed = 0;
3525 ctxt->disableSAX = 1;
3526 if ((last != NULL) && (last != ret))
3527 xmlFreeEnumeration(last);
3528 return(ret);
3529 }
3530 NEXT;
3531 return(ret);
3532}
3533
3534/**
3535 * xmlParseEnumerationType:
3536 * @ctxt: an XML parser context
3537 *
3538 * parse an Enumeration attribute type.
3539 *
3540 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3541 *
3542 * [ VC: Enumeration ]
3543 * Values of this type must match one of the Nmtoken tokens in
3544 * the declaration
3545 *
3546 * Returns: the enumeration attribute tree built while parsing
3547 */
3548
3549xmlEnumerationPtr
3550xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3551 xmlChar *name;
3552 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3553
3554 if (RAW != '(') {
3555 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3557 ctxt->sax->error(ctxt->userData,
3558 "'(' required to start ATTLIST enumeration\n");
3559 ctxt->wellFormed = 0;
3560 ctxt->disableSAX = 1;
3561 return(NULL);
3562 }
3563 SHRINK;
3564 do {
3565 NEXT;
3566 SKIP_BLANKS;
3567 name = xmlParseNmtoken(ctxt);
3568 if (name == NULL) {
3569 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3571 ctxt->sax->error(ctxt->userData,
3572 "NmToken expected in ATTLIST enumeration\n");
3573 ctxt->wellFormed = 0;
3574 ctxt->disableSAX = 1;
3575 return(ret);
3576 }
3577 cur = xmlCreateEnumeration(name);
3578 xmlFree(name);
3579 if (cur == NULL) return(ret);
3580 if (last == NULL) ret = last = cur;
3581 else {
3582 last->next = cur;
3583 last = cur;
3584 }
3585 SKIP_BLANKS;
3586 } while (RAW == '|');
3587 if (RAW != ')') {
3588 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3590 ctxt->sax->error(ctxt->userData,
3591 "')' required to finish ATTLIST enumeration\n");
3592 ctxt->wellFormed = 0;
3593 ctxt->disableSAX = 1;
3594 return(ret);
3595 }
3596 NEXT;
3597 return(ret);
3598}
3599
3600/**
3601 * xmlParseEnumeratedType:
3602 * @ctxt: an XML parser context
3603 * @tree: the enumeration tree built while parsing
3604 *
3605 * parse an Enumerated attribute type.
3606 *
3607 * [57] EnumeratedType ::= NotationType | Enumeration
3608 *
3609 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3610 *
3611 *
3612 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3613 */
3614
3615int
3616xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3617 if ((RAW == 'N') && (NXT(1) == 'O') &&
3618 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3619 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3620 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3621 SKIP(8);
3622 if (!IS_BLANK(CUR)) {
3623 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3625 ctxt->sax->error(ctxt->userData,
3626 "Space required after 'NOTATION'\n");
3627 ctxt->wellFormed = 0;
3628 ctxt->disableSAX = 1;
3629 return(0);
3630 }
3631 SKIP_BLANKS;
3632 *tree = xmlParseNotationType(ctxt);
3633 if (*tree == NULL) return(0);
3634 return(XML_ATTRIBUTE_NOTATION);
3635 }
3636 *tree = xmlParseEnumerationType(ctxt);
3637 if (*tree == NULL) return(0);
3638 return(XML_ATTRIBUTE_ENUMERATION);
3639}
3640
3641/**
3642 * xmlParseAttributeType:
3643 * @ctxt: an XML parser context
3644 * @tree: the enumeration tree built while parsing
3645 *
3646 * parse the Attribute list def for an element
3647 *
3648 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3649 *
3650 * [55] StringType ::= 'CDATA'
3651 *
3652 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3653 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3654 *
3655 * Validity constraints for attribute values syntax are checked in
3656 * xmlValidateAttributeValue()
3657 *
3658 * [ VC: ID ]
3659 * Values of type ID must match the Name production. A name must not
3660 * appear more than once in an XML document as a value of this type;
3661 * i.e., ID values must uniquely identify the elements which bear them.
3662 *
3663 * [ VC: One ID per Element Type ]
3664 * No element type may have more than one ID attribute specified.
3665 *
3666 * [ VC: ID Attribute Default ]
3667 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3668 *
3669 * [ VC: IDREF ]
3670 * Values of type IDREF must match the Name production, and values
3671 * of type IDREFS must match Names; each IDREF Name must match the value
3672 * of an ID attribute on some element in the XML document; i.e. IDREF
3673 * values must match the value of some ID attribute.
3674 *
3675 * [ VC: Entity Name ]
3676 * Values of type ENTITY must match the Name production, values
3677 * of type ENTITIES must match Names; each Entity Name must match the
3678 * name of an unparsed entity declared in the DTD.
3679 *
3680 * [ VC: Name Token ]
3681 * Values of type NMTOKEN must match the Nmtoken production; values
3682 * of type NMTOKENS must match Nmtokens.
3683 *
3684 * Returns the attribute type
3685 */
3686int
3687xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3688 SHRINK;
3689 if ((RAW == 'C') && (NXT(1) == 'D') &&
3690 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3691 (NXT(4) == 'A')) {
3692 SKIP(5);
3693 return(XML_ATTRIBUTE_CDATA);
3694 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3695 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3696 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3697 SKIP(6);
3698 return(XML_ATTRIBUTE_IDREFS);
3699 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3700 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3701 (NXT(4) == 'F')) {
3702 SKIP(5);
3703 return(XML_ATTRIBUTE_IDREF);
3704 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3705 SKIP(2);
3706 return(XML_ATTRIBUTE_ID);
3707 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3708 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3709 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3710 SKIP(6);
3711 return(XML_ATTRIBUTE_ENTITY);
3712 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3713 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3714 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3715 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3716 SKIP(8);
3717 return(XML_ATTRIBUTE_ENTITIES);
3718 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3719 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3720 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3721 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3722 SKIP(8);
3723 return(XML_ATTRIBUTE_NMTOKENS);
3724 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3725 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3726 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3727 (NXT(6) == 'N')) {
3728 SKIP(7);
3729 return(XML_ATTRIBUTE_NMTOKEN);
3730 }
3731 return(xmlParseEnumeratedType(ctxt, tree));
3732}
3733
3734/**
3735 * xmlParseAttributeListDecl:
3736 * @ctxt: an XML parser context
3737 *
3738 * : parse the Attribute list def for an element
3739 *
3740 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3741 *
3742 * [53] AttDef ::= S Name S AttType S DefaultDecl
3743 *
3744 */
3745void
3746xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3747 xmlChar *elemName;
3748 xmlChar *attrName;
3749 xmlEnumerationPtr tree;
3750
3751 if ((RAW == '<') && (NXT(1) == '!') &&
3752 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3753 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3754 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3755 (NXT(8) == 'T')) {
3756 xmlParserInputPtr input = ctxt->input;
3757
3758 SKIP(9);
3759 if (!IS_BLANK(CUR)) {
3760 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3762 ctxt->sax->error(ctxt->userData,
3763 "Space required after '<!ATTLIST'\n");
3764 ctxt->wellFormed = 0;
3765 ctxt->disableSAX = 1;
3766 }
3767 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003768 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 if (elemName == NULL) {
3770 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3772 ctxt->sax->error(ctxt->userData,
3773 "ATTLIST: no name for Element\n");
3774 ctxt->wellFormed = 0;
3775 ctxt->disableSAX = 1;
3776 return;
3777 }
3778 SKIP_BLANKS;
3779 GROW;
3780 while (RAW != '>') {
3781 const xmlChar *check = CUR_PTR;
3782 int type;
3783 int def;
3784 xmlChar *defaultValue = NULL;
3785
3786 GROW;
3787 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003788 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 if (attrName == NULL) {
3790 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3792 ctxt->sax->error(ctxt->userData,
3793 "ATTLIST: no name for Attribute\n");
3794 ctxt->wellFormed = 0;
3795 ctxt->disableSAX = 1;
3796 break;
3797 }
3798 GROW;
3799 if (!IS_BLANK(CUR)) {
3800 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3802 ctxt->sax->error(ctxt->userData,
3803 "Space required after the attribute name\n");
3804 ctxt->wellFormed = 0;
3805 ctxt->disableSAX = 1;
3806 if (attrName != NULL)
3807 xmlFree(attrName);
3808 if (defaultValue != NULL)
3809 xmlFree(defaultValue);
3810 break;
3811 }
3812 SKIP_BLANKS;
3813
3814 type = xmlParseAttributeType(ctxt, &tree);
3815 if (type <= 0) {
3816 if (attrName != NULL)
3817 xmlFree(attrName);
3818 if (defaultValue != NULL)
3819 xmlFree(defaultValue);
3820 break;
3821 }
3822
3823 GROW;
3824 if (!IS_BLANK(CUR)) {
3825 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3827 ctxt->sax->error(ctxt->userData,
3828 "Space required after the attribute type\n");
3829 ctxt->wellFormed = 0;
3830 ctxt->disableSAX = 1;
3831 if (attrName != NULL)
3832 xmlFree(attrName);
3833 if (defaultValue != NULL)
3834 xmlFree(defaultValue);
3835 if (tree != NULL)
3836 xmlFreeEnumeration(tree);
3837 break;
3838 }
3839 SKIP_BLANKS;
3840
3841 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3842 if (def <= 0) {
3843 if (attrName != NULL)
3844 xmlFree(attrName);
3845 if (defaultValue != NULL)
3846 xmlFree(defaultValue);
3847 if (tree != NULL)
3848 xmlFreeEnumeration(tree);
3849 break;
3850 }
3851
3852 GROW;
3853 if (RAW != '>') {
3854 if (!IS_BLANK(CUR)) {
3855 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3857 ctxt->sax->error(ctxt->userData,
3858 "Space required after the attribute default value\n");
3859 ctxt->wellFormed = 0;
3860 ctxt->disableSAX = 1;
3861 if (attrName != NULL)
3862 xmlFree(attrName);
3863 if (defaultValue != NULL)
3864 xmlFree(defaultValue);
3865 if (tree != NULL)
3866 xmlFreeEnumeration(tree);
3867 break;
3868 }
3869 SKIP_BLANKS;
3870 }
3871 if (check == CUR_PTR) {
3872 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3874 ctxt->sax->error(ctxt->userData,
3875 "xmlParseAttributeListDecl: detected internal error\n");
3876 if (attrName != NULL)
3877 xmlFree(attrName);
3878 if (defaultValue != NULL)
3879 xmlFree(defaultValue);
3880 if (tree != NULL)
3881 xmlFreeEnumeration(tree);
3882 break;
3883 }
3884 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3885 (ctxt->sax->attributeDecl != NULL))
3886 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3887 type, def, defaultValue, tree);
3888 if (attrName != NULL)
3889 xmlFree(attrName);
3890 if (defaultValue != NULL)
3891 xmlFree(defaultValue);
3892 GROW;
3893 }
3894 if (RAW == '>') {
3895 if (input != ctxt->input) {
3896 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3898 ctxt->sax->error(ctxt->userData,
3899"Attribute list declaration doesn't start and stop in the same entity\n");
3900 ctxt->wellFormed = 0;
3901 ctxt->disableSAX = 1;
3902 }
3903 NEXT;
3904 }
3905
3906 xmlFree(elemName);
3907 }
3908}
3909
3910/**
3911 * xmlParseElementMixedContentDecl:
3912 * @ctxt: an XML parser context
3913 *
3914 * parse the declaration for a Mixed Element content
3915 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3916 *
3917 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3918 * '(' S? '#PCDATA' S? ')'
3919 *
3920 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3921 *
3922 * [ VC: No Duplicate Types ]
3923 * The same name must not appear more than once in a single
3924 * mixed-content declaration.
3925 *
3926 * returns: the list of the xmlElementContentPtr describing the element choices
3927 */
3928xmlElementContentPtr
3929xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3930 xmlElementContentPtr ret = NULL, cur = NULL, n;
3931 xmlChar *elem = NULL;
3932
3933 GROW;
3934 if ((RAW == '#') && (NXT(1) == 'P') &&
3935 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3936 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3937 (NXT(6) == 'A')) {
3938 SKIP(7);
3939 SKIP_BLANKS;
3940 SHRINK;
3941 if (RAW == ')') {
3942 ctxt->entity = ctxt->input;
3943 NEXT;
3944 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3945 if (RAW == '*') {
3946 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3947 NEXT;
3948 }
3949 return(ret);
3950 }
3951 if ((RAW == '(') || (RAW == '|')) {
3952 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3953 if (ret == NULL) return(NULL);
3954 }
3955 while (RAW == '|') {
3956 NEXT;
3957 if (elem == NULL) {
3958 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3959 if (ret == NULL) return(NULL);
3960 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003961 if (cur != NULL)
3962 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003963 cur = ret;
3964 } else {
3965 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3966 if (n == NULL) return(NULL);
3967 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003968 if (n->c1 != NULL)
3969 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003970 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003971 if (n != NULL)
3972 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003973 cur = n;
3974 xmlFree(elem);
3975 }
3976 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003977 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003978 if (elem == NULL) {
3979 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3981 ctxt->sax->error(ctxt->userData,
3982 "xmlParseElementMixedContentDecl : Name expected\n");
3983 ctxt->wellFormed = 0;
3984 ctxt->disableSAX = 1;
3985 xmlFreeElementContent(cur);
3986 return(NULL);
3987 }
3988 SKIP_BLANKS;
3989 GROW;
3990 }
3991 if ((RAW == ')') && (NXT(1) == '*')) {
3992 if (elem != NULL) {
3993 cur->c2 = xmlNewElementContent(elem,
3994 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003995 if (cur->c2 != NULL)
3996 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003997 xmlFree(elem);
3998 }
3999 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4000 ctxt->entity = ctxt->input;
4001 SKIP(2);
4002 } else {
4003 if (elem != NULL) xmlFree(elem);
4004 xmlFreeElementContent(ret);
4005 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4009 ctxt->wellFormed = 0;
4010 ctxt->disableSAX = 1;
4011 return(NULL);
4012 }
4013
4014 } else {
4015 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 }
4022 return(ret);
4023}
4024
4025/**
4026 * xmlParseElementChildrenContentDecl:
4027 * @ctxt: an XML parser context
4028 *
4029 * parse the declaration for a Mixed Element content
4030 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4031 *
4032 *
4033 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4034 *
4035 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4036 *
4037 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4038 *
4039 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4040 *
4041 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4042 * TODO Parameter-entity replacement text must be properly nested
4043 * with parenthetized groups. That is to say, if either of the
4044 * opening or closing parentheses in a choice, seq, or Mixed
4045 * construct is contained in the replacement text for a parameter
4046 * entity, both must be contained in the same replacement text. For
4047 * interoperability, if a parameter-entity reference appears in a
4048 * choice, seq, or Mixed construct, its replacement text should not
4049 * be empty, and neither the first nor last non-blank character of
4050 * the replacement text should be a connector (| or ,).
4051 *
4052 * returns: the tree of xmlElementContentPtr describing the element
4053 * hierarchy.
4054 */
4055xmlElementContentPtr
4056#ifdef VMS
4057xmlParseElementChildrenContentD
4058#else
4059xmlParseElementChildrenContentDecl
4060#endif
4061(xmlParserCtxtPtr ctxt) {
4062 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4063 xmlChar *elem;
4064 xmlChar type = 0;
4065
4066 SKIP_BLANKS;
4067 GROW;
4068 if (RAW == '(') {
4069 /* Recurse on first child */
4070 NEXT;
4071 SKIP_BLANKS;
4072 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4073 SKIP_BLANKS;
4074 GROW;
4075 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004076 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004077 if (elem == NULL) {
4078 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080 ctxt->sax->error(ctxt->userData,
4081 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4082 ctxt->wellFormed = 0;
4083 ctxt->disableSAX = 1;
4084 return(NULL);
4085 }
4086 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4087 GROW;
4088 if (RAW == '?') {
4089 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4090 NEXT;
4091 } else if (RAW == '*') {
4092 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4093 NEXT;
4094 } else if (RAW == '+') {
4095 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4096 NEXT;
4097 } else {
4098 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4099 }
4100 xmlFree(elem);
4101 GROW;
4102 }
4103 SKIP_BLANKS;
4104 SHRINK;
4105 while (RAW != ')') {
4106 /*
4107 * Each loop we parse one separator and one element.
4108 */
4109 if (RAW == ',') {
4110 if (type == 0) type = CUR;
4111
4112 /*
4113 * Detect "Name | Name , Name" error
4114 */
4115 else if (type != CUR) {
4116 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4118 ctxt->sax->error(ctxt->userData,
4119 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4120 type);
4121 ctxt->wellFormed = 0;
4122 ctxt->disableSAX = 1;
4123 if ((op != NULL) && (op != ret))
4124 xmlFreeElementContent(op);
4125 if ((last != NULL) && (last != ret) &&
4126 (last != ret->c1) && (last != ret->c2))
4127 xmlFreeElementContent(last);
4128 if (ret != NULL)
4129 xmlFreeElementContent(ret);
4130 return(NULL);
4131 }
4132 NEXT;
4133
4134 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4135 if (op == NULL) {
4136 xmlFreeElementContent(ret);
4137 return(NULL);
4138 }
4139 if (last == NULL) {
4140 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004141 if (ret != NULL)
4142 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004143 ret = cur = op;
4144 } else {
4145 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004146 if (op != NULL)
4147 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004148 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004149 if (last != NULL)
4150 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004151 cur =op;
4152 last = NULL;
4153 }
4154 } else if (RAW == '|') {
4155 if (type == 0) type = CUR;
4156
4157 /*
4158 * Detect "Name , Name | Name" error
4159 */
4160 else if (type != CUR) {
4161 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4163 ctxt->sax->error(ctxt->userData,
4164 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4165 type);
4166 ctxt->wellFormed = 0;
4167 ctxt->disableSAX = 1;
4168 if ((op != NULL) && (op != ret) && (op != last))
4169 xmlFreeElementContent(op);
4170 if ((last != NULL) && (last != ret) &&
4171 (last != ret->c1) && (last != ret->c2))
4172 xmlFreeElementContent(last);
4173 if (ret != NULL)
4174 xmlFreeElementContent(ret);
4175 return(NULL);
4176 }
4177 NEXT;
4178
4179 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4180 if (op == NULL) {
4181 if ((op != NULL) && (op != ret))
4182 xmlFreeElementContent(op);
4183 if ((last != NULL) && (last != ret) &&
4184 (last != ret->c1) && (last != ret->c2))
4185 xmlFreeElementContent(last);
4186 if (ret != NULL)
4187 xmlFreeElementContent(ret);
4188 return(NULL);
4189 }
4190 if (last == NULL) {
4191 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004192 if (ret != NULL)
4193 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004194 ret = cur = op;
4195 } else {
4196 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004197 if (op != NULL)
4198 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004199 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004200 if (last != NULL)
4201 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004202 cur =op;
4203 last = NULL;
4204 }
4205 } else {
4206 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4208 ctxt->sax->error(ctxt->userData,
4209 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4210 ctxt->wellFormed = 0;
4211 ctxt->disableSAX = 1;
4212 if ((op != NULL) && (op != ret))
4213 xmlFreeElementContent(op);
4214 if ((last != NULL) && (last != ret) &&
4215 (last != ret->c1) && (last != ret->c2))
4216 xmlFreeElementContent(last);
4217 if (ret != NULL)
4218 xmlFreeElementContent(ret);
4219 return(NULL);
4220 }
4221 GROW;
4222 SKIP_BLANKS;
4223 GROW;
4224 if (RAW == '(') {
4225 /* Recurse on second child */
4226 NEXT;
4227 SKIP_BLANKS;
4228 last = xmlParseElementChildrenContentDecl(ctxt);
4229 SKIP_BLANKS;
4230 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004231 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004232 if (elem == NULL) {
4233 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4235 ctxt->sax->error(ctxt->userData,
4236 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4237 ctxt->wellFormed = 0;
4238 ctxt->disableSAX = 1;
4239 if ((op != NULL) && (op != ret))
4240 xmlFreeElementContent(op);
4241 if ((last != NULL) && (last != ret) &&
4242 (last != ret->c1) && (last != ret->c2))
4243 xmlFreeElementContent(last);
4244 if (ret != NULL)
4245 xmlFreeElementContent(ret);
4246 return(NULL);
4247 }
4248 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4249 xmlFree(elem);
4250 if (RAW == '?') {
4251 last->ocur = XML_ELEMENT_CONTENT_OPT;
4252 NEXT;
4253 } else if (RAW == '*') {
4254 last->ocur = XML_ELEMENT_CONTENT_MULT;
4255 NEXT;
4256 } else if (RAW == '+') {
4257 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4258 NEXT;
4259 } else {
4260 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4261 }
4262 }
4263 SKIP_BLANKS;
4264 GROW;
4265 }
4266 if ((cur != NULL) && (last != NULL)) {
4267 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004268 if (last != NULL)
4269 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004270 }
4271 ctxt->entity = ctxt->input;
4272 NEXT;
4273 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004274 if (ret != NULL)
4275 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004276 NEXT;
4277 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004278 if (ret != NULL)
4279 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004280 NEXT;
4281 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004282 if (ret != NULL)
4283 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004284 NEXT;
4285 }
4286 return(ret);
4287}
4288
4289/**
4290 * xmlParseElementContentDecl:
4291 * @ctxt: an XML parser context
4292 * @name: the name of the element being defined.
4293 * @result: the Element Content pointer will be stored here if any
4294 *
4295 * parse the declaration for an Element content either Mixed or Children,
4296 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4297 *
4298 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4299 *
4300 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4301 */
4302
4303int
4304xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4305 xmlElementContentPtr *result) {
4306
4307 xmlElementContentPtr tree = NULL;
4308 xmlParserInputPtr input = ctxt->input;
4309 int res;
4310
4311 *result = NULL;
4312
4313 if (RAW != '(') {
4314 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4316 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004317 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004318 ctxt->wellFormed = 0;
4319 ctxt->disableSAX = 1;
4320 return(-1);
4321 }
4322 NEXT;
4323 GROW;
4324 SKIP_BLANKS;
4325 if ((RAW == '#') && (NXT(1) == 'P') &&
4326 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4327 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4328 (NXT(6) == 'A')) {
4329 tree = xmlParseElementMixedContentDecl(ctxt);
4330 res = XML_ELEMENT_TYPE_MIXED;
4331 } else {
4332 tree = xmlParseElementChildrenContentDecl(ctxt);
4333 res = XML_ELEMENT_TYPE_ELEMENT;
4334 }
4335 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4336 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4338 ctxt->sax->error(ctxt->userData,
4339"Element content declaration doesn't start and stop in the same entity\n");
4340 ctxt->wellFormed = 0;
4341 ctxt->disableSAX = 1;
4342 }
4343 SKIP_BLANKS;
4344 *result = tree;
4345 return(res);
4346}
4347
4348/**
4349 * xmlParseElementDecl:
4350 * @ctxt: an XML parser context
4351 *
4352 * parse an Element declaration.
4353 *
4354 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4355 *
4356 * [ VC: Unique Element Type Declaration ]
4357 * No element type may be declared more than once
4358 *
4359 * Returns the type of the element, or -1 in case of error
4360 */
4361int
4362xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4363 xmlChar *name;
4364 int ret = -1;
4365 xmlElementContentPtr content = NULL;
4366
4367 GROW;
4368 if ((RAW == '<') && (NXT(1) == '!') &&
4369 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4370 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4371 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4372 (NXT(8) == 'T')) {
4373 xmlParserInputPtr input = ctxt->input;
4374
4375 SKIP(9);
4376 if (!IS_BLANK(CUR)) {
4377 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4379 ctxt->sax->error(ctxt->userData,
4380 "Space required after 'ELEMENT'\n");
4381 ctxt->wellFormed = 0;
4382 ctxt->disableSAX = 1;
4383 }
4384 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004385 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004386 if (name == NULL) {
4387 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4389 ctxt->sax->error(ctxt->userData,
4390 "xmlParseElementDecl: no name for Element\n");
4391 ctxt->wellFormed = 0;
4392 ctxt->disableSAX = 1;
4393 return(-1);
4394 }
4395 while ((RAW == 0) && (ctxt->inputNr > 1))
4396 xmlPopInput(ctxt);
4397 if (!IS_BLANK(CUR)) {
4398 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400 ctxt->sax->error(ctxt->userData,
4401 "Space required after the element name\n");
4402 ctxt->wellFormed = 0;
4403 ctxt->disableSAX = 1;
4404 }
4405 SKIP_BLANKS;
4406 if ((RAW == 'E') && (NXT(1) == 'M') &&
4407 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4408 (NXT(4) == 'Y')) {
4409 SKIP(5);
4410 /*
4411 * Element must always be empty.
4412 */
4413 ret = XML_ELEMENT_TYPE_EMPTY;
4414 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4415 (NXT(2) == 'Y')) {
4416 SKIP(3);
4417 /*
4418 * Element is a generic container.
4419 */
4420 ret = XML_ELEMENT_TYPE_ANY;
4421 } else if (RAW == '(') {
4422 ret = xmlParseElementContentDecl(ctxt, name, &content);
4423 } else {
4424 /*
4425 * [ WFC: PEs in Internal Subset ] error handling.
4426 */
4427 if ((RAW == '%') && (ctxt->external == 0) &&
4428 (ctxt->inputNr == 1)) {
4429 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4431 ctxt->sax->error(ctxt->userData,
4432 "PEReference: forbidden within markup decl in internal subset\n");
4433 } else {
4434 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4436 ctxt->sax->error(ctxt->userData,
4437 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4438 }
4439 ctxt->wellFormed = 0;
4440 ctxt->disableSAX = 1;
4441 if (name != NULL) xmlFree(name);
4442 return(-1);
4443 }
4444
4445 SKIP_BLANKS;
4446 /*
4447 * Pop-up of finished entities.
4448 */
4449 while ((RAW == 0) && (ctxt->inputNr > 1))
4450 xmlPopInput(ctxt);
4451 SKIP_BLANKS;
4452
4453 if (RAW != '>') {
4454 ctxt->errNo = XML_ERR_GT_REQUIRED;
4455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4456 ctxt->sax->error(ctxt->userData,
4457 "xmlParseElementDecl: expected '>' at the end\n");
4458 ctxt->wellFormed = 0;
4459 ctxt->disableSAX = 1;
4460 } else {
4461 if (input != ctxt->input) {
4462 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4464 ctxt->sax->error(ctxt->userData,
4465"Element declaration doesn't start and stop in the same entity\n");
4466 ctxt->wellFormed = 0;
4467 ctxt->disableSAX = 1;
4468 }
4469
4470 NEXT;
4471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4472 (ctxt->sax->elementDecl != NULL))
4473 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4474 content);
4475 }
4476 if (content != NULL) {
4477 xmlFreeElementContent(content);
4478 }
4479 if (name != NULL) {
4480 xmlFree(name);
4481 }
4482 }
4483 return(ret);
4484}
4485
4486/**
4487 * xmlParseMarkupDecl:
4488 * @ctxt: an XML parser context
4489 *
4490 * parse Markup declarations
4491 *
4492 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4493 * NotationDecl | PI | Comment
4494 *
4495 * [ VC: Proper Declaration/PE Nesting ]
4496 * Parameter-entity replacement text must be properly nested with
4497 * markup declarations. That is to say, if either the first character
4498 * or the last character of a markup declaration (markupdecl above) is
4499 * contained in the replacement text for a parameter-entity reference,
4500 * both must be contained in the same replacement text.
4501 *
4502 * [ WFC: PEs in Internal Subset ]
4503 * In the internal DTD subset, parameter-entity references can occur
4504 * only where markup declarations can occur, not within markup declarations.
4505 * (This does not apply to references that occur in external parameter
4506 * entities or to the external subset.)
4507 */
4508void
4509xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4510 GROW;
4511 xmlParseElementDecl(ctxt);
4512 xmlParseAttributeListDecl(ctxt);
4513 xmlParseEntityDecl(ctxt);
4514 xmlParseNotationDecl(ctxt);
4515 xmlParsePI(ctxt);
4516 xmlParseComment(ctxt);
4517 /*
4518 * This is only for internal subset. On external entities,
4519 * the replacement is done before parsing stage
4520 */
4521 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4522 xmlParsePEReference(ctxt);
4523 ctxt->instate = XML_PARSER_DTD;
4524}
4525
4526/**
4527 * xmlParseTextDecl:
4528 * @ctxt: an XML parser context
4529 *
4530 * parse an XML declaration header for external entities
4531 *
4532 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4533 *
4534 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4535 */
4536
4537void
4538xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4539 xmlChar *version;
4540
4541 /*
4542 * We know that '<?xml' is here.
4543 */
4544 if ((RAW == '<') && (NXT(1) == '?') &&
4545 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4546 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4547 SKIP(5);
4548 } else {
4549 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4551 ctxt->sax->error(ctxt->userData,
4552 "Text declaration '<?xml' required\n");
4553 ctxt->wellFormed = 0;
4554 ctxt->disableSAX = 1;
4555
4556 return;
4557 }
4558
4559 if (!IS_BLANK(CUR)) {
4560 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4562 ctxt->sax->error(ctxt->userData,
4563 "Space needed after '<?xml'\n");
4564 ctxt->wellFormed = 0;
4565 ctxt->disableSAX = 1;
4566 }
4567 SKIP_BLANKS;
4568
4569 /*
4570 * We may have the VersionInfo here.
4571 */
4572 version = xmlParseVersionInfo(ctxt);
4573 if (version == NULL)
4574 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4575 ctxt->input->version = version;
4576
4577 /*
4578 * We must have the encoding declaration
4579 */
4580 if (!IS_BLANK(CUR)) {
4581 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4583 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4584 ctxt->wellFormed = 0;
4585 ctxt->disableSAX = 1;
4586 }
4587 xmlParseEncodingDecl(ctxt);
4588 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4589 /*
4590 * The XML REC instructs us to stop parsing right here
4591 */
4592 return;
4593 }
4594
4595 SKIP_BLANKS;
4596 if ((RAW == '?') && (NXT(1) == '>')) {
4597 SKIP(2);
4598 } else if (RAW == '>') {
4599 /* Deprecated old WD ... */
4600 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4602 ctxt->sax->error(ctxt->userData,
4603 "XML declaration must end-up with '?>'\n");
4604 ctxt->wellFormed = 0;
4605 ctxt->disableSAX = 1;
4606 NEXT;
4607 } else {
4608 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4609 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4610 ctxt->sax->error(ctxt->userData,
4611 "parsing XML declaration: '?>' expected\n");
4612 ctxt->wellFormed = 0;
4613 ctxt->disableSAX = 1;
4614 MOVETO_ENDTAG(CUR_PTR);
4615 NEXT;
4616 }
4617}
4618
4619/*
4620 * xmlParseConditionalSections
4621 * @ctxt: an XML parser context
4622 *
4623 * [61] conditionalSect ::= includeSect | ignoreSect
4624 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4625 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4626 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4627 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4628 */
4629
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004630static void
Owen Taylor3473f882001-02-23 17:55:21 +00004631xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4632 SKIP(3);
4633 SKIP_BLANKS;
4634 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4635 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4636 (NXT(6) == 'E')) {
4637 SKIP(7);
4638 SKIP_BLANKS;
4639 if (RAW != '[') {
4640 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "XML conditional section '[' expected\n");
4644 ctxt->wellFormed = 0;
4645 ctxt->disableSAX = 1;
4646 } else {
4647 NEXT;
4648 }
4649 if (xmlParserDebugEntities) {
4650 if ((ctxt->input != NULL) && (ctxt->input->filename))
4651 xmlGenericError(xmlGenericErrorContext,
4652 "%s(%d): ", ctxt->input->filename,
4653 ctxt->input->line);
4654 xmlGenericError(xmlGenericErrorContext,
4655 "Entering INCLUDE Conditional Section\n");
4656 }
4657
4658 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4659 (NXT(2) != '>'))) {
4660 const xmlChar *check = CUR_PTR;
4661 int cons = ctxt->input->consumed;
4662 int tok = ctxt->token;
4663
4664 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4665 xmlParseConditionalSections(ctxt);
4666 } else if (IS_BLANK(CUR)) {
4667 NEXT;
4668 } else if (RAW == '%') {
4669 xmlParsePEReference(ctxt);
4670 } else
4671 xmlParseMarkupDecl(ctxt);
4672
4673 /*
4674 * Pop-up of finished entities.
4675 */
4676 while ((RAW == 0) && (ctxt->inputNr > 1))
4677 xmlPopInput(ctxt);
4678
4679 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4680 (tok == ctxt->token)) {
4681 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4683 ctxt->sax->error(ctxt->userData,
4684 "Content error in the external subset\n");
4685 ctxt->wellFormed = 0;
4686 ctxt->disableSAX = 1;
4687 break;
4688 }
4689 }
4690 if (xmlParserDebugEntities) {
4691 if ((ctxt->input != NULL) && (ctxt->input->filename))
4692 xmlGenericError(xmlGenericErrorContext,
4693 "%s(%d): ", ctxt->input->filename,
4694 ctxt->input->line);
4695 xmlGenericError(xmlGenericErrorContext,
4696 "Leaving INCLUDE Conditional Section\n");
4697 }
4698
4699 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4700 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4701 int state;
4702 int instate;
4703 int depth = 0;
4704
4705 SKIP(6);
4706 SKIP_BLANKS;
4707 if (RAW != '[') {
4708 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4710 ctxt->sax->error(ctxt->userData,
4711 "XML conditional section '[' expected\n");
4712 ctxt->wellFormed = 0;
4713 ctxt->disableSAX = 1;
4714 } else {
4715 NEXT;
4716 }
4717 if (xmlParserDebugEntities) {
4718 if ((ctxt->input != NULL) && (ctxt->input->filename))
4719 xmlGenericError(xmlGenericErrorContext,
4720 "%s(%d): ", ctxt->input->filename,
4721 ctxt->input->line);
4722 xmlGenericError(xmlGenericErrorContext,
4723 "Entering IGNORE Conditional Section\n");
4724 }
4725
4726 /*
4727 * Parse up to the end of the conditionnal section
4728 * But disable SAX event generating DTD building in the meantime
4729 */
4730 state = ctxt->disableSAX;
4731 instate = ctxt->instate;
4732 ctxt->disableSAX = 1;
4733 ctxt->instate = XML_PARSER_IGNORE;
4734
4735 while (depth >= 0) {
4736 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4737 depth++;
4738 SKIP(3);
4739 continue;
4740 }
4741 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4742 if (--depth >= 0) SKIP(3);
4743 continue;
4744 }
4745 NEXT;
4746 continue;
4747 }
4748
4749 ctxt->disableSAX = state;
4750 ctxt->instate = instate;
4751
4752 if (xmlParserDebugEntities) {
4753 if ((ctxt->input != NULL) && (ctxt->input->filename))
4754 xmlGenericError(xmlGenericErrorContext,
4755 "%s(%d): ", ctxt->input->filename,
4756 ctxt->input->line);
4757 xmlGenericError(xmlGenericErrorContext,
4758 "Leaving IGNORE Conditional Section\n");
4759 }
4760
4761 } else {
4762 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4764 ctxt->sax->error(ctxt->userData,
4765 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4766 ctxt->wellFormed = 0;
4767 ctxt->disableSAX = 1;
4768 }
4769
4770 if (RAW == 0)
4771 SHRINK;
4772
4773 if (RAW == 0) {
4774 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4776 ctxt->sax->error(ctxt->userData,
4777 "XML conditional section not closed\n");
4778 ctxt->wellFormed = 0;
4779 ctxt->disableSAX = 1;
4780 } else {
4781 SKIP(3);
4782 }
4783}
4784
4785/**
4786 * xmlParseExternalSubset:
4787 * @ctxt: an XML parser context
4788 * @ExternalID: the external identifier
4789 * @SystemID: the system identifier (or URL)
4790 *
4791 * parse Markup declarations from an external subset
4792 *
4793 * [30] extSubset ::= textDecl? extSubsetDecl
4794 *
4795 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4796 */
4797void
4798xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4799 const xmlChar *SystemID) {
4800 GROW;
4801 if ((RAW == '<') && (NXT(1) == '?') &&
4802 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4803 (NXT(4) == 'l')) {
4804 xmlParseTextDecl(ctxt);
4805 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4806 /*
4807 * The XML REC instructs us to stop parsing right here
4808 */
4809 ctxt->instate = XML_PARSER_EOF;
4810 return;
4811 }
4812 }
4813 if (ctxt->myDoc == NULL) {
4814 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4815 }
4816 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4817 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4818
4819 ctxt->instate = XML_PARSER_DTD;
4820 ctxt->external = 1;
4821 while (((RAW == '<') && (NXT(1) == '?')) ||
4822 ((RAW == '<') && (NXT(1) == '!')) ||
4823 IS_BLANK(CUR)) {
4824 const xmlChar *check = CUR_PTR;
4825 int cons = ctxt->input->consumed;
4826 int tok = ctxt->token;
4827
4828 GROW;
4829 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4830 xmlParseConditionalSections(ctxt);
4831 } else if (IS_BLANK(CUR)) {
4832 NEXT;
4833 } else if (RAW == '%') {
4834 xmlParsePEReference(ctxt);
4835 } else
4836 xmlParseMarkupDecl(ctxt);
4837
4838 /*
4839 * Pop-up of finished entities.
4840 */
4841 while ((RAW == 0) && (ctxt->inputNr > 1))
4842 xmlPopInput(ctxt);
4843
4844 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4845 (tok == ctxt->token)) {
4846 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4848 ctxt->sax->error(ctxt->userData,
4849 "Content error in the external subset\n");
4850 ctxt->wellFormed = 0;
4851 ctxt->disableSAX = 1;
4852 break;
4853 }
4854 }
4855
4856 if (RAW != 0) {
4857 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4859 ctxt->sax->error(ctxt->userData,
4860 "Extra content at the end of the document\n");
4861 ctxt->wellFormed = 0;
4862 ctxt->disableSAX = 1;
4863 }
4864
4865}
4866
4867/**
4868 * xmlParseReference:
4869 * @ctxt: an XML parser context
4870 *
4871 * parse and handle entity references in content, depending on the SAX
4872 * interface, this may end-up in a call to character() if this is a
4873 * CharRef, a predefined entity, if there is no reference() callback.
4874 * or if the parser was asked to switch to that mode.
4875 *
4876 * [67] Reference ::= EntityRef | CharRef
4877 */
4878void
4879xmlParseReference(xmlParserCtxtPtr ctxt) {
4880 xmlEntityPtr ent;
4881 xmlChar *val;
4882 if (RAW != '&') return;
4883
4884 if (NXT(1) == '#') {
4885 int i = 0;
4886 xmlChar out[10];
4887 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004888 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004889
4890 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4891 /*
4892 * So we are using non-UTF-8 buffers
4893 * Check that the char fit on 8bits, if not
4894 * generate a CharRef.
4895 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004896 if (value <= 0xFF) {
4897 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004898 out[1] = 0;
4899 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4900 (!ctxt->disableSAX))
4901 ctxt->sax->characters(ctxt->userData, out, 1);
4902 } else {
4903 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004904 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004905 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004906 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004907 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4908 (!ctxt->disableSAX))
4909 ctxt->sax->reference(ctxt->userData, out);
4910 }
4911 } else {
4912 /*
4913 * Just encode the value in UTF-8
4914 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004915 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004916 out[i] = 0;
4917 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4918 (!ctxt->disableSAX))
4919 ctxt->sax->characters(ctxt->userData, out, i);
4920 }
4921 } else {
4922 ent = xmlParseEntityRef(ctxt);
4923 if (ent == NULL) return;
4924 if ((ent->name != NULL) &&
4925 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4926 xmlNodePtr list = NULL;
4927 int ret;
4928
4929
4930 /*
4931 * The first reference to the entity trigger a parsing phase
4932 * where the ent->children is filled with the result from
4933 * the parsing.
4934 */
4935 if (ent->children == NULL) {
4936 xmlChar *value;
4937 value = ent->content;
4938
4939 /*
4940 * Check that this entity is well formed
4941 */
4942 if ((value != NULL) &&
4943 (value[1] == 0) && (value[0] == '<') &&
4944 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4945 /*
4946 * DONE: get definite answer on this !!!
4947 * Lots of entity decls are used to declare a single
4948 * char
4949 * <!ENTITY lt "<">
4950 * Which seems to be valid since
4951 * 2.4: The ampersand character (&) and the left angle
4952 * bracket (<) may appear in their literal form only
4953 * when used ... They are also legal within the literal
4954 * entity value of an internal entity declaration;i
4955 * see "4.3.2 Well-Formed Parsed Entities".
4956 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4957 * Looking at the OASIS test suite and James Clark
4958 * tests, this is broken. However the XML REC uses
4959 * it. Is the XML REC not well-formed ????
4960 * This is a hack to avoid this problem
4961 *
4962 * ANSWER: since lt gt amp .. are already defined,
4963 * this is a redefinition and hence the fact that the
4964 * contentis not well balanced is not a Wf error, this
4965 * is lousy but acceptable.
4966 */
4967 list = xmlNewDocText(ctxt->myDoc, value);
4968 if (list != NULL) {
4969 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4970 (ent->children == NULL)) {
4971 ent->children = list;
4972 ent->last = list;
4973 list->parent = (xmlNodePtr) ent;
4974 } else {
4975 xmlFreeNodeList(list);
4976 }
4977 } else if (list != NULL) {
4978 xmlFreeNodeList(list);
4979 }
4980 } else {
4981 /*
4982 * 4.3.2: An internal general parsed entity is well-formed
4983 * if its replacement text matches the production labeled
4984 * content.
4985 */
4986 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4987 ctxt->depth++;
4988 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4989 ctxt->sax, NULL, ctxt->depth,
4990 value, &list);
4991 ctxt->depth--;
4992 } else if (ent->etype ==
4993 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4994 ctxt->depth++;
4995 ret = xmlParseExternalEntity(ctxt->myDoc,
4996 ctxt->sax, NULL, ctxt->depth,
4997 ent->URI, ent->ExternalID, &list);
4998 ctxt->depth--;
4999 } else {
5000 ret = -1;
5001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5002 ctxt->sax->error(ctxt->userData,
5003 "Internal: invalid entity type\n");
5004 }
5005 if (ret == XML_ERR_ENTITY_LOOP) {
5006 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5008 ctxt->sax->error(ctxt->userData,
5009 "Detected entity reference loop\n");
5010 ctxt->wellFormed = 0;
5011 ctxt->disableSAX = 1;
5012 } else if ((ret == 0) && (list != NULL)) {
5013 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5014 (ent->children == NULL)) {
5015 ent->children = list;
5016 while (list != NULL) {
5017 list->parent = (xmlNodePtr) ent;
5018 if (list->next == NULL)
5019 ent->last = list;
5020 list = list->next;
5021 }
5022 } else {
5023 xmlFreeNodeList(list);
5024 }
5025 } else if (ret > 0) {
5026 ctxt->errNo = ret;
5027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5028 ctxt->sax->error(ctxt->userData,
5029 "Entity value required\n");
5030 ctxt->wellFormed = 0;
5031 ctxt->disableSAX = 1;
5032 } else if (list != NULL) {
5033 xmlFreeNodeList(list);
5034 }
5035 }
5036 }
5037 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5038 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5039 /*
5040 * Create a node.
5041 */
5042 ctxt->sax->reference(ctxt->userData, ent->name);
5043 return;
5044 } else if (ctxt->replaceEntities) {
5045 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5046 /*
5047 * Seems we are generating the DOM content, do
5048 * a simple tree copy
5049 */
5050 xmlNodePtr new;
5051 new = xmlCopyNodeList(ent->children);
5052
5053 xmlAddChildList(ctxt->node, new);
5054 /*
5055 * This is to avoid a nasty side effect, see
5056 * characters() in SAX.c
5057 */
5058 ctxt->nodemem = 0;
5059 ctxt->nodelen = 0;
5060 return;
5061 } else {
5062 /*
5063 * Probably running in SAX mode
5064 */
5065 xmlParserInputPtr input;
5066
5067 input = xmlNewEntityInputStream(ctxt, ent);
5068 xmlPushInput(ctxt, input);
5069 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5070 (RAW == '<') && (NXT(1) == '?') &&
5071 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5072 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5073 xmlParseTextDecl(ctxt);
5074 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5075 /*
5076 * The XML REC instructs us to stop parsing right here
5077 */
5078 ctxt->instate = XML_PARSER_EOF;
5079 return;
5080 }
5081 if (input->standalone == 1) {
5082 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5084 ctxt->sax->error(ctxt->userData,
5085 "external parsed entities cannot be standalone\n");
5086 ctxt->wellFormed = 0;
5087 ctxt->disableSAX = 1;
5088 }
5089 }
5090 return;
5091 }
5092 }
5093 } else {
5094 val = ent->content;
5095 if (val == NULL) return;
5096 /*
5097 * inline the entity.
5098 */
5099 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5100 (!ctxt->disableSAX))
5101 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5102 }
5103 }
5104}
5105
5106/**
5107 * xmlParseEntityRef:
5108 * @ctxt: an XML parser context
5109 *
5110 * parse ENTITY references declarations
5111 *
5112 * [68] EntityRef ::= '&' Name ';'
5113 *
5114 * [ WFC: Entity Declared ]
5115 * In a document without any DTD, a document with only an internal DTD
5116 * subset which contains no parameter entity references, or a document
5117 * with "standalone='yes'", the Name given in the entity reference
5118 * must match that in an entity declaration, except that well-formed
5119 * documents need not declare any of the following entities: amp, lt,
5120 * gt, apos, quot. The declaration of a parameter entity must precede
5121 * any reference to it. Similarly, the declaration of a general entity
5122 * must precede any reference to it which appears in a default value in an
5123 * attribute-list declaration. Note that if entities are declared in the
5124 * external subset or in external parameter entities, a non-validating
5125 * processor is not obligated to read and process their declarations;
5126 * for such documents, the rule that an entity must be declared is a
5127 * well-formedness constraint only if standalone='yes'.
5128 *
5129 * [ WFC: Parsed Entity ]
5130 * An entity reference must not contain the name of an unparsed entity
5131 *
5132 * Returns the xmlEntityPtr if found, or NULL otherwise.
5133 */
5134xmlEntityPtr
5135xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5136 xmlChar *name;
5137 xmlEntityPtr ent = NULL;
5138
5139 GROW;
5140
5141 if (RAW == '&') {
5142 NEXT;
5143 name = xmlParseName(ctxt);
5144 if (name == NULL) {
5145 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5146 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5147 ctxt->sax->error(ctxt->userData,
5148 "xmlParseEntityRef: no name\n");
5149 ctxt->wellFormed = 0;
5150 ctxt->disableSAX = 1;
5151 } else {
5152 if (RAW == ';') {
5153 NEXT;
5154 /*
5155 * Ask first SAX for entity resolution, otherwise try the
5156 * predefined set.
5157 */
5158 if (ctxt->sax != NULL) {
5159 if (ctxt->sax->getEntity != NULL)
5160 ent = ctxt->sax->getEntity(ctxt->userData, name);
5161 if (ent == NULL)
5162 ent = xmlGetPredefinedEntity(name);
5163 }
5164 /*
5165 * [ WFC: Entity Declared ]
5166 * In a document without any DTD, a document with only an
5167 * internal DTD subset which contains no parameter entity
5168 * references, or a document with "standalone='yes'", the
5169 * Name given in the entity reference must match that in an
5170 * entity declaration, except that well-formed documents
5171 * need not declare any of the following entities: amp, lt,
5172 * gt, apos, quot.
5173 * The declaration of a parameter entity must precede any
5174 * reference to it.
5175 * Similarly, the declaration of a general entity must
5176 * precede any reference to it which appears in a default
5177 * value in an attribute-list declaration. Note that if
5178 * entities are declared in the external subset or in
5179 * external parameter entities, a non-validating processor
5180 * is not obligated to read and process their declarations;
5181 * for such documents, the rule that an entity must be
5182 * declared is a well-formedness constraint only if
5183 * standalone='yes'.
5184 */
5185 if (ent == NULL) {
5186 if ((ctxt->standalone == 1) ||
5187 ((ctxt->hasExternalSubset == 0) &&
5188 (ctxt->hasPErefs == 0))) {
5189 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5191 ctxt->sax->error(ctxt->userData,
5192 "Entity '%s' not defined\n", name);
5193 ctxt->wellFormed = 0;
5194 ctxt->disableSAX = 1;
5195 } else {
5196 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5197 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5198 ctxt->sax->warning(ctxt->userData,
5199 "Entity '%s' not defined\n", name);
5200 }
5201 }
5202
5203 /*
5204 * [ WFC: Parsed Entity ]
5205 * An entity reference must not contain the name of an
5206 * unparsed entity
5207 */
5208 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5209 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5211 ctxt->sax->error(ctxt->userData,
5212 "Entity reference to unparsed entity %s\n", name);
5213 ctxt->wellFormed = 0;
5214 ctxt->disableSAX = 1;
5215 }
5216
5217 /*
5218 * [ WFC: No External Entity References ]
5219 * Attribute values cannot contain direct or indirect
5220 * entity references to external entities.
5221 */
5222 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5223 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5224 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5226 ctxt->sax->error(ctxt->userData,
5227 "Attribute references external entity '%s'\n", name);
5228 ctxt->wellFormed = 0;
5229 ctxt->disableSAX = 1;
5230 }
5231 /*
5232 * [ WFC: No < in Attribute Values ]
5233 * The replacement text of any entity referred to directly or
5234 * indirectly in an attribute value (other than "&lt;") must
5235 * not contain a <.
5236 */
5237 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5238 (ent != NULL) &&
5239 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5240 (ent->content != NULL) &&
5241 (xmlStrchr(ent->content, '<'))) {
5242 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5244 ctxt->sax->error(ctxt->userData,
5245 "'<' in entity '%s' is not allowed in attributes values\n", name);
5246 ctxt->wellFormed = 0;
5247 ctxt->disableSAX = 1;
5248 }
5249
5250 /*
5251 * Internal check, no parameter entities here ...
5252 */
5253 else {
5254 switch (ent->etype) {
5255 case XML_INTERNAL_PARAMETER_ENTITY:
5256 case XML_EXTERNAL_PARAMETER_ENTITY:
5257 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5259 ctxt->sax->error(ctxt->userData,
5260 "Attempt to reference the parameter entity '%s'\n", name);
5261 ctxt->wellFormed = 0;
5262 ctxt->disableSAX = 1;
5263 break;
5264 default:
5265 break;
5266 }
5267 }
5268
5269 /*
5270 * [ WFC: No Recursion ]
5271 * A parsed entity must not contain a recursive reference
5272 * to itself, either directly or indirectly.
5273 * Done somewhere else
5274 */
5275
5276 } else {
5277 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5279 ctxt->sax->error(ctxt->userData,
5280 "xmlParseEntityRef: expecting ';'\n");
5281 ctxt->wellFormed = 0;
5282 ctxt->disableSAX = 1;
5283 }
5284 xmlFree(name);
5285 }
5286 }
5287 return(ent);
5288}
5289
5290/**
5291 * xmlParseStringEntityRef:
5292 * @ctxt: an XML parser context
5293 * @str: a pointer to an index in the string
5294 *
5295 * parse ENTITY references declarations, but this version parses it from
5296 * a string value.
5297 *
5298 * [68] EntityRef ::= '&' Name ';'
5299 *
5300 * [ WFC: Entity Declared ]
5301 * In a document without any DTD, a document with only an internal DTD
5302 * subset which contains no parameter entity references, or a document
5303 * with "standalone='yes'", the Name given in the entity reference
5304 * must match that in an entity declaration, except that well-formed
5305 * documents need not declare any of the following entities: amp, lt,
5306 * gt, apos, quot. The declaration of a parameter entity must precede
5307 * any reference to it. Similarly, the declaration of a general entity
5308 * must precede any reference to it which appears in a default value in an
5309 * attribute-list declaration. Note that if entities are declared in the
5310 * external subset or in external parameter entities, a non-validating
5311 * processor is not obligated to read and process their declarations;
5312 * for such documents, the rule that an entity must be declared is a
5313 * well-formedness constraint only if standalone='yes'.
5314 *
5315 * [ WFC: Parsed Entity ]
5316 * An entity reference must not contain the name of an unparsed entity
5317 *
5318 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5319 * is updated to the current location in the string.
5320 */
5321xmlEntityPtr
5322xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5323 xmlChar *name;
5324 const xmlChar *ptr;
5325 xmlChar cur;
5326 xmlEntityPtr ent = NULL;
5327
5328 if ((str == NULL) || (*str == NULL))
5329 return(NULL);
5330 ptr = *str;
5331 cur = *ptr;
5332 if (cur == '&') {
5333 ptr++;
5334 cur = *ptr;
5335 name = xmlParseStringName(ctxt, &ptr);
5336 if (name == NULL) {
5337 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5339 ctxt->sax->error(ctxt->userData,
5340 "xmlParseEntityRef: no name\n");
5341 ctxt->wellFormed = 0;
5342 ctxt->disableSAX = 1;
5343 } else {
5344 if (*ptr == ';') {
5345 ptr++;
5346 /*
5347 * Ask first SAX for entity resolution, otherwise try the
5348 * predefined set.
5349 */
5350 if (ctxt->sax != NULL) {
5351 if (ctxt->sax->getEntity != NULL)
5352 ent = ctxt->sax->getEntity(ctxt->userData, name);
5353 if (ent == NULL)
5354 ent = xmlGetPredefinedEntity(name);
5355 }
5356 /*
5357 * [ WFC: Entity Declared ]
5358 * In a document without any DTD, a document with only an
5359 * internal DTD subset which contains no parameter entity
5360 * references, or a document with "standalone='yes'", the
5361 * Name given in the entity reference must match that in an
5362 * entity declaration, except that well-formed documents
5363 * need not declare any of the following entities: amp, lt,
5364 * gt, apos, quot.
5365 * The declaration of a parameter entity must precede any
5366 * reference to it.
5367 * Similarly, the declaration of a general entity must
5368 * precede any reference to it which appears in a default
5369 * value in an attribute-list declaration. Note that if
5370 * entities are declared in the external subset or in
5371 * external parameter entities, a non-validating processor
5372 * is not obligated to read and process their declarations;
5373 * for such documents, the rule that an entity must be
5374 * declared is a well-formedness constraint only if
5375 * standalone='yes'.
5376 */
5377 if (ent == NULL) {
5378 if ((ctxt->standalone == 1) ||
5379 ((ctxt->hasExternalSubset == 0) &&
5380 (ctxt->hasPErefs == 0))) {
5381 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5383 ctxt->sax->error(ctxt->userData,
5384 "Entity '%s' not defined\n", name);
5385 ctxt->wellFormed = 0;
5386 ctxt->disableSAX = 1;
5387 } else {
5388 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5389 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5390 ctxt->sax->warning(ctxt->userData,
5391 "Entity '%s' not defined\n", name);
5392 }
5393 }
5394
5395 /*
5396 * [ WFC: Parsed Entity ]
5397 * An entity reference must not contain the name of an
5398 * unparsed entity
5399 */
5400 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5401 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5403 ctxt->sax->error(ctxt->userData,
5404 "Entity reference to unparsed entity %s\n", name);
5405 ctxt->wellFormed = 0;
5406 ctxt->disableSAX = 1;
5407 }
5408
5409 /*
5410 * [ WFC: No External Entity References ]
5411 * Attribute values cannot contain direct or indirect
5412 * entity references to external entities.
5413 */
5414 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5415 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5416 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5418 ctxt->sax->error(ctxt->userData,
5419 "Attribute references external entity '%s'\n", name);
5420 ctxt->wellFormed = 0;
5421 ctxt->disableSAX = 1;
5422 }
5423 /*
5424 * [ WFC: No < in Attribute Values ]
5425 * The replacement text of any entity referred to directly or
5426 * indirectly in an attribute value (other than "&lt;") must
5427 * not contain a <.
5428 */
5429 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5430 (ent != NULL) &&
5431 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5432 (ent->content != NULL) &&
5433 (xmlStrchr(ent->content, '<'))) {
5434 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5436 ctxt->sax->error(ctxt->userData,
5437 "'<' in entity '%s' is not allowed in attributes values\n", name);
5438 ctxt->wellFormed = 0;
5439 ctxt->disableSAX = 1;
5440 }
5441
5442 /*
5443 * Internal check, no parameter entities here ...
5444 */
5445 else {
5446 switch (ent->etype) {
5447 case XML_INTERNAL_PARAMETER_ENTITY:
5448 case XML_EXTERNAL_PARAMETER_ENTITY:
5449 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5451 ctxt->sax->error(ctxt->userData,
5452 "Attempt to reference the parameter entity '%s'\n", name);
5453 ctxt->wellFormed = 0;
5454 ctxt->disableSAX = 1;
5455 break;
5456 default:
5457 break;
5458 }
5459 }
5460
5461 /*
5462 * [ WFC: No Recursion ]
5463 * A parsed entity must not contain a recursive reference
5464 * to itself, either directly or indirectly.
5465 * Done somewhwere else
5466 */
5467
5468 } else {
5469 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "xmlParseEntityRef: expecting ';'\n");
5473 ctxt->wellFormed = 0;
5474 ctxt->disableSAX = 1;
5475 }
5476 xmlFree(name);
5477 }
5478 }
5479 *str = ptr;
5480 return(ent);
5481}
5482
5483/**
5484 * xmlParsePEReference:
5485 * @ctxt: an XML parser context
5486 *
5487 * parse PEReference declarations
5488 * The entity content is handled directly by pushing it's content as
5489 * a new input stream.
5490 *
5491 * [69] PEReference ::= '%' Name ';'
5492 *
5493 * [ WFC: No Recursion ]
5494 * A parsed entity must not contain a recursive
5495 * reference to itself, either directly or indirectly.
5496 *
5497 * [ WFC: Entity Declared ]
5498 * In a document without any DTD, a document with only an internal DTD
5499 * subset which contains no parameter entity references, or a document
5500 * with "standalone='yes'", ... ... The declaration of a parameter
5501 * entity must precede any reference to it...
5502 *
5503 * [ VC: Entity Declared ]
5504 * In a document with an external subset or external parameter entities
5505 * with "standalone='no'", ... ... The declaration of a parameter entity
5506 * must precede any reference to it...
5507 *
5508 * [ WFC: In DTD ]
5509 * Parameter-entity references may only appear in the DTD.
5510 * NOTE: misleading but this is handled.
5511 */
5512void
5513xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5514 xmlChar *name;
5515 xmlEntityPtr entity = NULL;
5516 xmlParserInputPtr input;
5517
5518 if (RAW == '%') {
5519 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005520 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005521 if (name == NULL) {
5522 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5524 ctxt->sax->error(ctxt->userData,
5525 "xmlParsePEReference: no name\n");
5526 ctxt->wellFormed = 0;
5527 ctxt->disableSAX = 1;
5528 } else {
5529 if (RAW == ';') {
5530 NEXT;
5531 if ((ctxt->sax != NULL) &&
5532 (ctxt->sax->getParameterEntity != NULL))
5533 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5534 name);
5535 if (entity == NULL) {
5536 /*
5537 * [ WFC: Entity Declared ]
5538 * In a document without any DTD, a document with only an
5539 * internal DTD subset which contains no parameter entity
5540 * references, or a document with "standalone='yes'", ...
5541 * ... The declaration of a parameter entity must precede
5542 * any reference to it...
5543 */
5544 if ((ctxt->standalone == 1) ||
5545 ((ctxt->hasExternalSubset == 0) &&
5546 (ctxt->hasPErefs == 0))) {
5547 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5548 if ((!ctxt->disableSAX) &&
5549 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5550 ctxt->sax->error(ctxt->userData,
5551 "PEReference: %%%s; not found\n", name);
5552 ctxt->wellFormed = 0;
5553 ctxt->disableSAX = 1;
5554 } else {
5555 /*
5556 * [ VC: Entity Declared ]
5557 * In a document with an external subset or external
5558 * parameter entities with "standalone='no'", ...
5559 * ... The declaration of a parameter entity must precede
5560 * any reference to it...
5561 */
5562 if ((!ctxt->disableSAX) &&
5563 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5564 ctxt->sax->warning(ctxt->userData,
5565 "PEReference: %%%s; not found\n", name);
5566 ctxt->valid = 0;
5567 }
5568 } else {
5569 /*
5570 * Internal checking in case the entity quest barfed
5571 */
5572 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5573 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5574 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5575 ctxt->sax->warning(ctxt->userData,
5576 "Internal: %%%s; is not a parameter entity\n", name);
5577 } else {
5578 /*
5579 * TODO !!!
5580 * handle the extra spaces added before and after
5581 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5582 */
5583 input = xmlNewEntityInputStream(ctxt, entity);
5584 xmlPushInput(ctxt, input);
5585 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5586 (RAW == '<') && (NXT(1) == '?') &&
5587 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5588 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5589 xmlParseTextDecl(ctxt);
5590 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5591 /*
5592 * The XML REC instructs us to stop parsing
5593 * right here
5594 */
5595 ctxt->instate = XML_PARSER_EOF;
5596 xmlFree(name);
5597 return;
5598 }
5599 }
5600 if (ctxt->token == 0)
5601 ctxt->token = ' ';
5602 }
5603 }
5604 ctxt->hasPErefs = 1;
5605 } else {
5606 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5608 ctxt->sax->error(ctxt->userData,
5609 "xmlParsePEReference: expecting ';'\n");
5610 ctxt->wellFormed = 0;
5611 ctxt->disableSAX = 1;
5612 }
5613 xmlFree(name);
5614 }
5615 }
5616}
5617
5618/**
5619 * xmlParseStringPEReference:
5620 * @ctxt: an XML parser context
5621 * @str: a pointer to an index in the string
5622 *
5623 * parse PEReference declarations
5624 *
5625 * [69] PEReference ::= '%' Name ';'
5626 *
5627 * [ WFC: No Recursion ]
5628 * A parsed entity must not contain a recursive
5629 * reference to itself, either directly or indirectly.
5630 *
5631 * [ WFC: Entity Declared ]
5632 * In a document without any DTD, a document with only an internal DTD
5633 * subset which contains no parameter entity references, or a document
5634 * with "standalone='yes'", ... ... The declaration of a parameter
5635 * entity must precede any reference to it...
5636 *
5637 * [ VC: Entity Declared ]
5638 * In a document with an external subset or external parameter entities
5639 * with "standalone='no'", ... ... The declaration of a parameter entity
5640 * must precede any reference to it...
5641 *
5642 * [ WFC: In DTD ]
5643 * Parameter-entity references may only appear in the DTD.
5644 * NOTE: misleading but this is handled.
5645 *
5646 * Returns the string of the entity content.
5647 * str is updated to the current value of the index
5648 */
5649xmlEntityPtr
5650xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5651 const xmlChar *ptr;
5652 xmlChar cur;
5653 xmlChar *name;
5654 xmlEntityPtr entity = NULL;
5655
5656 if ((str == NULL) || (*str == NULL)) return(NULL);
5657 ptr = *str;
5658 cur = *ptr;
5659 if (cur == '%') {
5660 ptr++;
5661 cur = *ptr;
5662 name = xmlParseStringName(ctxt, &ptr);
5663 if (name == NULL) {
5664 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5666 ctxt->sax->error(ctxt->userData,
5667 "xmlParseStringPEReference: no name\n");
5668 ctxt->wellFormed = 0;
5669 ctxt->disableSAX = 1;
5670 } else {
5671 cur = *ptr;
5672 if (cur == ';') {
5673 ptr++;
5674 cur = *ptr;
5675 if ((ctxt->sax != NULL) &&
5676 (ctxt->sax->getParameterEntity != NULL))
5677 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5678 name);
5679 if (entity == NULL) {
5680 /*
5681 * [ WFC: Entity Declared ]
5682 * In a document without any DTD, a document with only an
5683 * internal DTD subset which contains no parameter entity
5684 * references, or a document with "standalone='yes'", ...
5685 * ... The declaration of a parameter entity must precede
5686 * any reference to it...
5687 */
5688 if ((ctxt->standalone == 1) ||
5689 ((ctxt->hasExternalSubset == 0) &&
5690 (ctxt->hasPErefs == 0))) {
5691 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5693 ctxt->sax->error(ctxt->userData,
5694 "PEReference: %%%s; not found\n", name);
5695 ctxt->wellFormed = 0;
5696 ctxt->disableSAX = 1;
5697 } else {
5698 /*
5699 * [ VC: Entity Declared ]
5700 * In a document with an external subset or external
5701 * parameter entities with "standalone='no'", ...
5702 * ... The declaration of a parameter entity must
5703 * precede any reference to it...
5704 */
5705 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5706 ctxt->sax->warning(ctxt->userData,
5707 "PEReference: %%%s; not found\n", name);
5708 ctxt->valid = 0;
5709 }
5710 } else {
5711 /*
5712 * Internal checking in case the entity quest barfed
5713 */
5714 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5715 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5716 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5717 ctxt->sax->warning(ctxt->userData,
5718 "Internal: %%%s; is not a parameter entity\n", name);
5719 }
5720 }
5721 ctxt->hasPErefs = 1;
5722 } else {
5723 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5725 ctxt->sax->error(ctxt->userData,
5726 "xmlParseStringPEReference: expecting ';'\n");
5727 ctxt->wellFormed = 0;
5728 ctxt->disableSAX = 1;
5729 }
5730 xmlFree(name);
5731 }
5732 }
5733 *str = ptr;
5734 return(entity);
5735}
5736
5737/**
5738 * xmlParseDocTypeDecl:
5739 * @ctxt: an XML parser context
5740 *
5741 * parse a DOCTYPE declaration
5742 *
5743 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5744 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5745 *
5746 * [ VC: Root Element Type ]
5747 * The Name in the document type declaration must match the element
5748 * type of the root element.
5749 */
5750
5751void
5752xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5753 xmlChar *name = NULL;
5754 xmlChar *ExternalID = NULL;
5755 xmlChar *URI = NULL;
5756
5757 /*
5758 * We know that '<!DOCTYPE' has been detected.
5759 */
5760 SKIP(9);
5761
5762 SKIP_BLANKS;
5763
5764 /*
5765 * Parse the DOCTYPE name.
5766 */
5767 name = xmlParseName(ctxt);
5768 if (name == NULL) {
5769 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5771 ctxt->sax->error(ctxt->userData,
5772 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5773 ctxt->wellFormed = 0;
5774 ctxt->disableSAX = 1;
5775 }
5776 ctxt->intSubName = name;
5777
5778 SKIP_BLANKS;
5779
5780 /*
5781 * Check for SystemID and ExternalID
5782 */
5783 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5784
5785 if ((URI != NULL) || (ExternalID != NULL)) {
5786 ctxt->hasExternalSubset = 1;
5787 }
5788 ctxt->extSubURI = URI;
5789 ctxt->extSubSystem = ExternalID;
5790
5791 SKIP_BLANKS;
5792
5793 /*
5794 * Create and update the internal subset.
5795 */
5796 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5797 (!ctxt->disableSAX))
5798 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5799
5800 /*
5801 * Is there any internal subset declarations ?
5802 * they are handled separately in xmlParseInternalSubset()
5803 */
5804 if (RAW == '[')
5805 return;
5806
5807 /*
5808 * We should be at the end of the DOCTYPE declaration.
5809 */
5810 if (RAW != '>') {
5811 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5813 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5814 ctxt->wellFormed = 0;
5815 ctxt->disableSAX = 1;
5816 }
5817 NEXT;
5818}
5819
5820/**
5821 * xmlParseInternalsubset:
5822 * @ctxt: an XML parser context
5823 *
5824 * parse the internal subset declaration
5825 *
5826 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5827 */
5828
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005829static void
Owen Taylor3473f882001-02-23 17:55:21 +00005830xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5831 /*
5832 * Is there any DTD definition ?
5833 */
5834 if (RAW == '[') {
5835 ctxt->instate = XML_PARSER_DTD;
5836 NEXT;
5837 /*
5838 * Parse the succession of Markup declarations and
5839 * PEReferences.
5840 * Subsequence (markupdecl | PEReference | S)*
5841 */
5842 while (RAW != ']') {
5843 const xmlChar *check = CUR_PTR;
5844 int cons = ctxt->input->consumed;
5845
5846 SKIP_BLANKS;
5847 xmlParseMarkupDecl(ctxt);
5848 xmlParsePEReference(ctxt);
5849
5850 /*
5851 * Pop-up of finished entities.
5852 */
5853 while ((RAW == 0) && (ctxt->inputNr > 1))
5854 xmlPopInput(ctxt);
5855
5856 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5857 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859 ctxt->sax->error(ctxt->userData,
5860 "xmlParseInternalSubset: error detected in Markup declaration\n");
5861 ctxt->wellFormed = 0;
5862 ctxt->disableSAX = 1;
5863 break;
5864 }
5865 }
5866 if (RAW == ']') {
5867 NEXT;
5868 SKIP_BLANKS;
5869 }
5870 }
5871
5872 /*
5873 * We should be at the end of the DOCTYPE declaration.
5874 */
5875 if (RAW != '>') {
5876 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5878 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5879 ctxt->wellFormed = 0;
5880 ctxt->disableSAX = 1;
5881 }
5882 NEXT;
5883}
5884
5885/**
5886 * xmlParseAttribute:
5887 * @ctxt: an XML parser context
5888 * @value: a xmlChar ** used to store the value of the attribute
5889 *
5890 * parse an attribute
5891 *
5892 * [41] Attribute ::= Name Eq AttValue
5893 *
5894 * [ WFC: No External Entity References ]
5895 * Attribute values cannot contain direct or indirect entity references
5896 * to external entities.
5897 *
5898 * [ WFC: No < in Attribute Values ]
5899 * The replacement text of any entity referred to directly or indirectly in
5900 * an attribute value (other than "&lt;") must not contain a <.
5901 *
5902 * [ VC: Attribute Value Type ]
5903 * The attribute must have been declared; the value must be of the type
5904 * declared for it.
5905 *
5906 * [25] Eq ::= S? '=' S?
5907 *
5908 * With namespace:
5909 *
5910 * [NS 11] Attribute ::= QName Eq AttValue
5911 *
5912 * Also the case QName == xmlns:??? is handled independently as a namespace
5913 * definition.
5914 *
5915 * Returns the attribute name, and the value in *value.
5916 */
5917
5918xmlChar *
5919xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5920 xmlChar *name, *val;
5921
5922 *value = NULL;
5923 name = xmlParseName(ctxt);
5924 if (name == NULL) {
5925 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5927 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5928 ctxt->wellFormed = 0;
5929 ctxt->disableSAX = 1;
5930 return(NULL);
5931 }
5932
5933 /*
5934 * read the value
5935 */
5936 SKIP_BLANKS;
5937 if (RAW == '=') {
5938 NEXT;
5939 SKIP_BLANKS;
5940 val = xmlParseAttValue(ctxt);
5941 ctxt->instate = XML_PARSER_CONTENT;
5942 } else {
5943 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5945 ctxt->sax->error(ctxt->userData,
5946 "Specification mandate value for attribute %s\n", name);
5947 ctxt->wellFormed = 0;
5948 ctxt->disableSAX = 1;
5949 xmlFree(name);
5950 return(NULL);
5951 }
5952
5953 /*
5954 * Check that xml:lang conforms to the specification
5955 * No more registered as an error, just generate a warning now
5956 * since this was deprecated in XML second edition
5957 */
5958 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5959 if (!xmlCheckLanguageID(val)) {
5960 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5961 ctxt->sax->warning(ctxt->userData,
5962 "Malformed value for xml:lang : %s\n", val);
5963 }
5964 }
5965
5966 /*
5967 * Check that xml:space conforms to the specification
5968 */
5969 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5970 if (xmlStrEqual(val, BAD_CAST "default"))
5971 *(ctxt->space) = 0;
5972 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5973 *(ctxt->space) = 1;
5974 else {
5975 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5977 ctxt->sax->error(ctxt->userData,
5978"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5979 val);
5980 ctxt->wellFormed = 0;
5981 ctxt->disableSAX = 1;
5982 }
5983 }
5984
5985 *value = val;
5986 return(name);
5987}
5988
5989/**
5990 * xmlParseStartTag:
5991 * @ctxt: an XML parser context
5992 *
5993 * parse a start of tag either for rule element or
5994 * EmptyElement. In both case we don't parse the tag closing chars.
5995 *
5996 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5997 *
5998 * [ WFC: Unique Att Spec ]
5999 * No attribute name may appear more than once in the same start-tag or
6000 * empty-element tag.
6001 *
6002 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6003 *
6004 * [ WFC: Unique Att Spec ]
6005 * No attribute name may appear more than once in the same start-tag or
6006 * empty-element tag.
6007 *
6008 * With namespace:
6009 *
6010 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6011 *
6012 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6013 *
6014 * Returns the element name parsed
6015 */
6016
6017xmlChar *
6018xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6019 xmlChar *name;
6020 xmlChar *attname;
6021 xmlChar *attvalue;
6022 const xmlChar **atts = NULL;
6023 int nbatts = 0;
6024 int maxatts = 0;
6025 int i;
6026
6027 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006028 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006029
6030 name = xmlParseName(ctxt);
6031 if (name == NULL) {
6032 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6034 ctxt->sax->error(ctxt->userData,
6035 "xmlParseStartTag: invalid element name\n");
6036 ctxt->wellFormed = 0;
6037 ctxt->disableSAX = 1;
6038 return(NULL);
6039 }
6040
6041 /*
6042 * Now parse the attributes, it ends up with the ending
6043 *
6044 * (S Attribute)* S?
6045 */
6046 SKIP_BLANKS;
6047 GROW;
6048
Daniel Veillard21a0f912001-02-25 19:54:14 +00006049 while ((RAW != '>') &&
6050 ((RAW != '/') || (NXT(1) != '>')) &&
6051 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006052 const xmlChar *q = CUR_PTR;
6053 int cons = ctxt->input->consumed;
6054
6055 attname = xmlParseAttribute(ctxt, &attvalue);
6056 if ((attname != NULL) && (attvalue != NULL)) {
6057 /*
6058 * [ WFC: Unique Att Spec ]
6059 * No attribute name may appear more than once in the same
6060 * start-tag or empty-element tag.
6061 */
6062 for (i = 0; i < nbatts;i += 2) {
6063 if (xmlStrEqual(atts[i], attname)) {
6064 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6066 ctxt->sax->error(ctxt->userData,
6067 "Attribute %s redefined\n",
6068 attname);
6069 ctxt->wellFormed = 0;
6070 ctxt->disableSAX = 1;
6071 xmlFree(attname);
6072 xmlFree(attvalue);
6073 goto failed;
6074 }
6075 }
6076
6077 /*
6078 * Add the pair to atts
6079 */
6080 if (atts == NULL) {
6081 maxatts = 10;
6082 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6083 if (atts == NULL) {
6084 xmlGenericError(xmlGenericErrorContext,
6085 "malloc of %ld byte failed\n",
6086 maxatts * (long)sizeof(xmlChar *));
6087 return(NULL);
6088 }
6089 } else if (nbatts + 4 > maxatts) {
6090 maxatts *= 2;
6091 atts = (const xmlChar **) xmlRealloc((void *) atts,
6092 maxatts * sizeof(xmlChar *));
6093 if (atts == NULL) {
6094 xmlGenericError(xmlGenericErrorContext,
6095 "realloc of %ld byte failed\n",
6096 maxatts * (long)sizeof(xmlChar *));
6097 return(NULL);
6098 }
6099 }
6100 atts[nbatts++] = attname;
6101 atts[nbatts++] = attvalue;
6102 atts[nbatts] = NULL;
6103 atts[nbatts + 1] = NULL;
6104 } else {
6105 if (attname != NULL)
6106 xmlFree(attname);
6107 if (attvalue != NULL)
6108 xmlFree(attvalue);
6109 }
6110
6111failed:
6112
6113 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6114 break;
6115 if (!IS_BLANK(RAW)) {
6116 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6118 ctxt->sax->error(ctxt->userData,
6119 "attributes construct error\n");
6120 ctxt->wellFormed = 0;
6121 ctxt->disableSAX = 1;
6122 }
6123 SKIP_BLANKS;
6124 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6125 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6127 ctxt->sax->error(ctxt->userData,
6128 "xmlParseStartTag: problem parsing attributes\n");
6129 ctxt->wellFormed = 0;
6130 ctxt->disableSAX = 1;
6131 break;
6132 }
6133 GROW;
6134 }
6135
6136 /*
6137 * SAX: Start of Element !
6138 */
6139 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6140 (!ctxt->disableSAX))
6141 ctxt->sax->startElement(ctxt->userData, name, atts);
6142
6143 if (atts != NULL) {
6144 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6145 xmlFree((void *) atts);
6146 }
6147 return(name);
6148}
6149
6150/**
6151 * xmlParseEndTag:
6152 * @ctxt: an XML parser context
6153 *
6154 * parse an end of tag
6155 *
6156 * [42] ETag ::= '</' Name S? '>'
6157 *
6158 * With namespace
6159 *
6160 * [NS 9] ETag ::= '</' QName S? '>'
6161 */
6162
6163void
6164xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6165 xmlChar *name;
6166 xmlChar *oldname;
6167
6168 GROW;
6169 if ((RAW != '<') || (NXT(1) != '/')) {
6170 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6172 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6173 ctxt->wellFormed = 0;
6174 ctxt->disableSAX = 1;
6175 return;
6176 }
6177 SKIP(2);
6178
6179 name = xmlParseName(ctxt);
6180
6181 /*
6182 * We should definitely be at the ending "S? '>'" part
6183 */
6184 GROW;
6185 SKIP_BLANKS;
6186 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6187 ctxt->errNo = XML_ERR_GT_REQUIRED;
6188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6189 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6190 ctxt->wellFormed = 0;
6191 ctxt->disableSAX = 1;
6192 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006193 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006194
6195 /*
6196 * [ WFC: Element Type Match ]
6197 * The Name in an element's end-tag must match the element type in the
6198 * start-tag.
6199 *
6200 */
6201 if ((name == NULL) || (ctxt->name == NULL) ||
6202 (!xmlStrEqual(name, ctxt->name))) {
6203 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6205 if ((name != NULL) && (ctxt->name != NULL)) {
6206 ctxt->sax->error(ctxt->userData,
6207 "Opening and ending tag mismatch: %s and %s\n",
6208 ctxt->name, name);
6209 } else if (ctxt->name != NULL) {
6210 ctxt->sax->error(ctxt->userData,
6211 "Ending tag eror for: %s\n", ctxt->name);
6212 } else {
6213 ctxt->sax->error(ctxt->userData,
6214 "Ending tag error: internal error ???\n");
6215 }
6216
6217 }
6218 ctxt->wellFormed = 0;
6219 ctxt->disableSAX = 1;
6220 }
6221
6222 /*
6223 * SAX: End of Tag
6224 */
6225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6226 (!ctxt->disableSAX))
6227 ctxt->sax->endElement(ctxt->userData, name);
6228
6229 if (name != NULL)
6230 xmlFree(name);
6231 oldname = namePop(ctxt);
6232 spacePop(ctxt);
6233 if (oldname != NULL) {
6234#ifdef DEBUG_STACK
6235 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6236#endif
6237 xmlFree(oldname);
6238 }
6239 return;
6240}
6241
6242/**
6243 * xmlParseCDSect:
6244 * @ctxt: an XML parser context
6245 *
6246 * Parse escaped pure raw content.
6247 *
6248 * [18] CDSect ::= CDStart CData CDEnd
6249 *
6250 * [19] CDStart ::= '<![CDATA['
6251 *
6252 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6253 *
6254 * [21] CDEnd ::= ']]>'
6255 */
6256void
6257xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6258 xmlChar *buf = NULL;
6259 int len = 0;
6260 int size = XML_PARSER_BUFFER_SIZE;
6261 int r, rl;
6262 int s, sl;
6263 int cur, l;
6264 int count = 0;
6265
6266 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6267 (NXT(2) == '[') && (NXT(3) == 'C') &&
6268 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6269 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6270 (NXT(8) == '[')) {
6271 SKIP(9);
6272 } else
6273 return;
6274
6275 ctxt->instate = XML_PARSER_CDATA_SECTION;
6276 r = CUR_CHAR(rl);
6277 if (!IS_CHAR(r)) {
6278 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6280 ctxt->sax->error(ctxt->userData,
6281 "CData section not finished\n");
6282 ctxt->wellFormed = 0;
6283 ctxt->disableSAX = 1;
6284 ctxt->instate = XML_PARSER_CONTENT;
6285 return;
6286 }
6287 NEXTL(rl);
6288 s = CUR_CHAR(sl);
6289 if (!IS_CHAR(s)) {
6290 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6292 ctxt->sax->error(ctxt->userData,
6293 "CData section not finished\n");
6294 ctxt->wellFormed = 0;
6295 ctxt->disableSAX = 1;
6296 ctxt->instate = XML_PARSER_CONTENT;
6297 return;
6298 }
6299 NEXTL(sl);
6300 cur = CUR_CHAR(l);
6301 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6302 if (buf == NULL) {
6303 xmlGenericError(xmlGenericErrorContext,
6304 "malloc of %d byte failed\n", size);
6305 return;
6306 }
6307 while (IS_CHAR(cur) &&
6308 ((r != ']') || (s != ']') || (cur != '>'))) {
6309 if (len + 5 >= size) {
6310 size *= 2;
6311 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6312 if (buf == NULL) {
6313 xmlGenericError(xmlGenericErrorContext,
6314 "realloc of %d byte failed\n", size);
6315 return;
6316 }
6317 }
6318 COPY_BUF(rl,buf,len,r);
6319 r = s;
6320 rl = sl;
6321 s = cur;
6322 sl = l;
6323 count++;
6324 if (count > 50) {
6325 GROW;
6326 count = 0;
6327 }
6328 NEXTL(l);
6329 cur = CUR_CHAR(l);
6330 }
6331 buf[len] = 0;
6332 ctxt->instate = XML_PARSER_CONTENT;
6333 if (cur != '>') {
6334 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6336 ctxt->sax->error(ctxt->userData,
6337 "CData section not finished\n%.50s\n", buf);
6338 ctxt->wellFormed = 0;
6339 ctxt->disableSAX = 1;
6340 xmlFree(buf);
6341 return;
6342 }
6343 NEXTL(l);
6344
6345 /*
6346 * Ok the buffer is to be consumed as cdata.
6347 */
6348 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6349 if (ctxt->sax->cdataBlock != NULL)
6350 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6351 }
6352 xmlFree(buf);
6353}
6354
6355/**
6356 * xmlParseContent:
6357 * @ctxt: an XML parser context
6358 *
6359 * Parse a content:
6360 *
6361 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6362 */
6363
6364void
6365xmlParseContent(xmlParserCtxtPtr ctxt) {
6366 GROW;
6367 while (((RAW != 0) || (ctxt->token != 0)) &&
6368 ((RAW != '<') || (NXT(1) != '/'))) {
6369 const xmlChar *test = CUR_PTR;
6370 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006371 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006372 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006373
6374 /*
6375 * Handle possible processed charrefs.
6376 */
6377 if (ctxt->token != 0) {
6378 xmlParseCharData(ctxt, 0);
6379 }
6380 /*
6381 * First case : a Processing Instruction.
6382 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006383 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006384 xmlParsePI(ctxt);
6385 }
6386
6387 /*
6388 * Second case : a CDSection
6389 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006390 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006391 (NXT(2) == '[') && (NXT(3) == 'C') &&
6392 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6393 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6394 (NXT(8) == '[')) {
6395 xmlParseCDSect(ctxt);
6396 }
6397
6398 /*
6399 * Third case : a comment
6400 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006401 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006402 (NXT(2) == '-') && (NXT(3) == '-')) {
6403 xmlParseComment(ctxt);
6404 ctxt->instate = XML_PARSER_CONTENT;
6405 }
6406
6407 /*
6408 * Fourth case : a sub-element.
6409 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006410 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006411 xmlParseElement(ctxt);
6412 }
6413
6414 /*
6415 * Fifth case : a reference. If if has not been resolved,
6416 * parsing returns it's Name, create the node
6417 */
6418
Daniel Veillard21a0f912001-02-25 19:54:14 +00006419 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006420 xmlParseReference(ctxt);
6421 }
6422
6423 /*
6424 * Last case, text. Note that References are handled directly.
6425 */
6426 else {
6427 xmlParseCharData(ctxt, 0);
6428 }
6429
6430 GROW;
6431 /*
6432 * Pop-up of finished entities.
6433 */
6434 while ((RAW == 0) && (ctxt->inputNr > 1))
6435 xmlPopInput(ctxt);
6436 SHRINK;
6437
6438 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6439 (tok == ctxt->token)) {
6440 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6442 ctxt->sax->error(ctxt->userData,
6443 "detected an error in element content\n");
6444 ctxt->wellFormed = 0;
6445 ctxt->disableSAX = 1;
6446 ctxt->instate = XML_PARSER_EOF;
6447 break;
6448 }
6449 }
6450}
6451
6452/**
6453 * xmlParseElement:
6454 * @ctxt: an XML parser context
6455 *
6456 * parse an XML element, this is highly recursive
6457 *
6458 * [39] element ::= EmptyElemTag | STag content ETag
6459 *
6460 * [ WFC: Element Type Match ]
6461 * The Name in an element's end-tag must match the element type in the
6462 * start-tag.
6463 *
6464 * [ VC: Element Valid ]
6465 * An element is valid if there is a declaration matching elementdecl
6466 * where the Name matches the element type and one of the following holds:
6467 * - The declaration matches EMPTY and the element has no content.
6468 * - The declaration matches children and the sequence of child elements
6469 * belongs to the language generated by the regular expression in the
6470 * content model, with optional white space (characters matching the
6471 * nonterminal S) between each pair of child elements.
6472 * - The declaration matches Mixed and the content consists of character
6473 * data and child elements whose types match names in the content model.
6474 * - The declaration matches ANY, and the types of any child elements have
6475 * been declared.
6476 */
6477
6478void
6479xmlParseElement(xmlParserCtxtPtr ctxt) {
6480 const xmlChar *openTag = CUR_PTR;
6481 xmlChar *name;
6482 xmlChar *oldname;
6483 xmlParserNodeInfo node_info;
6484 xmlNodePtr ret;
6485
6486 /* Capture start position */
6487 if (ctxt->record_info) {
6488 node_info.begin_pos = ctxt->input->consumed +
6489 (CUR_PTR - ctxt->input->base);
6490 node_info.begin_line = ctxt->input->line;
6491 }
6492
6493 if (ctxt->spaceNr == 0)
6494 spacePush(ctxt, -1);
6495 else
6496 spacePush(ctxt, *ctxt->space);
6497
6498 name = xmlParseStartTag(ctxt);
6499 if (name == NULL) {
6500 spacePop(ctxt);
6501 return;
6502 }
6503 namePush(ctxt, name);
6504 ret = ctxt->node;
6505
6506 /*
6507 * [ VC: Root Element Type ]
6508 * The Name in the document type declaration must match the element
6509 * type of the root element.
6510 */
6511 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6512 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6513 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6514
6515 /*
6516 * Check for an Empty Element.
6517 */
6518 if ((RAW == '/') && (NXT(1) == '>')) {
6519 SKIP(2);
6520 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6521 (!ctxt->disableSAX))
6522 ctxt->sax->endElement(ctxt->userData, name);
6523 oldname = namePop(ctxt);
6524 spacePop(ctxt);
6525 if (oldname != NULL) {
6526#ifdef DEBUG_STACK
6527 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6528#endif
6529 xmlFree(oldname);
6530 }
6531 if ( ret != NULL && ctxt->record_info ) {
6532 node_info.end_pos = ctxt->input->consumed +
6533 (CUR_PTR - ctxt->input->base);
6534 node_info.end_line = ctxt->input->line;
6535 node_info.node = ret;
6536 xmlParserAddNodeInfo(ctxt, &node_info);
6537 }
6538 return;
6539 }
6540 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006541 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006542 } else {
6543 ctxt->errNo = XML_ERR_GT_REQUIRED;
6544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6545 ctxt->sax->error(ctxt->userData,
6546 "Couldn't find end of Start Tag\n%.30s\n",
6547 openTag);
6548 ctxt->wellFormed = 0;
6549 ctxt->disableSAX = 1;
6550
6551 /*
6552 * end of parsing of this node.
6553 */
6554 nodePop(ctxt);
6555 oldname = namePop(ctxt);
6556 spacePop(ctxt);
6557 if (oldname != NULL) {
6558#ifdef DEBUG_STACK
6559 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6560#endif
6561 xmlFree(oldname);
6562 }
6563
6564 /*
6565 * Capture end position and add node
6566 */
6567 if ( ret != NULL && ctxt->record_info ) {
6568 node_info.end_pos = ctxt->input->consumed +
6569 (CUR_PTR - ctxt->input->base);
6570 node_info.end_line = ctxt->input->line;
6571 node_info.node = ret;
6572 xmlParserAddNodeInfo(ctxt, &node_info);
6573 }
6574 return;
6575 }
6576
6577 /*
6578 * Parse the content of the element:
6579 */
6580 xmlParseContent(ctxt);
6581 if (!IS_CHAR(RAW)) {
6582 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6584 ctxt->sax->error(ctxt->userData,
6585 "Premature end of data in tag %.30s\n", openTag);
6586 ctxt->wellFormed = 0;
6587 ctxt->disableSAX = 1;
6588
6589 /*
6590 * end of parsing of this node.
6591 */
6592 nodePop(ctxt);
6593 oldname = namePop(ctxt);
6594 spacePop(ctxt);
6595 if (oldname != NULL) {
6596#ifdef DEBUG_STACK
6597 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6598#endif
6599 xmlFree(oldname);
6600 }
6601 return;
6602 }
6603
6604 /*
6605 * parse the end of tag: '</' should be here.
6606 */
6607 xmlParseEndTag(ctxt);
6608
6609 /*
6610 * Capture end position and add node
6611 */
6612 if ( ret != NULL && ctxt->record_info ) {
6613 node_info.end_pos = ctxt->input->consumed +
6614 (CUR_PTR - ctxt->input->base);
6615 node_info.end_line = ctxt->input->line;
6616 node_info.node = ret;
6617 xmlParserAddNodeInfo(ctxt, &node_info);
6618 }
6619}
6620
6621/**
6622 * xmlParseVersionNum:
6623 * @ctxt: an XML parser context
6624 *
6625 * parse the XML version value.
6626 *
6627 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6628 *
6629 * Returns the string giving the XML version number, or NULL
6630 */
6631xmlChar *
6632xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6633 xmlChar *buf = NULL;
6634 int len = 0;
6635 int size = 10;
6636 xmlChar cur;
6637
6638 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6639 if (buf == NULL) {
6640 xmlGenericError(xmlGenericErrorContext,
6641 "malloc of %d byte failed\n", size);
6642 return(NULL);
6643 }
6644 cur = CUR;
6645 while (((cur >= 'a') && (cur <= 'z')) ||
6646 ((cur >= 'A') && (cur <= 'Z')) ||
6647 ((cur >= '0') && (cur <= '9')) ||
6648 (cur == '_') || (cur == '.') ||
6649 (cur == ':') || (cur == '-')) {
6650 if (len + 1 >= size) {
6651 size *= 2;
6652 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6653 if (buf == NULL) {
6654 xmlGenericError(xmlGenericErrorContext,
6655 "realloc of %d byte failed\n", size);
6656 return(NULL);
6657 }
6658 }
6659 buf[len++] = cur;
6660 NEXT;
6661 cur=CUR;
6662 }
6663 buf[len] = 0;
6664 return(buf);
6665}
6666
6667/**
6668 * xmlParseVersionInfo:
6669 * @ctxt: an XML parser context
6670 *
6671 * parse the XML version.
6672 *
6673 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6674 *
6675 * [25] Eq ::= S? '=' S?
6676 *
6677 * Returns the version string, e.g. "1.0"
6678 */
6679
6680xmlChar *
6681xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6682 xmlChar *version = NULL;
6683 const xmlChar *q;
6684
6685 if ((RAW == 'v') && (NXT(1) == 'e') &&
6686 (NXT(2) == 'r') && (NXT(3) == 's') &&
6687 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6688 (NXT(6) == 'n')) {
6689 SKIP(7);
6690 SKIP_BLANKS;
6691 if (RAW != '=') {
6692 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6694 ctxt->sax->error(ctxt->userData,
6695 "xmlParseVersionInfo : expected '='\n");
6696 ctxt->wellFormed = 0;
6697 ctxt->disableSAX = 1;
6698 return(NULL);
6699 }
6700 NEXT;
6701 SKIP_BLANKS;
6702 if (RAW == '"') {
6703 NEXT;
6704 q = CUR_PTR;
6705 version = xmlParseVersionNum(ctxt);
6706 if (RAW != '"') {
6707 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6709 ctxt->sax->error(ctxt->userData,
6710 "String not closed\n%.50s\n", q);
6711 ctxt->wellFormed = 0;
6712 ctxt->disableSAX = 1;
6713 } else
6714 NEXT;
6715 } else if (RAW == '\''){
6716 NEXT;
6717 q = CUR_PTR;
6718 version = xmlParseVersionNum(ctxt);
6719 if (RAW != '\'') {
6720 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6722 ctxt->sax->error(ctxt->userData,
6723 "String not closed\n%.50s\n", q);
6724 ctxt->wellFormed = 0;
6725 ctxt->disableSAX = 1;
6726 } else
6727 NEXT;
6728 } else {
6729 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6731 ctxt->sax->error(ctxt->userData,
6732 "xmlParseVersionInfo : expected ' or \"\n");
6733 ctxt->wellFormed = 0;
6734 ctxt->disableSAX = 1;
6735 }
6736 }
6737 return(version);
6738}
6739
6740/**
6741 * xmlParseEncName:
6742 * @ctxt: an XML parser context
6743 *
6744 * parse the XML encoding name
6745 *
6746 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6747 *
6748 * Returns the encoding name value or NULL
6749 */
6750xmlChar *
6751xmlParseEncName(xmlParserCtxtPtr ctxt) {
6752 xmlChar *buf = NULL;
6753 int len = 0;
6754 int size = 10;
6755 xmlChar cur;
6756
6757 cur = CUR;
6758 if (((cur >= 'a') && (cur <= 'z')) ||
6759 ((cur >= 'A') && (cur <= 'Z'))) {
6760 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6761 if (buf == NULL) {
6762 xmlGenericError(xmlGenericErrorContext,
6763 "malloc of %d byte failed\n", size);
6764 return(NULL);
6765 }
6766
6767 buf[len++] = cur;
6768 NEXT;
6769 cur = CUR;
6770 while (((cur >= 'a') && (cur <= 'z')) ||
6771 ((cur >= 'A') && (cur <= 'Z')) ||
6772 ((cur >= '0') && (cur <= '9')) ||
6773 (cur == '.') || (cur == '_') ||
6774 (cur == '-')) {
6775 if (len + 1 >= size) {
6776 size *= 2;
6777 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6778 if (buf == NULL) {
6779 xmlGenericError(xmlGenericErrorContext,
6780 "realloc of %d byte failed\n", size);
6781 return(NULL);
6782 }
6783 }
6784 buf[len++] = cur;
6785 NEXT;
6786 cur = CUR;
6787 if (cur == 0) {
6788 SHRINK;
6789 GROW;
6790 cur = CUR;
6791 }
6792 }
6793 buf[len] = 0;
6794 } else {
6795 ctxt->errNo = XML_ERR_ENCODING_NAME;
6796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6797 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6798 ctxt->wellFormed = 0;
6799 ctxt->disableSAX = 1;
6800 }
6801 return(buf);
6802}
6803
6804/**
6805 * xmlParseEncodingDecl:
6806 * @ctxt: an XML parser context
6807 *
6808 * parse the XML encoding declaration
6809 *
6810 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6811 *
6812 * this setups the conversion filters.
6813 *
6814 * Returns the encoding value or NULL
6815 */
6816
6817xmlChar *
6818xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6819 xmlChar *encoding = NULL;
6820 const xmlChar *q;
6821
6822 SKIP_BLANKS;
6823 if ((RAW == 'e') && (NXT(1) == 'n') &&
6824 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6825 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6826 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6827 SKIP(8);
6828 SKIP_BLANKS;
6829 if (RAW != '=') {
6830 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6832 ctxt->sax->error(ctxt->userData,
6833 "xmlParseEncodingDecl : expected '='\n");
6834 ctxt->wellFormed = 0;
6835 ctxt->disableSAX = 1;
6836 return(NULL);
6837 }
6838 NEXT;
6839 SKIP_BLANKS;
6840 if (RAW == '"') {
6841 NEXT;
6842 q = CUR_PTR;
6843 encoding = xmlParseEncName(ctxt);
6844 if (RAW != '"') {
6845 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6847 ctxt->sax->error(ctxt->userData,
6848 "String not closed\n%.50s\n", q);
6849 ctxt->wellFormed = 0;
6850 ctxt->disableSAX = 1;
6851 } else
6852 NEXT;
6853 } else if (RAW == '\''){
6854 NEXT;
6855 q = CUR_PTR;
6856 encoding = xmlParseEncName(ctxt);
6857 if (RAW != '\'') {
6858 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6860 ctxt->sax->error(ctxt->userData,
6861 "String not closed\n%.50s\n", q);
6862 ctxt->wellFormed = 0;
6863 ctxt->disableSAX = 1;
6864 } else
6865 NEXT;
6866 } else if (RAW == '"'){
6867 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6869 ctxt->sax->error(ctxt->userData,
6870 "xmlParseEncodingDecl : expected ' or \"\n");
6871 ctxt->wellFormed = 0;
6872 ctxt->disableSAX = 1;
6873 }
6874 if (encoding != NULL) {
6875 xmlCharEncoding enc;
6876 xmlCharEncodingHandlerPtr handler;
6877
6878 if (ctxt->input->encoding != NULL)
6879 xmlFree((xmlChar *) ctxt->input->encoding);
6880 ctxt->input->encoding = encoding;
6881
6882 enc = xmlParseCharEncoding((const char *) encoding);
6883 /*
6884 * registered set of known encodings
6885 */
6886 if (enc != XML_CHAR_ENCODING_ERROR) {
6887 xmlSwitchEncoding(ctxt, enc);
6888 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6889 xmlFree(encoding);
6890 return(NULL);
6891 }
6892 } else {
6893 /*
6894 * fallback for unknown encodings
6895 */
6896 handler = xmlFindCharEncodingHandler((const char *) encoding);
6897 if (handler != NULL) {
6898 xmlSwitchToEncoding(ctxt, handler);
6899 } else {
6900 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6902 ctxt->sax->error(ctxt->userData,
6903 "Unsupported encoding %s\n", encoding);
6904 return(NULL);
6905 }
6906 }
6907 }
6908 }
6909 return(encoding);
6910}
6911
6912/**
6913 * xmlParseSDDecl:
6914 * @ctxt: an XML parser context
6915 *
6916 * parse the XML standalone declaration
6917 *
6918 * [32] SDDecl ::= S 'standalone' Eq
6919 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6920 *
6921 * [ VC: Standalone Document Declaration ]
6922 * TODO The standalone document declaration must have the value "no"
6923 * if any external markup declarations contain declarations of:
6924 * - attributes with default values, if elements to which these
6925 * attributes apply appear in the document without specifications
6926 * of values for these attributes, or
6927 * - entities (other than amp, lt, gt, apos, quot), if references
6928 * to those entities appear in the document, or
6929 * - attributes with values subject to normalization, where the
6930 * attribute appears in the document with a value which will change
6931 * as a result of normalization, or
6932 * - element types with element content, if white space occurs directly
6933 * within any instance of those types.
6934 *
6935 * Returns 1 if standalone, 0 otherwise
6936 */
6937
6938int
6939xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6940 int standalone = -1;
6941
6942 SKIP_BLANKS;
6943 if ((RAW == 's') && (NXT(1) == 't') &&
6944 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6945 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6946 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6947 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6948 SKIP(10);
6949 SKIP_BLANKS;
6950 if (RAW != '=') {
6951 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6953 ctxt->sax->error(ctxt->userData,
6954 "XML standalone declaration : expected '='\n");
6955 ctxt->wellFormed = 0;
6956 ctxt->disableSAX = 1;
6957 return(standalone);
6958 }
6959 NEXT;
6960 SKIP_BLANKS;
6961 if (RAW == '\''){
6962 NEXT;
6963 if ((RAW == 'n') && (NXT(1) == 'o')) {
6964 standalone = 0;
6965 SKIP(2);
6966 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6967 (NXT(2) == 's')) {
6968 standalone = 1;
6969 SKIP(3);
6970 } else {
6971 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6973 ctxt->sax->error(ctxt->userData,
6974 "standalone accepts only 'yes' or 'no'\n");
6975 ctxt->wellFormed = 0;
6976 ctxt->disableSAX = 1;
6977 }
6978 if (RAW != '\'') {
6979 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6981 ctxt->sax->error(ctxt->userData, "String not closed\n");
6982 ctxt->wellFormed = 0;
6983 ctxt->disableSAX = 1;
6984 } else
6985 NEXT;
6986 } else if (RAW == '"'){
6987 NEXT;
6988 if ((RAW == 'n') && (NXT(1) == 'o')) {
6989 standalone = 0;
6990 SKIP(2);
6991 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6992 (NXT(2) == 's')) {
6993 standalone = 1;
6994 SKIP(3);
6995 } else {
6996 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6998 ctxt->sax->error(ctxt->userData,
6999 "standalone accepts only 'yes' or 'no'\n");
7000 ctxt->wellFormed = 0;
7001 ctxt->disableSAX = 1;
7002 }
7003 if (RAW != '"') {
7004 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7006 ctxt->sax->error(ctxt->userData, "String not closed\n");
7007 ctxt->wellFormed = 0;
7008 ctxt->disableSAX = 1;
7009 } else
7010 NEXT;
7011 } else {
7012 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7014 ctxt->sax->error(ctxt->userData,
7015 "Standalone value not found\n");
7016 ctxt->wellFormed = 0;
7017 ctxt->disableSAX = 1;
7018 }
7019 }
7020 return(standalone);
7021}
7022
7023/**
7024 * xmlParseXMLDecl:
7025 * @ctxt: an XML parser context
7026 *
7027 * parse an XML declaration header
7028 *
7029 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7030 */
7031
7032void
7033xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7034 xmlChar *version;
7035
7036 /*
7037 * We know that '<?xml' is here.
7038 */
7039 SKIP(5);
7040
7041 if (!IS_BLANK(RAW)) {
7042 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7045 ctxt->wellFormed = 0;
7046 ctxt->disableSAX = 1;
7047 }
7048 SKIP_BLANKS;
7049
7050 /*
7051 * We should have the VersionInfo here.
7052 */
7053 version = xmlParseVersionInfo(ctxt);
7054 if (version == NULL)
7055 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7056 ctxt->version = xmlStrdup(version);
7057 xmlFree(version);
7058
7059 /*
7060 * We may have the encoding declaration
7061 */
7062 if (!IS_BLANK(RAW)) {
7063 if ((RAW == '?') && (NXT(1) == '>')) {
7064 SKIP(2);
7065 return;
7066 }
7067 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7069 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7070 ctxt->wellFormed = 0;
7071 ctxt->disableSAX = 1;
7072 }
7073 xmlParseEncodingDecl(ctxt);
7074 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7075 /*
7076 * The XML REC instructs us to stop parsing right here
7077 */
7078 return;
7079 }
7080
7081 /*
7082 * We may have the standalone status.
7083 */
7084 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7085 if ((RAW == '?') && (NXT(1) == '>')) {
7086 SKIP(2);
7087 return;
7088 }
7089 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7091 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7092 ctxt->wellFormed = 0;
7093 ctxt->disableSAX = 1;
7094 }
7095 SKIP_BLANKS;
7096 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7097
7098 SKIP_BLANKS;
7099 if ((RAW == '?') && (NXT(1) == '>')) {
7100 SKIP(2);
7101 } else if (RAW == '>') {
7102 /* Deprecated old WD ... */
7103 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7105 ctxt->sax->error(ctxt->userData,
7106 "XML declaration must end-up with '?>'\n");
7107 ctxt->wellFormed = 0;
7108 ctxt->disableSAX = 1;
7109 NEXT;
7110 } else {
7111 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7113 ctxt->sax->error(ctxt->userData,
7114 "parsing XML declaration: '?>' expected\n");
7115 ctxt->wellFormed = 0;
7116 ctxt->disableSAX = 1;
7117 MOVETO_ENDTAG(CUR_PTR);
7118 NEXT;
7119 }
7120}
7121
7122/**
7123 * xmlParseMisc:
7124 * @ctxt: an XML parser context
7125 *
7126 * parse an XML Misc* optionnal field.
7127 *
7128 * [27] Misc ::= Comment | PI | S
7129 */
7130
7131void
7132xmlParseMisc(xmlParserCtxtPtr ctxt) {
7133 while (((RAW == '<') && (NXT(1) == '?')) ||
7134 ((RAW == '<') && (NXT(1) == '!') &&
7135 (NXT(2) == '-') && (NXT(3) == '-')) ||
7136 IS_BLANK(CUR)) {
7137 if ((RAW == '<') && (NXT(1) == '?')) {
7138 xmlParsePI(ctxt);
7139 } else if (IS_BLANK(CUR)) {
7140 NEXT;
7141 } else
7142 xmlParseComment(ctxt);
7143 }
7144}
7145
7146/**
7147 * xmlParseDocument:
7148 * @ctxt: an XML parser context
7149 *
7150 * parse an XML document (and build a tree if using the standard SAX
7151 * interface).
7152 *
7153 * [1] document ::= prolog element Misc*
7154 *
7155 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7156 *
7157 * Returns 0, -1 in case of error. the parser context is augmented
7158 * as a result of the parsing.
7159 */
7160
7161int
7162xmlParseDocument(xmlParserCtxtPtr ctxt) {
7163 xmlChar start[4];
7164 xmlCharEncoding enc;
7165
7166 xmlInitParser();
7167
7168 GROW;
7169
7170 /*
7171 * SAX: beginning of the document processing.
7172 */
7173 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7174 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7175
7176 /*
7177 * Get the 4 first bytes and decode the charset
7178 * if enc != XML_CHAR_ENCODING_NONE
7179 * plug some encoding conversion routines.
7180 */
7181 start[0] = RAW;
7182 start[1] = NXT(1);
7183 start[2] = NXT(2);
7184 start[3] = NXT(3);
7185 enc = xmlDetectCharEncoding(start, 4);
7186 if (enc != XML_CHAR_ENCODING_NONE) {
7187 xmlSwitchEncoding(ctxt, enc);
7188 }
7189
7190
7191 if (CUR == 0) {
7192 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7194 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7195 ctxt->wellFormed = 0;
7196 ctxt->disableSAX = 1;
7197 }
7198
7199 /*
7200 * Check for the XMLDecl in the Prolog.
7201 */
7202 GROW;
7203 if ((RAW == '<') && (NXT(1) == '?') &&
7204 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7205 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7206
7207 /*
7208 * Note that we will switch encoding on the fly.
7209 */
7210 xmlParseXMLDecl(ctxt);
7211 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7212 /*
7213 * The XML REC instructs us to stop parsing right here
7214 */
7215 return(-1);
7216 }
7217 ctxt->standalone = ctxt->input->standalone;
7218 SKIP_BLANKS;
7219 } else {
7220 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7221 }
7222 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7223 ctxt->sax->startDocument(ctxt->userData);
7224
7225 /*
7226 * The Misc part of the Prolog
7227 */
7228 GROW;
7229 xmlParseMisc(ctxt);
7230
7231 /*
7232 * Then possibly doc type declaration(s) and more Misc
7233 * (doctypedecl Misc*)?
7234 */
7235 GROW;
7236 if ((RAW == '<') && (NXT(1) == '!') &&
7237 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7238 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7239 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7240 (NXT(8) == 'E')) {
7241
7242 ctxt->inSubset = 1;
7243 xmlParseDocTypeDecl(ctxt);
7244 if (RAW == '[') {
7245 ctxt->instate = XML_PARSER_DTD;
7246 xmlParseInternalSubset(ctxt);
7247 }
7248
7249 /*
7250 * Create and update the external subset.
7251 */
7252 ctxt->inSubset = 2;
7253 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7254 (!ctxt->disableSAX))
7255 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7256 ctxt->extSubSystem, ctxt->extSubURI);
7257 ctxt->inSubset = 0;
7258
7259
7260 ctxt->instate = XML_PARSER_PROLOG;
7261 xmlParseMisc(ctxt);
7262 }
7263
7264 /*
7265 * Time to start parsing the tree itself
7266 */
7267 GROW;
7268 if (RAW != '<') {
7269 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7271 ctxt->sax->error(ctxt->userData,
7272 "Start tag expected, '<' not found\n");
7273 ctxt->wellFormed = 0;
7274 ctxt->disableSAX = 1;
7275 ctxt->instate = XML_PARSER_EOF;
7276 } else {
7277 ctxt->instate = XML_PARSER_CONTENT;
7278 xmlParseElement(ctxt);
7279 ctxt->instate = XML_PARSER_EPILOG;
7280
7281
7282 /*
7283 * The Misc part at the end
7284 */
7285 xmlParseMisc(ctxt);
7286
7287 if (RAW != 0) {
7288 ctxt->errNo = XML_ERR_DOCUMENT_END;
7289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7290 ctxt->sax->error(ctxt->userData,
7291 "Extra content at the end of the document\n");
7292 ctxt->wellFormed = 0;
7293 ctxt->disableSAX = 1;
7294 }
7295 ctxt->instate = XML_PARSER_EOF;
7296 }
7297
7298 /*
7299 * SAX: end of the document processing.
7300 */
7301 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7302 (!ctxt->disableSAX))
7303 ctxt->sax->endDocument(ctxt->userData);
7304
7305 if (! ctxt->wellFormed) return(-1);
7306 return(0);
7307}
7308
7309/**
7310 * xmlParseExtParsedEnt:
7311 * @ctxt: an XML parser context
7312 *
7313 * parse a genreral parsed entity
7314 * An external general parsed entity is well-formed if it matches the
7315 * production labeled extParsedEnt.
7316 *
7317 * [78] extParsedEnt ::= TextDecl? content
7318 *
7319 * Returns 0, -1 in case of error. the parser context is augmented
7320 * as a result of the parsing.
7321 */
7322
7323int
7324xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7325 xmlChar start[4];
7326 xmlCharEncoding enc;
7327
7328 xmlDefaultSAXHandlerInit();
7329
7330 GROW;
7331
7332 /*
7333 * SAX: beginning of the document processing.
7334 */
7335 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7336 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7337
7338 /*
7339 * Get the 4 first bytes and decode the charset
7340 * if enc != XML_CHAR_ENCODING_NONE
7341 * plug some encoding conversion routines.
7342 */
7343 start[0] = RAW;
7344 start[1] = NXT(1);
7345 start[2] = NXT(2);
7346 start[3] = NXT(3);
7347 enc = xmlDetectCharEncoding(start, 4);
7348 if (enc != XML_CHAR_ENCODING_NONE) {
7349 xmlSwitchEncoding(ctxt, enc);
7350 }
7351
7352
7353 if (CUR == 0) {
7354 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7356 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7357 ctxt->wellFormed = 0;
7358 ctxt->disableSAX = 1;
7359 }
7360
7361 /*
7362 * Check for the XMLDecl in the Prolog.
7363 */
7364 GROW;
7365 if ((RAW == '<') && (NXT(1) == '?') &&
7366 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7367 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7368
7369 /*
7370 * Note that we will switch encoding on the fly.
7371 */
7372 xmlParseXMLDecl(ctxt);
7373 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7374 /*
7375 * The XML REC instructs us to stop parsing right here
7376 */
7377 return(-1);
7378 }
7379 SKIP_BLANKS;
7380 } else {
7381 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7382 }
7383 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7384 ctxt->sax->startDocument(ctxt->userData);
7385
7386 /*
7387 * Doing validity checking on chunk doesn't make sense
7388 */
7389 ctxt->instate = XML_PARSER_CONTENT;
7390 ctxt->validate = 0;
7391 ctxt->loadsubset = 0;
7392 ctxt->depth = 0;
7393
7394 xmlParseContent(ctxt);
7395
7396 if ((RAW == '<') && (NXT(1) == '/')) {
7397 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7399 ctxt->sax->error(ctxt->userData,
7400 "chunk is not well balanced\n");
7401 ctxt->wellFormed = 0;
7402 ctxt->disableSAX = 1;
7403 } else if (RAW != 0) {
7404 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7406 ctxt->sax->error(ctxt->userData,
7407 "extra content at the end of well balanced chunk\n");
7408 ctxt->wellFormed = 0;
7409 ctxt->disableSAX = 1;
7410 }
7411
7412 /*
7413 * SAX: end of the document processing.
7414 */
7415 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7416 (!ctxt->disableSAX))
7417 ctxt->sax->endDocument(ctxt->userData);
7418
7419 if (! ctxt->wellFormed) return(-1);
7420 return(0);
7421}
7422
7423/************************************************************************
7424 * *
7425 * Progressive parsing interfaces *
7426 * *
7427 ************************************************************************/
7428
7429/**
7430 * xmlParseLookupSequence:
7431 * @ctxt: an XML parser context
7432 * @first: the first char to lookup
7433 * @next: the next char to lookup or zero
7434 * @third: the next char to lookup or zero
7435 *
7436 * Try to find if a sequence (first, next, third) or just (first next) or
7437 * (first) is available in the input stream.
7438 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7439 * to avoid rescanning sequences of bytes, it DOES change the state of the
7440 * parser, do not use liberally.
7441 *
7442 * Returns the index to the current parsing point if the full sequence
7443 * is available, -1 otherwise.
7444 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007445static int
Owen Taylor3473f882001-02-23 17:55:21 +00007446xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7447 xmlChar next, xmlChar third) {
7448 int base, len;
7449 xmlParserInputPtr in;
7450 const xmlChar *buf;
7451
7452 in = ctxt->input;
7453 if (in == NULL) return(-1);
7454 base = in->cur - in->base;
7455 if (base < 0) return(-1);
7456 if (ctxt->checkIndex > base)
7457 base = ctxt->checkIndex;
7458 if (in->buf == NULL) {
7459 buf = in->base;
7460 len = in->length;
7461 } else {
7462 buf = in->buf->buffer->content;
7463 len = in->buf->buffer->use;
7464 }
7465 /* take into account the sequence length */
7466 if (third) len -= 2;
7467 else if (next) len --;
7468 for (;base < len;base++) {
7469 if (buf[base] == first) {
7470 if (third != 0) {
7471 if ((buf[base + 1] != next) ||
7472 (buf[base + 2] != third)) continue;
7473 } else if (next != 0) {
7474 if (buf[base + 1] != next) continue;
7475 }
7476 ctxt->checkIndex = 0;
7477#ifdef DEBUG_PUSH
7478 if (next == 0)
7479 xmlGenericError(xmlGenericErrorContext,
7480 "PP: lookup '%c' found at %d\n",
7481 first, base);
7482 else if (third == 0)
7483 xmlGenericError(xmlGenericErrorContext,
7484 "PP: lookup '%c%c' found at %d\n",
7485 first, next, base);
7486 else
7487 xmlGenericError(xmlGenericErrorContext,
7488 "PP: lookup '%c%c%c' found at %d\n",
7489 first, next, third, base);
7490#endif
7491 return(base - (in->cur - in->base));
7492 }
7493 }
7494 ctxt->checkIndex = base;
7495#ifdef DEBUG_PUSH
7496 if (next == 0)
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: lookup '%c' failed\n", first);
7499 else if (third == 0)
7500 xmlGenericError(xmlGenericErrorContext,
7501 "PP: lookup '%c%c' failed\n", first, next);
7502 else
7503 xmlGenericError(xmlGenericErrorContext,
7504 "PP: lookup '%c%c%c' failed\n", first, next, third);
7505#endif
7506 return(-1);
7507}
7508
7509/**
7510 * xmlParseTryOrFinish:
7511 * @ctxt: an XML parser context
7512 * @terminate: last chunk indicator
7513 *
7514 * Try to progress on parsing
7515 *
7516 * Returns zero if no parsing was possible
7517 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007518static int
Owen Taylor3473f882001-02-23 17:55:21 +00007519xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7520 int ret = 0;
7521 int avail;
7522 xmlChar cur, next;
7523
7524#ifdef DEBUG_PUSH
7525 switch (ctxt->instate) {
7526 case XML_PARSER_EOF:
7527 xmlGenericError(xmlGenericErrorContext,
7528 "PP: try EOF\n"); break;
7529 case XML_PARSER_START:
7530 xmlGenericError(xmlGenericErrorContext,
7531 "PP: try START\n"); break;
7532 case XML_PARSER_MISC:
7533 xmlGenericError(xmlGenericErrorContext,
7534 "PP: try MISC\n");break;
7535 case XML_PARSER_COMMENT:
7536 xmlGenericError(xmlGenericErrorContext,
7537 "PP: try COMMENT\n");break;
7538 case XML_PARSER_PROLOG:
7539 xmlGenericError(xmlGenericErrorContext,
7540 "PP: try PROLOG\n");break;
7541 case XML_PARSER_START_TAG:
7542 xmlGenericError(xmlGenericErrorContext,
7543 "PP: try START_TAG\n");break;
7544 case XML_PARSER_CONTENT:
7545 xmlGenericError(xmlGenericErrorContext,
7546 "PP: try CONTENT\n");break;
7547 case XML_PARSER_CDATA_SECTION:
7548 xmlGenericError(xmlGenericErrorContext,
7549 "PP: try CDATA_SECTION\n");break;
7550 case XML_PARSER_END_TAG:
7551 xmlGenericError(xmlGenericErrorContext,
7552 "PP: try END_TAG\n");break;
7553 case XML_PARSER_ENTITY_DECL:
7554 xmlGenericError(xmlGenericErrorContext,
7555 "PP: try ENTITY_DECL\n");break;
7556 case XML_PARSER_ENTITY_VALUE:
7557 xmlGenericError(xmlGenericErrorContext,
7558 "PP: try ENTITY_VALUE\n");break;
7559 case XML_PARSER_ATTRIBUTE_VALUE:
7560 xmlGenericError(xmlGenericErrorContext,
7561 "PP: try ATTRIBUTE_VALUE\n");break;
7562 case XML_PARSER_DTD:
7563 xmlGenericError(xmlGenericErrorContext,
7564 "PP: try DTD\n");break;
7565 case XML_PARSER_EPILOG:
7566 xmlGenericError(xmlGenericErrorContext,
7567 "PP: try EPILOG\n");break;
7568 case XML_PARSER_PI:
7569 xmlGenericError(xmlGenericErrorContext,
7570 "PP: try PI\n");break;
7571 case XML_PARSER_IGNORE:
7572 xmlGenericError(xmlGenericErrorContext,
7573 "PP: try IGNORE\n");break;
7574 }
7575#endif
7576
7577 while (1) {
7578 /*
7579 * Pop-up of finished entities.
7580 */
7581 while ((RAW == 0) && (ctxt->inputNr > 1))
7582 xmlPopInput(ctxt);
7583
7584 if (ctxt->input ==NULL) break;
7585 if (ctxt->input->buf == NULL)
7586 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7587 else
7588 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7589 if (avail < 1)
7590 goto done;
7591 switch (ctxt->instate) {
7592 case XML_PARSER_EOF:
7593 /*
7594 * Document parsing is done !
7595 */
7596 goto done;
7597 case XML_PARSER_START:
7598 /*
7599 * Very first chars read from the document flow.
7600 */
Owen Taylor3473f882001-02-23 17:55:21 +00007601 if (avail < 2)
7602 goto done;
7603
7604 cur = ctxt->input->cur[0];
7605 next = ctxt->input->cur[1];
7606 if (cur == 0) {
7607 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7608 ctxt->sax->setDocumentLocator(ctxt->userData,
7609 &xmlDefaultSAXLocator);
7610 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7612 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7613 ctxt->wellFormed = 0;
7614 ctxt->disableSAX = 1;
7615 ctxt->instate = XML_PARSER_EOF;
7616#ifdef DEBUG_PUSH
7617 xmlGenericError(xmlGenericErrorContext,
7618 "PP: entering EOF\n");
7619#endif
7620 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7621 ctxt->sax->endDocument(ctxt->userData);
7622 goto done;
7623 }
7624 if ((cur == '<') && (next == '?')) {
7625 /* PI or XML decl */
7626 if (avail < 5) return(ret);
7627 if ((!terminate) &&
7628 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7629 return(ret);
7630 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7631 ctxt->sax->setDocumentLocator(ctxt->userData,
7632 &xmlDefaultSAXLocator);
7633 if ((ctxt->input->cur[2] == 'x') &&
7634 (ctxt->input->cur[3] == 'm') &&
7635 (ctxt->input->cur[4] == 'l') &&
7636 (IS_BLANK(ctxt->input->cur[5]))) {
7637 ret += 5;
7638#ifdef DEBUG_PUSH
7639 xmlGenericError(xmlGenericErrorContext,
7640 "PP: Parsing XML Decl\n");
7641#endif
7642 xmlParseXMLDecl(ctxt);
7643 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7644 /*
7645 * The XML REC instructs us to stop parsing right
7646 * here
7647 */
7648 ctxt->instate = XML_PARSER_EOF;
7649 return(0);
7650 }
7651 ctxt->standalone = ctxt->input->standalone;
7652 if ((ctxt->encoding == NULL) &&
7653 (ctxt->input->encoding != NULL))
7654 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7655 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7656 (!ctxt->disableSAX))
7657 ctxt->sax->startDocument(ctxt->userData);
7658 ctxt->instate = XML_PARSER_MISC;
7659#ifdef DEBUG_PUSH
7660 xmlGenericError(xmlGenericErrorContext,
7661 "PP: entering MISC\n");
7662#endif
7663 } else {
7664 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7665 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7666 (!ctxt->disableSAX))
7667 ctxt->sax->startDocument(ctxt->userData);
7668 ctxt->instate = XML_PARSER_MISC;
7669#ifdef DEBUG_PUSH
7670 xmlGenericError(xmlGenericErrorContext,
7671 "PP: entering MISC\n");
7672#endif
7673 }
7674 } else {
7675 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7676 ctxt->sax->setDocumentLocator(ctxt->userData,
7677 &xmlDefaultSAXLocator);
7678 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7679 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7680 (!ctxt->disableSAX))
7681 ctxt->sax->startDocument(ctxt->userData);
7682 ctxt->instate = XML_PARSER_MISC;
7683#ifdef DEBUG_PUSH
7684 xmlGenericError(xmlGenericErrorContext,
7685 "PP: entering MISC\n");
7686#endif
7687 }
7688 break;
7689 case XML_PARSER_MISC:
7690 SKIP_BLANKS;
7691 if (ctxt->input->buf == NULL)
7692 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7693 else
7694 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7695 if (avail < 2)
7696 goto done;
7697 cur = ctxt->input->cur[0];
7698 next = ctxt->input->cur[1];
7699 if ((cur == '<') && (next == '?')) {
7700 if ((!terminate) &&
7701 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7702 goto done;
7703#ifdef DEBUG_PUSH
7704 xmlGenericError(xmlGenericErrorContext,
7705 "PP: Parsing PI\n");
7706#endif
7707 xmlParsePI(ctxt);
7708 } else if ((cur == '<') && (next == '!') &&
7709 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7710 if ((!terminate) &&
7711 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7712 goto done;
7713#ifdef DEBUG_PUSH
7714 xmlGenericError(xmlGenericErrorContext,
7715 "PP: Parsing Comment\n");
7716#endif
7717 xmlParseComment(ctxt);
7718 ctxt->instate = XML_PARSER_MISC;
7719 } else if ((cur == '<') && (next == '!') &&
7720 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7721 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7722 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7723 (ctxt->input->cur[8] == 'E')) {
7724 if ((!terminate) &&
7725 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7726 goto done;
7727#ifdef DEBUG_PUSH
7728 xmlGenericError(xmlGenericErrorContext,
7729 "PP: Parsing internal subset\n");
7730#endif
7731 ctxt->inSubset = 1;
7732 xmlParseDocTypeDecl(ctxt);
7733 if (RAW == '[') {
7734 ctxt->instate = XML_PARSER_DTD;
7735#ifdef DEBUG_PUSH
7736 xmlGenericError(xmlGenericErrorContext,
7737 "PP: entering DTD\n");
7738#endif
7739 } else {
7740 /*
7741 * Create and update the external subset.
7742 */
7743 ctxt->inSubset = 2;
7744 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7745 (ctxt->sax->externalSubset != NULL))
7746 ctxt->sax->externalSubset(ctxt->userData,
7747 ctxt->intSubName, ctxt->extSubSystem,
7748 ctxt->extSubURI);
7749 ctxt->inSubset = 0;
7750 ctxt->instate = XML_PARSER_PROLOG;
7751#ifdef DEBUG_PUSH
7752 xmlGenericError(xmlGenericErrorContext,
7753 "PP: entering PROLOG\n");
7754#endif
7755 }
7756 } else if ((cur == '<') && (next == '!') &&
7757 (avail < 9)) {
7758 goto done;
7759 } else {
7760 ctxt->instate = XML_PARSER_START_TAG;
7761#ifdef DEBUG_PUSH
7762 xmlGenericError(xmlGenericErrorContext,
7763 "PP: entering START_TAG\n");
7764#endif
7765 }
7766 break;
7767 case XML_PARSER_IGNORE:
7768 xmlGenericError(xmlGenericErrorContext,
7769 "PP: internal error, state == IGNORE");
7770 ctxt->instate = XML_PARSER_DTD;
7771#ifdef DEBUG_PUSH
7772 xmlGenericError(xmlGenericErrorContext,
7773 "PP: entering DTD\n");
7774#endif
7775 break;
7776 case XML_PARSER_PROLOG:
7777 SKIP_BLANKS;
7778 if (ctxt->input->buf == NULL)
7779 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7780 else
7781 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7782 if (avail < 2)
7783 goto done;
7784 cur = ctxt->input->cur[0];
7785 next = ctxt->input->cur[1];
7786 if ((cur == '<') && (next == '?')) {
7787 if ((!terminate) &&
7788 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7789 goto done;
7790#ifdef DEBUG_PUSH
7791 xmlGenericError(xmlGenericErrorContext,
7792 "PP: Parsing PI\n");
7793#endif
7794 xmlParsePI(ctxt);
7795 } else if ((cur == '<') && (next == '!') &&
7796 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7797 if ((!terminate) &&
7798 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7799 goto done;
7800#ifdef DEBUG_PUSH
7801 xmlGenericError(xmlGenericErrorContext,
7802 "PP: Parsing Comment\n");
7803#endif
7804 xmlParseComment(ctxt);
7805 ctxt->instate = XML_PARSER_PROLOG;
7806 } else if ((cur == '<') && (next == '!') &&
7807 (avail < 4)) {
7808 goto done;
7809 } else {
7810 ctxt->instate = XML_PARSER_START_TAG;
7811#ifdef DEBUG_PUSH
7812 xmlGenericError(xmlGenericErrorContext,
7813 "PP: entering START_TAG\n");
7814#endif
7815 }
7816 break;
7817 case XML_PARSER_EPILOG:
7818 SKIP_BLANKS;
7819 if (ctxt->input->buf == NULL)
7820 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7821 else
7822 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7823 if (avail < 2)
7824 goto done;
7825 cur = ctxt->input->cur[0];
7826 next = ctxt->input->cur[1];
7827 if ((cur == '<') && (next == '?')) {
7828 if ((!terminate) &&
7829 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7830 goto done;
7831#ifdef DEBUG_PUSH
7832 xmlGenericError(xmlGenericErrorContext,
7833 "PP: Parsing PI\n");
7834#endif
7835 xmlParsePI(ctxt);
7836 ctxt->instate = XML_PARSER_EPILOG;
7837 } else if ((cur == '<') && (next == '!') &&
7838 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7839 if ((!terminate) &&
7840 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7841 goto done;
7842#ifdef DEBUG_PUSH
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: Parsing Comment\n");
7845#endif
7846 xmlParseComment(ctxt);
7847 ctxt->instate = XML_PARSER_EPILOG;
7848 } else if ((cur == '<') && (next == '!') &&
7849 (avail < 4)) {
7850 goto done;
7851 } else {
7852 ctxt->errNo = XML_ERR_DOCUMENT_END;
7853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7854 ctxt->sax->error(ctxt->userData,
7855 "Extra content at the end of the document\n");
7856 ctxt->wellFormed = 0;
7857 ctxt->disableSAX = 1;
7858 ctxt->instate = XML_PARSER_EOF;
7859#ifdef DEBUG_PUSH
7860 xmlGenericError(xmlGenericErrorContext,
7861 "PP: entering EOF\n");
7862#endif
7863 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7864 (!ctxt->disableSAX))
7865 ctxt->sax->endDocument(ctxt->userData);
7866 goto done;
7867 }
7868 break;
7869 case XML_PARSER_START_TAG: {
7870 xmlChar *name, *oldname;
7871
7872 if ((avail < 2) && (ctxt->inputNr == 1))
7873 goto done;
7874 cur = ctxt->input->cur[0];
7875 if (cur != '<') {
7876 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7878 ctxt->sax->error(ctxt->userData,
7879 "Start tag expect, '<' not found\n");
7880 ctxt->wellFormed = 0;
7881 ctxt->disableSAX = 1;
7882 ctxt->instate = XML_PARSER_EOF;
7883#ifdef DEBUG_PUSH
7884 xmlGenericError(xmlGenericErrorContext,
7885 "PP: entering EOF\n");
7886#endif
7887 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7888 (!ctxt->disableSAX))
7889 ctxt->sax->endDocument(ctxt->userData);
7890 goto done;
7891 }
7892 if ((!terminate) &&
7893 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7894 goto done;
7895 if (ctxt->spaceNr == 0)
7896 spacePush(ctxt, -1);
7897 else
7898 spacePush(ctxt, *ctxt->space);
7899 name = xmlParseStartTag(ctxt);
7900 if (name == NULL) {
7901 spacePop(ctxt);
7902 ctxt->instate = XML_PARSER_EOF;
7903#ifdef DEBUG_PUSH
7904 xmlGenericError(xmlGenericErrorContext,
7905 "PP: entering EOF\n");
7906#endif
7907 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7908 (!ctxt->disableSAX))
7909 ctxt->sax->endDocument(ctxt->userData);
7910 goto done;
7911 }
7912 namePush(ctxt, xmlStrdup(name));
7913
7914 /*
7915 * [ VC: Root Element Type ]
7916 * The Name in the document type declaration must match
7917 * the element type of the root element.
7918 */
7919 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7920 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7921 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7922
7923 /*
7924 * Check for an Empty Element.
7925 */
7926 if ((RAW == '/') && (NXT(1) == '>')) {
7927 SKIP(2);
7928 if ((ctxt->sax != NULL) &&
7929 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7930 ctxt->sax->endElement(ctxt->userData, name);
7931 xmlFree(name);
7932 oldname = namePop(ctxt);
7933 spacePop(ctxt);
7934 if (oldname != NULL) {
7935#ifdef DEBUG_STACK
7936 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7937#endif
7938 xmlFree(oldname);
7939 }
7940 if (ctxt->name == NULL) {
7941 ctxt->instate = XML_PARSER_EPILOG;
7942#ifdef DEBUG_PUSH
7943 xmlGenericError(xmlGenericErrorContext,
7944 "PP: entering EPILOG\n");
7945#endif
7946 } else {
7947 ctxt->instate = XML_PARSER_CONTENT;
7948#ifdef DEBUG_PUSH
7949 xmlGenericError(xmlGenericErrorContext,
7950 "PP: entering CONTENT\n");
7951#endif
7952 }
7953 break;
7954 }
7955 if (RAW == '>') {
7956 NEXT;
7957 } else {
7958 ctxt->errNo = XML_ERR_GT_REQUIRED;
7959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7960 ctxt->sax->error(ctxt->userData,
7961 "Couldn't find end of Start Tag %s\n",
7962 name);
7963 ctxt->wellFormed = 0;
7964 ctxt->disableSAX = 1;
7965
7966 /*
7967 * end of parsing of this node.
7968 */
7969 nodePop(ctxt);
7970 oldname = namePop(ctxt);
7971 spacePop(ctxt);
7972 if (oldname != NULL) {
7973#ifdef DEBUG_STACK
7974 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7975#endif
7976 xmlFree(oldname);
7977 }
7978 }
7979 xmlFree(name);
7980 ctxt->instate = XML_PARSER_CONTENT;
7981#ifdef DEBUG_PUSH
7982 xmlGenericError(xmlGenericErrorContext,
7983 "PP: entering CONTENT\n");
7984#endif
7985 break;
7986 }
7987 case XML_PARSER_CONTENT: {
7988 const xmlChar *test;
7989 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00007990 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00007991
7992 /*
7993 * Handle preparsed entities and charRef
7994 */
7995 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007996 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00007997
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007998 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00007999 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8000 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008001 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008002 ctxt->token = 0;
8003 }
8004 if ((avail < 2) && (ctxt->inputNr == 1))
8005 goto done;
8006 cur = ctxt->input->cur[0];
8007 next = ctxt->input->cur[1];
8008
8009 test = CUR_PTR;
8010 cons = ctxt->input->consumed;
8011 tok = ctxt->token;
8012 if ((cur == '<') && (next == '?')) {
8013 if ((!terminate) &&
8014 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8015 goto done;
8016#ifdef DEBUG_PUSH
8017 xmlGenericError(xmlGenericErrorContext,
8018 "PP: Parsing PI\n");
8019#endif
8020 xmlParsePI(ctxt);
8021 } else if ((cur == '<') && (next == '!') &&
8022 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8023 if ((!terminate) &&
8024 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8025 goto done;
8026#ifdef DEBUG_PUSH
8027 xmlGenericError(xmlGenericErrorContext,
8028 "PP: Parsing Comment\n");
8029#endif
8030 xmlParseComment(ctxt);
8031 ctxt->instate = XML_PARSER_CONTENT;
8032 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8033 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8034 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8035 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8036 (ctxt->input->cur[8] == '[')) {
8037 SKIP(9);
8038 ctxt->instate = XML_PARSER_CDATA_SECTION;
8039#ifdef DEBUG_PUSH
8040 xmlGenericError(xmlGenericErrorContext,
8041 "PP: entering CDATA_SECTION\n");
8042#endif
8043 break;
8044 } else if ((cur == '<') && (next == '!') &&
8045 (avail < 9)) {
8046 goto done;
8047 } else if ((cur == '<') && (next == '/')) {
8048 ctxt->instate = XML_PARSER_END_TAG;
8049#ifdef DEBUG_PUSH
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: entering END_TAG\n");
8052#endif
8053 break;
8054 } else if (cur == '<') {
8055 ctxt->instate = XML_PARSER_START_TAG;
8056#ifdef DEBUG_PUSH
8057 xmlGenericError(xmlGenericErrorContext,
8058 "PP: entering START_TAG\n");
8059#endif
8060 break;
8061 } else if (cur == '&') {
8062 if ((!terminate) &&
8063 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8064 goto done;
8065#ifdef DEBUG_PUSH
8066 xmlGenericError(xmlGenericErrorContext,
8067 "PP: Parsing Reference\n");
8068#endif
8069 xmlParseReference(ctxt);
8070 } else {
8071 /* TODO Avoid the extra copy, handle directly !!! */
8072 /*
8073 * Goal of the following test is:
8074 * - minimize calls to the SAX 'character' callback
8075 * when they are mergeable
8076 * - handle an problem for isBlank when we only parse
8077 * a sequence of blank chars and the next one is
8078 * not available to check against '<' presence.
8079 * - tries to homogenize the differences in SAX
8080 * callbacks beween the push and pull versions
8081 * of the parser.
8082 */
8083 if ((ctxt->inputNr == 1) &&
8084 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8085 if ((!terminate) &&
8086 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8087 goto done;
8088 }
8089 ctxt->checkIndex = 0;
8090#ifdef DEBUG_PUSH
8091 xmlGenericError(xmlGenericErrorContext,
8092 "PP: Parsing char data\n");
8093#endif
8094 xmlParseCharData(ctxt, 0);
8095 }
8096 /*
8097 * Pop-up of finished entities.
8098 */
8099 while ((RAW == 0) && (ctxt->inputNr > 1))
8100 xmlPopInput(ctxt);
8101 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8102 (tok == ctxt->token)) {
8103 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8105 ctxt->sax->error(ctxt->userData,
8106 "detected an error in element content\n");
8107 ctxt->wellFormed = 0;
8108 ctxt->disableSAX = 1;
8109 ctxt->instate = XML_PARSER_EOF;
8110 break;
8111 }
8112 break;
8113 }
8114 case XML_PARSER_CDATA_SECTION: {
8115 /*
8116 * The Push mode need to have the SAX callback for
8117 * cdataBlock merge back contiguous callbacks.
8118 */
8119 int base;
8120
8121 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8122 if (base < 0) {
8123 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8124 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8125 if (ctxt->sax->cdataBlock != NULL)
8126 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8127 XML_PARSER_BIG_BUFFER_SIZE);
8128 }
8129 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8130 ctxt->checkIndex = 0;
8131 }
8132 goto done;
8133 } else {
8134 if ((ctxt->sax != NULL) && (base > 0) &&
8135 (!ctxt->disableSAX)) {
8136 if (ctxt->sax->cdataBlock != NULL)
8137 ctxt->sax->cdataBlock(ctxt->userData,
8138 ctxt->input->cur, base);
8139 }
8140 SKIP(base + 3);
8141 ctxt->checkIndex = 0;
8142 ctxt->instate = XML_PARSER_CONTENT;
8143#ifdef DEBUG_PUSH
8144 xmlGenericError(xmlGenericErrorContext,
8145 "PP: entering CONTENT\n");
8146#endif
8147 }
8148 break;
8149 }
8150 case XML_PARSER_END_TAG:
8151 if (avail < 2)
8152 goto done;
8153 if ((!terminate) &&
8154 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8155 goto done;
8156 xmlParseEndTag(ctxt);
8157 if (ctxt->name == NULL) {
8158 ctxt->instate = XML_PARSER_EPILOG;
8159#ifdef DEBUG_PUSH
8160 xmlGenericError(xmlGenericErrorContext,
8161 "PP: entering EPILOG\n");
8162#endif
8163 } else {
8164 ctxt->instate = XML_PARSER_CONTENT;
8165#ifdef DEBUG_PUSH
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: entering CONTENT\n");
8168#endif
8169 }
8170 break;
8171 case XML_PARSER_DTD: {
8172 /*
8173 * Sorry but progressive parsing of the internal subset
8174 * is not expected to be supported. We first check that
8175 * the full content of the internal subset is available and
8176 * the parsing is launched only at that point.
8177 * Internal subset ends up with "']' S? '>'" in an unescaped
8178 * section and not in a ']]>' sequence which are conditional
8179 * sections (whoever argued to keep that crap in XML deserve
8180 * a place in hell !).
8181 */
8182 int base, i;
8183 xmlChar *buf;
8184 xmlChar quote = 0;
8185
8186 base = ctxt->input->cur - ctxt->input->base;
8187 if (base < 0) return(0);
8188 if (ctxt->checkIndex > base)
8189 base = ctxt->checkIndex;
8190 buf = ctxt->input->buf->buffer->content;
8191 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8192 base++) {
8193 if (quote != 0) {
8194 if (buf[base] == quote)
8195 quote = 0;
8196 continue;
8197 }
8198 if (buf[base] == '"') {
8199 quote = '"';
8200 continue;
8201 }
8202 if (buf[base] == '\'') {
8203 quote = '\'';
8204 continue;
8205 }
8206 if (buf[base] == ']') {
8207 if ((unsigned int) base +1 >=
8208 ctxt->input->buf->buffer->use)
8209 break;
8210 if (buf[base + 1] == ']') {
8211 /* conditional crap, skip both ']' ! */
8212 base++;
8213 continue;
8214 }
8215 for (i = 0;
8216 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8217 i++) {
8218 if (buf[base + i] == '>')
8219 goto found_end_int_subset;
8220 }
8221 break;
8222 }
8223 }
8224 /*
8225 * We didn't found the end of the Internal subset
8226 */
8227 if (quote == 0)
8228 ctxt->checkIndex = base;
8229#ifdef DEBUG_PUSH
8230 if (next == 0)
8231 xmlGenericError(xmlGenericErrorContext,
8232 "PP: lookup of int subset end filed\n");
8233#endif
8234 goto done;
8235
8236found_end_int_subset:
8237 xmlParseInternalSubset(ctxt);
8238 ctxt->inSubset = 2;
8239 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8240 (ctxt->sax->externalSubset != NULL))
8241 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8242 ctxt->extSubSystem, ctxt->extSubURI);
8243 ctxt->inSubset = 0;
8244 ctxt->instate = XML_PARSER_PROLOG;
8245 ctxt->checkIndex = 0;
8246#ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: entering PROLOG\n");
8249#endif
8250 break;
8251 }
8252 case XML_PARSER_COMMENT:
8253 xmlGenericError(xmlGenericErrorContext,
8254 "PP: internal error, state == COMMENT\n");
8255 ctxt->instate = XML_PARSER_CONTENT;
8256#ifdef DEBUG_PUSH
8257 xmlGenericError(xmlGenericErrorContext,
8258 "PP: entering CONTENT\n");
8259#endif
8260 break;
8261 case XML_PARSER_PI:
8262 xmlGenericError(xmlGenericErrorContext,
8263 "PP: internal error, state == PI\n");
8264 ctxt->instate = XML_PARSER_CONTENT;
8265#ifdef DEBUG_PUSH
8266 xmlGenericError(xmlGenericErrorContext,
8267 "PP: entering CONTENT\n");
8268#endif
8269 break;
8270 case XML_PARSER_ENTITY_DECL:
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: internal error, state == ENTITY_DECL\n");
8273 ctxt->instate = XML_PARSER_DTD;
8274#ifdef DEBUG_PUSH
8275 xmlGenericError(xmlGenericErrorContext,
8276 "PP: entering DTD\n");
8277#endif
8278 break;
8279 case XML_PARSER_ENTITY_VALUE:
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: internal error, state == ENTITY_VALUE\n");
8282 ctxt->instate = XML_PARSER_CONTENT;
8283#ifdef DEBUG_PUSH
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: entering DTD\n");
8286#endif
8287 break;
8288 case XML_PARSER_ATTRIBUTE_VALUE:
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8291 ctxt->instate = XML_PARSER_START_TAG;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: entering START_TAG\n");
8295#endif
8296 break;
8297 case XML_PARSER_SYSTEM_LITERAL:
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: internal error, state == SYSTEM_LITERAL\n");
8300 ctxt->instate = XML_PARSER_START_TAG;
8301#ifdef DEBUG_PUSH
8302 xmlGenericError(xmlGenericErrorContext,
8303 "PP: entering START_TAG\n");
8304#endif
8305 break;
8306 }
8307 }
8308done:
8309#ifdef DEBUG_PUSH
8310 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8311#endif
8312 return(ret);
8313}
8314
8315/**
Owen Taylor3473f882001-02-23 17:55:21 +00008316 * xmlParseChunk:
8317 * @ctxt: an XML parser context
8318 * @chunk: an char array
8319 * @size: the size in byte of the chunk
8320 * @terminate: last chunk indicator
8321 *
8322 * Parse a Chunk of memory
8323 *
8324 * Returns zero if no error, the xmlParserErrors otherwise.
8325 */
8326int
8327xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8328 int terminate) {
8329 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8330 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8331 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8332 int cur = ctxt->input->cur - ctxt->input->base;
8333
8334 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8335 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8336 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008337 ctxt->input->end =
8338 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008339#ifdef DEBUG_PUSH
8340 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8341#endif
8342
8343 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8344 xmlParseTryOrFinish(ctxt, terminate);
8345 } else if (ctxt->instate != XML_PARSER_EOF) {
8346 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8347 xmlParserInputBufferPtr in = ctxt->input->buf;
8348 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8349 (in->raw != NULL)) {
8350 int nbchars;
8351
8352 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8353 if (nbchars < 0) {
8354 xmlGenericError(xmlGenericErrorContext,
8355 "xmlParseChunk: encoder error\n");
8356 return(XML_ERR_INVALID_ENCODING);
8357 }
8358 }
8359 }
8360 }
8361 xmlParseTryOrFinish(ctxt, terminate);
8362 if (terminate) {
8363 /*
8364 * Check for termination
8365 */
8366 if ((ctxt->instate != XML_PARSER_EOF) &&
8367 (ctxt->instate != XML_PARSER_EPILOG)) {
8368 ctxt->errNo = XML_ERR_DOCUMENT_END;
8369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8370 ctxt->sax->error(ctxt->userData,
8371 "Extra content at the end of the document\n");
8372 ctxt->wellFormed = 0;
8373 ctxt->disableSAX = 1;
8374 }
8375 if (ctxt->instate != XML_PARSER_EOF) {
8376 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8377 (!ctxt->disableSAX))
8378 ctxt->sax->endDocument(ctxt->userData);
8379 }
8380 ctxt->instate = XML_PARSER_EOF;
8381 }
8382 return((xmlParserErrors) ctxt->errNo);
8383}
8384
8385/************************************************************************
8386 * *
8387 * I/O front end functions to the parser *
8388 * *
8389 ************************************************************************/
8390
8391/**
8392 * xmlStopParser:
8393 * @ctxt: an XML parser context
8394 *
8395 * Blocks further parser processing
8396 */
8397void
8398xmlStopParser(xmlParserCtxtPtr ctxt) {
8399 ctxt->instate = XML_PARSER_EOF;
8400 if (ctxt->input != NULL)
8401 ctxt->input->cur = BAD_CAST"";
8402}
8403
8404/**
8405 * xmlCreatePushParserCtxt:
8406 * @sax: a SAX handler
8407 * @user_data: The user data returned on SAX callbacks
8408 * @chunk: a pointer to an array of chars
8409 * @size: number of chars in the array
8410 * @filename: an optional file name or URI
8411 *
8412 * Create a parser context for using the XML parser in push mode
8413 * To allow content encoding detection, @size should be >= 4
8414 * The value of @filename is used for fetching external entities
8415 * and error/warning reports.
8416 *
8417 * Returns the new parser context or NULL
8418 */
8419xmlParserCtxtPtr
8420xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8421 const char *chunk, int size, const char *filename) {
8422 xmlParserCtxtPtr ctxt;
8423 xmlParserInputPtr inputStream;
8424 xmlParserInputBufferPtr buf;
8425 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8426
8427 /*
8428 * plug some encoding conversion routines
8429 */
8430 if ((chunk != NULL) && (size >= 4))
8431 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8432
8433 buf = xmlAllocParserInputBuffer(enc);
8434 if (buf == NULL) return(NULL);
8435
8436 ctxt = xmlNewParserCtxt();
8437 if (ctxt == NULL) {
8438 xmlFree(buf);
8439 return(NULL);
8440 }
8441 if (sax != NULL) {
8442 if (ctxt->sax != &xmlDefaultSAXHandler)
8443 xmlFree(ctxt->sax);
8444 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8445 if (ctxt->sax == NULL) {
8446 xmlFree(buf);
8447 xmlFree(ctxt);
8448 return(NULL);
8449 }
8450 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8451 if (user_data != NULL)
8452 ctxt->userData = user_data;
8453 }
8454 if (filename == NULL) {
8455 ctxt->directory = NULL;
8456 } else {
8457 ctxt->directory = xmlParserGetDirectory(filename);
8458 }
8459
8460 inputStream = xmlNewInputStream(ctxt);
8461 if (inputStream == NULL) {
8462 xmlFreeParserCtxt(ctxt);
8463 return(NULL);
8464 }
8465
8466 if (filename == NULL)
8467 inputStream->filename = NULL;
8468 else
8469 inputStream->filename = xmlMemStrdup(filename);
8470 inputStream->buf = buf;
8471 inputStream->base = inputStream->buf->buffer->content;
8472 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008473 inputStream->end =
8474 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008475 if (enc != XML_CHAR_ENCODING_NONE) {
8476 xmlSwitchEncoding(ctxt, enc);
8477 }
8478
8479 inputPush(ctxt, inputStream);
8480
8481 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8482 (ctxt->input->buf != NULL)) {
8483 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8486#endif
8487 }
8488
8489 return(ctxt);
8490}
8491
8492/**
8493 * xmlCreateIOParserCtxt:
8494 * @sax: a SAX handler
8495 * @user_data: The user data returned on SAX callbacks
8496 * @ioread: an I/O read function
8497 * @ioclose: an I/O close function
8498 * @ioctx: an I/O handler
8499 * @enc: the charset encoding if known
8500 *
8501 * Create a parser context for using the XML parser with an existing
8502 * I/O stream
8503 *
8504 * Returns the new parser context or NULL
8505 */
8506xmlParserCtxtPtr
8507xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8508 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8509 void *ioctx, xmlCharEncoding enc) {
8510 xmlParserCtxtPtr ctxt;
8511 xmlParserInputPtr inputStream;
8512 xmlParserInputBufferPtr buf;
8513
8514 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8515 if (buf == NULL) return(NULL);
8516
8517 ctxt = xmlNewParserCtxt();
8518 if (ctxt == NULL) {
8519 xmlFree(buf);
8520 return(NULL);
8521 }
8522 if (sax != NULL) {
8523 if (ctxt->sax != &xmlDefaultSAXHandler)
8524 xmlFree(ctxt->sax);
8525 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8526 if (ctxt->sax == NULL) {
8527 xmlFree(buf);
8528 xmlFree(ctxt);
8529 return(NULL);
8530 }
8531 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8532 if (user_data != NULL)
8533 ctxt->userData = user_data;
8534 }
8535
8536 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8537 if (inputStream == NULL) {
8538 xmlFreeParserCtxt(ctxt);
8539 return(NULL);
8540 }
8541 inputPush(ctxt, inputStream);
8542
8543 return(ctxt);
8544}
8545
/************************************************************************
 *									*
 * 		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
8551
8552/**
8553 * xmlIOParseDTD:
8554 * @sax: the SAX handler block or NULL
8555 * @input: an Input Buffer
8556 * @enc: the charset encoding if known
8557 *
8558 * Load and parse a DTD
8559 *
8560 * Returns the resulting xmlDtdPtr or NULL in case of error.
8561 * @input will be freed at parsing end.
8562 */
8563
8564xmlDtdPtr
8565xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8566 xmlCharEncoding enc) {
8567 xmlDtdPtr ret = NULL;
8568 xmlParserCtxtPtr ctxt;
8569 xmlParserInputPtr pinput = NULL;
8570
8571 if (input == NULL)
8572 return(NULL);
8573
8574 ctxt = xmlNewParserCtxt();
8575 if (ctxt == NULL) {
8576 return(NULL);
8577 }
8578
8579 /*
8580 * Set-up the SAX context
8581 */
8582 if (sax != NULL) {
8583 if (ctxt->sax != NULL)
8584 xmlFree(ctxt->sax);
8585 ctxt->sax = sax;
8586 ctxt->userData = NULL;
8587 }
8588
8589 /*
8590 * generate a parser input from the I/O handler
8591 */
8592
8593 pinput = xmlNewIOInputStream(ctxt, input, enc);
8594 if (pinput == NULL) {
8595 if (sax != NULL) ctxt->sax = NULL;
8596 xmlFreeParserCtxt(ctxt);
8597 return(NULL);
8598 }
8599
8600 /*
8601 * plug some encoding conversion routines here.
8602 */
8603 xmlPushInput(ctxt, pinput);
8604
8605 pinput->filename = NULL;
8606 pinput->line = 1;
8607 pinput->col = 1;
8608 pinput->base = ctxt->input->cur;
8609 pinput->cur = ctxt->input->cur;
8610 pinput->free = NULL;
8611
8612 /*
8613 * let's parse that entity knowing it's an external subset.
8614 */
8615 ctxt->inSubset = 2;
8616 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8617 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8618 BAD_CAST "none", BAD_CAST "none");
8619 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8620
8621 if (ctxt->myDoc != NULL) {
8622 if (ctxt->wellFormed) {
8623 ret = ctxt->myDoc->extSubset;
8624 ctxt->myDoc->extSubset = NULL;
8625 } else {
8626 ret = NULL;
8627 }
8628 xmlFreeDoc(ctxt->myDoc);
8629 ctxt->myDoc = NULL;
8630 }
8631 if (sax != NULL) ctxt->sax = NULL;
8632 xmlFreeParserCtxt(ctxt);
8633
8634 return(ret);
8635}
8636
8637/**
8638 * xmlSAXParseDTD:
8639 * @sax: the SAX handler block
8640 * @ExternalID: a NAME* containing the External ID of the DTD
8641 * @SystemID: a NAME* containing the URL to the DTD
8642 *
8643 * Load and parse an external subset.
8644 *
8645 * Returns the resulting xmlDtdPtr or NULL in case of error.
8646 */
8647
8648xmlDtdPtr
8649xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8650 const xmlChar *SystemID) {
8651 xmlDtdPtr ret = NULL;
8652 xmlParserCtxtPtr ctxt;
8653 xmlParserInputPtr input = NULL;
8654 xmlCharEncoding enc;
8655
8656 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8657
8658 ctxt = xmlNewParserCtxt();
8659 if (ctxt == NULL) {
8660 return(NULL);
8661 }
8662
8663 /*
8664 * Set-up the SAX context
8665 */
8666 if (sax != NULL) {
8667 if (ctxt->sax != NULL)
8668 xmlFree(ctxt->sax);
8669 ctxt->sax = sax;
8670 ctxt->userData = NULL;
8671 }
8672
8673 /*
8674 * Ask the Entity resolver to load the damn thing
8675 */
8676
8677 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8678 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8679 if (input == NULL) {
8680 if (sax != NULL) ctxt->sax = NULL;
8681 xmlFreeParserCtxt(ctxt);
8682 return(NULL);
8683 }
8684
8685 /*
8686 * plug some encoding conversion routines here.
8687 */
8688 xmlPushInput(ctxt, input);
8689 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8690 xmlSwitchEncoding(ctxt, enc);
8691
8692 if (input->filename == NULL)
8693 input->filename = (char *) xmlStrdup(SystemID);
8694 input->line = 1;
8695 input->col = 1;
8696 input->base = ctxt->input->cur;
8697 input->cur = ctxt->input->cur;
8698 input->free = NULL;
8699
8700 /*
8701 * let's parse that entity knowing it's an external subset.
8702 */
8703 ctxt->inSubset = 2;
8704 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8705 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8706 ExternalID, SystemID);
8707 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8708
8709 if (ctxt->myDoc != NULL) {
8710 if (ctxt->wellFormed) {
8711 ret = ctxt->myDoc->extSubset;
8712 ctxt->myDoc->extSubset = NULL;
8713 } else {
8714 ret = NULL;
8715 }
8716 xmlFreeDoc(ctxt->myDoc);
8717 ctxt->myDoc = NULL;
8718 }
8719 if (sax != NULL) ctxt->sax = NULL;
8720 xmlFreeParserCtxt(ctxt);
8721
8722 return(ret);
8723}
8724
8725/**
8726 * xmlParseDTD:
8727 * @ExternalID: a NAME* containing the External ID of the DTD
8728 * @SystemID: a NAME* containing the URL to the DTD
8729 *
8730 * Load and parse an external subset.
8731 *
8732 * Returns the resulting xmlDtdPtr or NULL in case of error.
8733 */
8734
8735xmlDtdPtr
8736xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8737 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8738}
8739
/************************************************************************
 *									*
 * 		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
8745
8746/**
Owen Taylor3473f882001-02-23 17:55:21 +00008747 * xmlParseCtxtExternalEntity:
8748 * @ctx: the existing parsing context
8749 * @URL: the URL for the entity to load
8750 * @ID: the System ID for the entity to load
8751 * @list: the return value for the set of parsed nodes
8752 *
8753 * Parse an external general entity within an existing parsing context
8754 * An external general parsed entity is well-formed if it matches the
8755 * production labeled extParsedEnt.
8756 *
8757 * [78] extParsedEnt ::= TextDecl? content
8758 *
8759 * Returns 0 if the entity is well formed, -1 in case of args problem and
8760 * the parser error code otherwise
8761 */
8762
8763int
8764xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8765 const xmlChar *ID, xmlNodePtr *list) {
8766 xmlParserCtxtPtr ctxt;
8767 xmlDocPtr newDoc;
8768 xmlSAXHandlerPtr oldsax = NULL;
8769 int ret = 0;
8770
8771 if (ctx->depth > 40) {
8772 return(XML_ERR_ENTITY_LOOP);
8773 }
8774
8775 if (list != NULL)
8776 *list = NULL;
8777 if ((URL == NULL) && (ID == NULL))
8778 return(-1);
8779 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8780 return(-1);
8781
8782
8783 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8784 if (ctxt == NULL) return(-1);
8785 ctxt->userData = ctxt;
8786 oldsax = ctxt->sax;
8787 ctxt->sax = ctx->sax;
8788 newDoc = xmlNewDoc(BAD_CAST "1.0");
8789 if (newDoc == NULL) {
8790 xmlFreeParserCtxt(ctxt);
8791 return(-1);
8792 }
8793 if (ctx->myDoc != NULL) {
8794 newDoc->intSubset = ctx->myDoc->intSubset;
8795 newDoc->extSubset = ctx->myDoc->extSubset;
8796 }
8797 if (ctx->myDoc->URL != NULL) {
8798 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8799 }
8800 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8801 if (newDoc->children == NULL) {
8802 ctxt->sax = oldsax;
8803 xmlFreeParserCtxt(ctxt);
8804 newDoc->intSubset = NULL;
8805 newDoc->extSubset = NULL;
8806 xmlFreeDoc(newDoc);
8807 return(-1);
8808 }
8809 nodePush(ctxt, newDoc->children);
8810 if (ctx->myDoc == NULL) {
8811 ctxt->myDoc = newDoc;
8812 } else {
8813 ctxt->myDoc = ctx->myDoc;
8814 newDoc->children->doc = ctx->myDoc;
8815 }
8816
8817 /*
8818 * Parse a possible text declaration first
8819 */
8820 GROW;
8821 if ((RAW == '<') && (NXT(1) == '?') &&
8822 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8823 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8824 xmlParseTextDecl(ctxt);
8825 }
8826
8827 /*
8828 * Doing validity checking on chunk doesn't make sense
8829 */
8830 ctxt->instate = XML_PARSER_CONTENT;
8831 ctxt->validate = ctx->validate;
8832 ctxt->loadsubset = ctx->loadsubset;
8833 ctxt->depth = ctx->depth + 1;
8834 ctxt->replaceEntities = ctx->replaceEntities;
8835 if (ctxt->validate) {
8836 ctxt->vctxt.error = ctx->vctxt.error;
8837 ctxt->vctxt.warning = ctx->vctxt.warning;
8838 /* Allocate the Node stack */
8839 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8840 if (ctxt->vctxt.nodeTab == NULL) {
8841 xmlGenericError(xmlGenericErrorContext,
8842 "xmlParseCtxtExternalEntity: out of memory\n");
8843 ctxt->validate = 0;
8844 ctxt->vctxt.error = NULL;
8845 ctxt->vctxt.warning = NULL;
8846 } else {
8847 ctxt->vctxt.nodeNr = 0;
8848 ctxt->vctxt.nodeMax = 4;
8849 ctxt->vctxt.node = NULL;
8850 }
8851 } else {
8852 ctxt->vctxt.error = NULL;
8853 ctxt->vctxt.warning = NULL;
8854 }
8855
8856 xmlParseContent(ctxt);
8857
8858 if ((RAW == '<') && (NXT(1) == '/')) {
8859 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8861 ctxt->sax->error(ctxt->userData,
8862 "chunk is not well balanced\n");
8863 ctxt->wellFormed = 0;
8864 ctxt->disableSAX = 1;
8865 } else if (RAW != 0) {
8866 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8868 ctxt->sax->error(ctxt->userData,
8869 "extra content at the end of well balanced chunk\n");
8870 ctxt->wellFormed = 0;
8871 ctxt->disableSAX = 1;
8872 }
8873 if (ctxt->node != newDoc->children) {
8874 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8876 ctxt->sax->error(ctxt->userData,
8877 "chunk is not well balanced\n");
8878 ctxt->wellFormed = 0;
8879 ctxt->disableSAX = 1;
8880 }
8881
8882 if (!ctxt->wellFormed) {
8883 if (ctxt->errNo == 0)
8884 ret = 1;
8885 else
8886 ret = ctxt->errNo;
8887 } else {
8888 if (list != NULL) {
8889 xmlNodePtr cur;
8890
8891 /*
8892 * Return the newly created nodeset after unlinking it from
8893 * they pseudo parent.
8894 */
8895 cur = newDoc->children->children;
8896 *list = cur;
8897 while (cur != NULL) {
8898 cur->parent = NULL;
8899 cur = cur->next;
8900 }
8901 newDoc->children->children = NULL;
8902 }
8903 ret = 0;
8904 }
8905 ctxt->sax = oldsax;
8906 xmlFreeParserCtxt(ctxt);
8907 newDoc->intSubset = NULL;
8908 newDoc->extSubset = NULL;
8909 xmlFreeDoc(newDoc);
8910
8911 return(ret);
8912}
8913
8914/**
8915 * xmlParseExternalEntity:
8916 * @doc: the document the chunk pertains to
8917 * @sax: the SAX handler bloc (possibly NULL)
8918 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8919 * @depth: Used for loop detection, use 0
8920 * @URL: the URL for the entity to load
8921 * @ID: the System ID for the entity to load
8922 * @list: the return value for the set of parsed nodes
8923 *
8924 * Parse an external general entity
8925 * An external general parsed entity is well-formed if it matches the
8926 * production labeled extParsedEnt.
8927 *
8928 * [78] extParsedEnt ::= TextDecl? content
8929 *
8930 * Returns 0 if the entity is well formed, -1 in case of args problem and
8931 * the parser error code otherwise
8932 */
8933
8934int
8935xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8936 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8937 xmlParserCtxtPtr ctxt;
8938 xmlDocPtr newDoc;
8939 xmlSAXHandlerPtr oldsax = NULL;
8940 int ret = 0;
8941
8942 if (depth > 40) {
8943 return(XML_ERR_ENTITY_LOOP);
8944 }
8945
8946
8947
8948 if (list != NULL)
8949 *list = NULL;
8950 if ((URL == NULL) && (ID == NULL))
8951 return(-1);
8952 if (doc == NULL) /* @@ relax but check for dereferences */
8953 return(-1);
8954
8955
8956 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8957 if (ctxt == NULL) return(-1);
8958 ctxt->userData = ctxt;
8959 if (sax != NULL) {
8960 oldsax = ctxt->sax;
8961 ctxt->sax = sax;
8962 if (user_data != NULL)
8963 ctxt->userData = user_data;
8964 }
8965 newDoc = xmlNewDoc(BAD_CAST "1.0");
8966 if (newDoc == NULL) {
8967 xmlFreeParserCtxt(ctxt);
8968 return(-1);
8969 }
8970 if (doc != NULL) {
8971 newDoc->intSubset = doc->intSubset;
8972 newDoc->extSubset = doc->extSubset;
8973 }
8974 if (doc->URL != NULL) {
8975 newDoc->URL = xmlStrdup(doc->URL);
8976 }
8977 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8978 if (newDoc->children == NULL) {
8979 if (sax != NULL)
8980 ctxt->sax = oldsax;
8981 xmlFreeParserCtxt(ctxt);
8982 newDoc->intSubset = NULL;
8983 newDoc->extSubset = NULL;
8984 xmlFreeDoc(newDoc);
8985 return(-1);
8986 }
8987 nodePush(ctxt, newDoc->children);
8988 if (doc == NULL) {
8989 ctxt->myDoc = newDoc;
8990 } else {
8991 ctxt->myDoc = doc;
8992 newDoc->children->doc = doc;
8993 }
8994
8995 /*
8996 * Parse a possible text declaration first
8997 */
8998 GROW;
8999 if ((RAW == '<') && (NXT(1) == '?') &&
9000 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9001 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9002 xmlParseTextDecl(ctxt);
9003 }
9004
9005 /*
9006 * Doing validity checking on chunk doesn't make sense
9007 */
9008 ctxt->instate = XML_PARSER_CONTENT;
9009 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00009010 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009011 ctxt->loadsubset = 0;
9012 ctxt->depth = depth;
9013
9014 xmlParseContent(ctxt);
9015
9016 if ((RAW == '<') && (NXT(1) == '/')) {
9017 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9019 ctxt->sax->error(ctxt->userData,
9020 "chunk is not well balanced\n");
9021 ctxt->wellFormed = 0;
9022 ctxt->disableSAX = 1;
9023 } else if (RAW != 0) {
9024 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9026 ctxt->sax->error(ctxt->userData,
9027 "extra content at the end of well balanced chunk\n");
9028 ctxt->wellFormed = 0;
9029 ctxt->disableSAX = 1;
9030 }
9031 if (ctxt->node != newDoc->children) {
9032 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9034 ctxt->sax->error(ctxt->userData,
9035 "chunk is not well balanced\n");
9036 ctxt->wellFormed = 0;
9037 ctxt->disableSAX = 1;
9038 }
9039
9040 if (!ctxt->wellFormed) {
9041 if (ctxt->errNo == 0)
9042 ret = 1;
9043 else
9044 ret = ctxt->errNo;
9045 } else {
9046 if (list != NULL) {
9047 xmlNodePtr cur;
9048
9049 /*
9050 * Return the newly created nodeset after unlinking it from
9051 * they pseudo parent.
9052 */
9053 cur = newDoc->children->children;
9054 *list = cur;
9055 while (cur != NULL) {
9056 cur->parent = NULL;
9057 cur = cur->next;
9058 }
9059 newDoc->children->children = NULL;
9060 }
9061 ret = 0;
9062 }
9063 if (sax != NULL)
9064 ctxt->sax = oldsax;
9065 xmlFreeParserCtxt(ctxt);
9066 newDoc->intSubset = NULL;
9067 newDoc->extSubset = NULL;
9068 xmlFreeDoc(newDoc);
9069
9070 return(ret);
9071}
9072
9073/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009074 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009075 * @doc: the document the chunk pertains to
9076 * @sax: the SAX handler bloc (possibly NULL)
9077 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9078 * @depth: Used for loop detection, use 0
9079 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9080 * @list: the return value for the set of parsed nodes
9081 *
9082 * Parse a well-balanced chunk of an XML document
9083 * called by the parser
9084 * The allowed sequence for the Well Balanced Chunk is the one defined by
9085 * the content production in the XML grammar:
9086 *
9087 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9088 *
9089 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9090 * the parser error code otherwise
9091 */
9092
9093int
9094xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9095 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9096 xmlParserCtxtPtr ctxt;
9097 xmlDocPtr newDoc;
9098 xmlSAXHandlerPtr oldsax = NULL;
9099 int size;
9100 int ret = 0;
9101
9102 if (depth > 40) {
9103 return(XML_ERR_ENTITY_LOOP);
9104 }
9105
9106
9107 if (list != NULL)
9108 *list = NULL;
9109 if (string == NULL)
9110 return(-1);
9111
9112 size = xmlStrlen(string);
9113
9114 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9115 if (ctxt == NULL) return(-1);
9116 ctxt->userData = ctxt;
9117 if (sax != NULL) {
9118 oldsax = ctxt->sax;
9119 ctxt->sax = sax;
9120 if (user_data != NULL)
9121 ctxt->userData = user_data;
9122 }
9123 newDoc = xmlNewDoc(BAD_CAST "1.0");
9124 if (newDoc == NULL) {
9125 xmlFreeParserCtxt(ctxt);
9126 return(-1);
9127 }
9128 if (doc != NULL) {
9129 newDoc->intSubset = doc->intSubset;
9130 newDoc->extSubset = doc->extSubset;
9131 }
9132 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9133 if (newDoc->children == NULL) {
9134 if (sax != NULL)
9135 ctxt->sax = oldsax;
9136 xmlFreeParserCtxt(ctxt);
9137 newDoc->intSubset = NULL;
9138 newDoc->extSubset = NULL;
9139 xmlFreeDoc(newDoc);
9140 return(-1);
9141 }
9142 nodePush(ctxt, newDoc->children);
9143 if (doc == NULL) {
9144 ctxt->myDoc = newDoc;
9145 } else {
9146 ctxt->myDoc = doc;
9147 newDoc->children->doc = doc;
9148 }
9149 ctxt->instate = XML_PARSER_CONTENT;
9150 ctxt->depth = depth;
9151
9152 /*
9153 * Doing validity checking on chunk doesn't make sense
9154 */
9155 ctxt->validate = 0;
9156 ctxt->loadsubset = 0;
9157
9158 xmlParseContent(ctxt);
9159
9160 if ((RAW == '<') && (NXT(1) == '/')) {
9161 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9163 ctxt->sax->error(ctxt->userData,
9164 "chunk is not well balanced\n");
9165 ctxt->wellFormed = 0;
9166 ctxt->disableSAX = 1;
9167 } else if (RAW != 0) {
9168 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9170 ctxt->sax->error(ctxt->userData,
9171 "extra content at the end of well balanced chunk\n");
9172 ctxt->wellFormed = 0;
9173 ctxt->disableSAX = 1;
9174 }
9175 if (ctxt->node != newDoc->children) {
9176 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9178 ctxt->sax->error(ctxt->userData,
9179 "chunk is not well balanced\n");
9180 ctxt->wellFormed = 0;
9181 ctxt->disableSAX = 1;
9182 }
9183
9184 if (!ctxt->wellFormed) {
9185 if (ctxt->errNo == 0)
9186 ret = 1;
9187 else
9188 ret = ctxt->errNo;
9189 } else {
9190 if (list != NULL) {
9191 xmlNodePtr cur;
9192
9193 /*
9194 * Return the newly created nodeset after unlinking it from
9195 * they pseudo parent.
9196 */
9197 cur = newDoc->children->children;
9198 *list = cur;
9199 while (cur != NULL) {
9200 cur->parent = NULL;
9201 cur = cur->next;
9202 }
9203 newDoc->children->children = NULL;
9204 }
9205 ret = 0;
9206 }
9207 if (sax != NULL)
9208 ctxt->sax = oldsax;
9209 xmlFreeParserCtxt(ctxt);
9210 newDoc->intSubset = NULL;
9211 newDoc->extSubset = NULL;
9212 xmlFreeDoc(newDoc);
9213
9214 return(ret);
9215}
9216
9217/**
9218 * xmlSAXParseEntity:
9219 * @sax: the SAX handler block
9220 * @filename: the filename
9221 *
9222 * parse an XML external entity out of context and build a tree.
9223 * It use the given SAX function block to handle the parsing callback.
9224 * If sax is NULL, fallback to the default DOM tree building routines.
9225 *
9226 * [78] extParsedEnt ::= TextDecl? content
9227 *
9228 * This correspond to a "Well Balanced" chunk
9229 *
9230 * Returns the resulting document tree
9231 */
9232
9233xmlDocPtr
9234xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9235 xmlDocPtr ret;
9236 xmlParserCtxtPtr ctxt;
9237 char *directory = NULL;
9238
9239 ctxt = xmlCreateFileParserCtxt(filename);
9240 if (ctxt == NULL) {
9241 return(NULL);
9242 }
9243 if (sax != NULL) {
9244 if (ctxt->sax != NULL)
9245 xmlFree(ctxt->sax);
9246 ctxt->sax = sax;
9247 ctxt->userData = NULL;
9248 }
9249
9250 if ((ctxt->directory == NULL) && (directory == NULL))
9251 directory = xmlParserGetDirectory(filename);
9252
9253 xmlParseExtParsedEnt(ctxt);
9254
9255 if (ctxt->wellFormed)
9256 ret = ctxt->myDoc;
9257 else {
9258 ret = NULL;
9259 xmlFreeDoc(ctxt->myDoc);
9260 ctxt->myDoc = NULL;
9261 }
9262 if (sax != NULL)
9263 ctxt->sax = NULL;
9264 xmlFreeParserCtxt(ctxt);
9265
9266 return(ret);
9267}
9268
9269/**
9270 * xmlParseEntity:
9271 * @filename: the filename
9272 *
9273 * parse an XML external entity out of context and build a tree.
9274 *
9275 * [78] extParsedEnt ::= TextDecl? content
9276 *
9277 * This correspond to a "Well Balanced" chunk
9278 *
9279 * Returns the resulting document tree
9280 */
9281
9282xmlDocPtr
9283xmlParseEntity(const char *filename) {
9284 return(xmlSAXParseEntity(NULL, filename));
9285}
9286
9287/**
9288 * xmlCreateEntityParserCtxt:
9289 * @URL: the entity URL
9290 * @ID: the entity PUBLIC ID
9291 * @base: a posible base for the target URI
9292 *
9293 * Create a parser context for an external entity
9294 * Automatic support for ZLIB/Compress compressed document is provided
9295 * by default if found at compile-time.
9296 *
9297 * Returns the new parser context or NULL
9298 */
9299xmlParserCtxtPtr
9300xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9301 const xmlChar *base) {
9302 xmlParserCtxtPtr ctxt;
9303 xmlParserInputPtr inputStream;
9304 char *directory = NULL;
9305 xmlChar *uri;
9306
9307 ctxt = xmlNewParserCtxt();
9308 if (ctxt == NULL) {
9309 return(NULL);
9310 }
9311
9312 uri = xmlBuildURI(URL, base);
9313
9314 if (uri == NULL) {
9315 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9316 if (inputStream == NULL) {
9317 xmlFreeParserCtxt(ctxt);
9318 return(NULL);
9319 }
9320
9321 inputPush(ctxt, inputStream);
9322
9323 if ((ctxt->directory == NULL) && (directory == NULL))
9324 directory = xmlParserGetDirectory((char *)URL);
9325 if ((ctxt->directory == NULL) && (directory != NULL))
9326 ctxt->directory = directory;
9327 } else {
9328 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9329 if (inputStream == NULL) {
9330 xmlFree(uri);
9331 xmlFreeParserCtxt(ctxt);
9332 return(NULL);
9333 }
9334
9335 inputPush(ctxt, inputStream);
9336
9337 if ((ctxt->directory == NULL) && (directory == NULL))
9338 directory = xmlParserGetDirectory((char *)uri);
9339 if ((ctxt->directory == NULL) && (directory != NULL))
9340 ctxt->directory = directory;
9341 xmlFree(uri);
9342 }
9343
9344 return(ctxt);
9345}
9346
/************************************************************************
 *									*
 * 		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
9352
9353/**
9354 * xmlCreateFileParserCtxt:
9355 * @filename: the filename
9356 *
9357 * Create a parser context for a file content.
9358 * Automatic support for ZLIB/Compress compressed document is provided
9359 * by default if found at compile-time.
9360 *
9361 * Returns the new parser context or NULL
9362 */
9363xmlParserCtxtPtr
9364xmlCreateFileParserCtxt(const char *filename)
9365{
9366 xmlParserCtxtPtr ctxt;
9367 xmlParserInputPtr inputStream;
9368 xmlParserInputBufferPtr buf;
9369 char *directory = NULL;
9370
9371 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9372 if (buf == NULL) {
9373 return(NULL);
9374 }
9375
9376 ctxt = xmlNewParserCtxt();
9377 if (ctxt == NULL) {
9378 if (xmlDefaultSAXHandler.error != NULL) {
9379 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9380 }
9381 return(NULL);
9382 }
9383
9384 inputStream = xmlNewInputStream(ctxt);
9385 if (inputStream == NULL) {
9386 xmlFreeParserCtxt(ctxt);
9387 return(NULL);
9388 }
9389
9390 inputStream->filename = xmlMemStrdup(filename);
9391 inputStream->buf = buf;
9392 inputStream->base = inputStream->buf->buffer->content;
9393 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009394 inputStream->end =
9395 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009396
9397 inputPush(ctxt, inputStream);
9398 if ((ctxt->directory == NULL) && (directory == NULL))
9399 directory = xmlParserGetDirectory(filename);
9400 if ((ctxt->directory == NULL) && (directory != NULL))
9401 ctxt->directory = directory;
9402
9403 return(ctxt);
9404}
9405
9406/**
9407 * xmlSAXParseFile:
9408 * @sax: the SAX handler block
9409 * @filename: the filename
9410 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9411 * documents
9412 *
9413 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9414 * compressed document is provided by default if found at compile-time.
9415 * It use the given SAX function block to handle the parsing callback.
9416 * If sax is NULL, fallback to the default DOM tree building routines.
9417 *
9418 * Returns the resulting document tree
9419 */
9420
9421xmlDocPtr
9422xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9423 int recovery) {
9424 xmlDocPtr ret;
9425 xmlParserCtxtPtr ctxt;
9426 char *directory = NULL;
9427
9428 ctxt = xmlCreateFileParserCtxt(filename);
9429 if (ctxt == NULL) {
9430 return(NULL);
9431 }
9432 if (sax != NULL) {
9433 if (ctxt->sax != NULL)
9434 xmlFree(ctxt->sax);
9435 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009436 }
9437
9438 if ((ctxt->directory == NULL) && (directory == NULL))
9439 directory = xmlParserGetDirectory(filename);
9440 if ((ctxt->directory == NULL) && (directory != NULL))
9441 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9442
9443 xmlParseDocument(ctxt);
9444
9445 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9446 else {
9447 ret = NULL;
9448 xmlFreeDoc(ctxt->myDoc);
9449 ctxt->myDoc = NULL;
9450 }
9451 if (sax != NULL)
9452 ctxt->sax = NULL;
9453 xmlFreeParserCtxt(ctxt);
9454
9455 return(ret);
9456}
9457
9458/**
9459 * xmlRecoverDoc:
9460 * @cur: a pointer to an array of xmlChar
9461 *
9462 * parse an XML in-memory document and build a tree.
9463 * In the case the document is not Well Formed, a tree is built anyway
9464 *
9465 * Returns the resulting document tree
9466 */
9467
9468xmlDocPtr
9469xmlRecoverDoc(xmlChar *cur) {
9470 return(xmlSAXParseDoc(NULL, cur, 1));
9471}
9472
9473/**
9474 * xmlParseFile:
9475 * @filename: the filename
9476 *
9477 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9478 * compressed document is provided by default if found at compile-time.
9479 *
9480 * Returns the resulting document tree
9481 */
9482
9483xmlDocPtr
9484xmlParseFile(const char *filename) {
9485 return(xmlSAXParseFile(NULL, filename, 0));
9486}
9487
9488/**
9489 * xmlRecoverFile:
9490 * @filename: the filename
9491 *
9492 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9493 * compressed document is provided by default if found at compile-time.
9494 * In the case the document is not Well Formed, a tree is built anyway
9495 *
9496 * Returns the resulting document tree
9497 */
9498
9499xmlDocPtr
9500xmlRecoverFile(const char *filename) {
9501 return(xmlSAXParseFile(NULL, filename, 1));
9502}
9503
9504
9505/**
9506 * xmlSetupParserForBuffer:
9507 * @ctxt: an XML parser context
9508 * @buffer: a xmlChar * buffer
9509 * @filename: a file name
9510 *
9511 * Setup the parser context to parse a new buffer; Clears any prior
9512 * contents from the parser context. The buffer parameter must not be
9513 * NULL, but the filename parameter can be
9514 */
9515void
9516xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9517 const char* filename)
9518{
9519 xmlParserInputPtr input;
9520
9521 input = xmlNewInputStream(ctxt);
9522 if (input == NULL) {
9523 perror("malloc");
9524 xmlFree(ctxt);
9525 return;
9526 }
9527
9528 xmlClearParserCtxt(ctxt);
9529 if (filename != NULL)
9530 input->filename = xmlMemStrdup(filename);
9531 input->base = buffer;
9532 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009533 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009534 inputPush(ctxt, input);
9535}
9536
9537/**
9538 * xmlSAXUserParseFile:
9539 * @sax: a SAX handler
9540 * @user_data: The user data returned on SAX callbacks
9541 * @filename: a file name
9542 *
9543 * parse an XML file and call the given SAX handler routines.
9544 * Automatic support for ZLIB/Compress compressed document is provided
9545 *
9546 * Returns 0 in case of success or a error number otherwise
9547 */
9548int
9549xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9550 const char *filename) {
9551 int ret = 0;
9552 xmlParserCtxtPtr ctxt;
9553
9554 ctxt = xmlCreateFileParserCtxt(filename);
9555 if (ctxt == NULL) return -1;
9556 if (ctxt->sax != &xmlDefaultSAXHandler)
9557 xmlFree(ctxt->sax);
9558 ctxt->sax = sax;
9559 if (user_data != NULL)
9560 ctxt->userData = user_data;
9561
9562 xmlParseDocument(ctxt);
9563
9564 if (ctxt->wellFormed)
9565 ret = 0;
9566 else {
9567 if (ctxt->errNo != 0)
9568 ret = ctxt->errNo;
9569 else
9570 ret = -1;
9571 }
9572 if (sax != NULL)
9573 ctxt->sax = NULL;
9574 xmlFreeParserCtxt(ctxt);
9575
9576 return ret;
9577}
9578
/************************************************************************
 *									*
 * 		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
9584
9585/**
9586 * xmlCreateMemoryParserCtxt:
9587 * @buffer: a pointer to a char array
9588 * @size: the size of the array
9589 *
9590 * Create a parser context for an XML in-memory document.
9591 *
9592 * Returns the new parser context or NULL
9593 */
9594xmlParserCtxtPtr
9595xmlCreateMemoryParserCtxt(char *buffer, int size) {
9596 xmlParserCtxtPtr ctxt;
9597 xmlParserInputPtr input;
9598 xmlParserInputBufferPtr buf;
9599
9600 if (buffer == NULL)
9601 return(NULL);
9602 if (size <= 0)
9603 return(NULL);
9604
9605 ctxt = xmlNewParserCtxt();
9606 if (ctxt == NULL)
9607 return(NULL);
9608
9609 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9610 if (buf == NULL) return(NULL);
9611
9612 input = xmlNewInputStream(ctxt);
9613 if (input == NULL) {
9614 xmlFreeParserCtxt(ctxt);
9615 return(NULL);
9616 }
9617
9618 input->filename = NULL;
9619 input->buf = buf;
9620 input->base = input->buf->buffer->content;
9621 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009622 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009623
9624 inputPush(ctxt, input);
9625 return(ctxt);
9626}
9627
9628/**
9629 * xmlSAXParseMemory:
9630 * @sax: the SAX handler block
9631 * @buffer: an pointer to a char array
9632 * @size: the size of the array
9633 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9634 * documents
9635 *
9636 * parse an XML in-memory block and use the given SAX function block
9637 * to handle the parsing callback. If sax is NULL, fallback to the default
9638 * DOM tree building routines.
9639 *
9640 * Returns the resulting document tree
9641 */
9642xmlDocPtr
9643xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9644 xmlDocPtr ret;
9645 xmlParserCtxtPtr ctxt;
9646
9647 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9648 if (ctxt == NULL) return(NULL);
9649 if (sax != NULL) {
9650 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009651 }
9652
9653 xmlParseDocument(ctxt);
9654
9655 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9656 else {
9657 ret = NULL;
9658 xmlFreeDoc(ctxt->myDoc);
9659 ctxt->myDoc = NULL;
9660 }
9661 if (sax != NULL)
9662 ctxt->sax = NULL;
9663 xmlFreeParserCtxt(ctxt);
9664
9665 return(ret);
9666}
9667
9668/**
9669 * xmlParseMemory:
9670 * @buffer: an pointer to a char array
9671 * @size: the size of the array
9672 *
9673 * parse an XML in-memory block and build a tree.
9674 *
9675 * Returns the resulting document tree
9676 */
9677
9678xmlDocPtr xmlParseMemory(char *buffer, int size) {
9679 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9680}
9681
9682/**
9683 * xmlRecoverMemory:
9684 * @buffer: an pointer to a char array
9685 * @size: the size of the array
9686 *
9687 * parse an XML in-memory block and build a tree.
9688 * In the case the document is not Well Formed, a tree is built anyway
9689 *
9690 * Returns the resulting document tree
9691 */
9692
9693xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9694 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9695}
9696
9697/**
9698 * xmlSAXUserParseMemory:
9699 * @sax: a SAX handler
9700 * @user_data: The user data returned on SAX callbacks
9701 * @buffer: an in-memory XML document input
9702 * @size: the length of the XML document in bytes
9703 *
9704 * A better SAX parsing routine.
9705 * parse an XML in-memory buffer and call the given SAX handler routines.
9706 *
9707 * Returns 0 in case of success or a error number otherwise
9708 */
9709int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9710 char *buffer, int size) {
9711 int ret = 0;
9712 xmlParserCtxtPtr ctxt;
9713 xmlSAXHandlerPtr oldsax = NULL;
9714
9715 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9716 if (ctxt == NULL) return -1;
9717 if (sax != NULL) {
9718 oldsax = ctxt->sax;
9719 ctxt->sax = sax;
9720 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009721 if (user_data != NULL)
9722 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009723
9724 xmlParseDocument(ctxt);
9725
9726 if (ctxt->wellFormed)
9727 ret = 0;
9728 else {
9729 if (ctxt->errNo != 0)
9730 ret = ctxt->errNo;
9731 else
9732 ret = -1;
9733 }
9734 if (sax != NULL) {
9735 ctxt->sax = oldsax;
9736 }
9737 xmlFreeParserCtxt(ctxt);
9738
9739 return ret;
9740}
9741
9742/**
9743 * xmlCreateDocParserCtxt:
9744 * @cur: a pointer to an array of xmlChar
9745 *
9746 * Creates a parser context for an XML in-memory document.
9747 *
9748 * Returns the new parser context or NULL
9749 */
9750xmlParserCtxtPtr
9751xmlCreateDocParserCtxt(xmlChar *cur) {
9752 int len;
9753
9754 if (cur == NULL)
9755 return(NULL);
9756 len = xmlStrlen(cur);
9757 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9758}
9759
9760/**
9761 * xmlSAXParseDoc:
9762 * @sax: the SAX handler block
9763 * @cur: a pointer to an array of xmlChar
9764 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9765 * documents
9766 *
9767 * parse an XML in-memory document and build a tree.
9768 * It use the given SAX function block to handle the parsing callback.
9769 * If sax is NULL, fallback to the default DOM tree building routines.
9770 *
9771 * Returns the resulting document tree
9772 */
9773
9774xmlDocPtr
9775xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9776 xmlDocPtr ret;
9777 xmlParserCtxtPtr ctxt;
9778
9779 if (cur == NULL) return(NULL);
9780
9781
9782 ctxt = xmlCreateDocParserCtxt(cur);
9783 if (ctxt == NULL) return(NULL);
9784 if (sax != NULL) {
9785 ctxt->sax = sax;
9786 ctxt->userData = NULL;
9787 }
9788
9789 xmlParseDocument(ctxt);
9790 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9791 else {
9792 ret = NULL;
9793 xmlFreeDoc(ctxt->myDoc);
9794 ctxt->myDoc = NULL;
9795 }
9796 if (sax != NULL)
9797 ctxt->sax = NULL;
9798 xmlFreeParserCtxt(ctxt);
9799
9800 return(ret);
9801}
9802
9803/**
9804 * xmlParseDoc:
9805 * @cur: a pointer to an array of xmlChar
9806 *
9807 * parse an XML in-memory document and build a tree.
9808 *
9809 * Returns the resulting document tree
9810 */
9811
9812xmlDocPtr
9813xmlParseDoc(xmlChar *cur) {
9814 return(xmlSAXParseDoc(NULL, cur, 0));
9815}
9816
9817
9818/************************************************************************
9819 * *
9820 * Miscellaneous *
9821 * *
9822 ************************************************************************/
9823
9824#ifdef LIBXML_XPATH_ENABLED
9825#include <libxml/xpath.h>
9826#endif
9827
/* non-zero once xmlInitParser() has run, reset by xmlCleanupParser() */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser: sets up the encoding
 * handlers, the predefined entities, the default SAX handler and the
 * default I/O callbacks, plus the HTML and XPath modules when they are
 * compiled in. Subsequent calls are no-ops until xmlCleanupParser()
 * is called.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9856
9857/**
9858 * xmlCleanupParser:
9859 *
9860 * Cleanup function for the XML parser. It tries to reclaim all
9861 * parsing related global memory allocated for the parser processing.
9862 * It doesn't deallocate any document related memory. Calling this
9863 * function should not prevent reusing the parser.
9864 */
9865
9866void
9867xmlCleanupParser(void) {
9868 xmlParserInitialized = 0;
9869 xmlCleanupCharEncodingHandlers();
9870 xmlCleanupPredefinedEntities();
9871}
9872
9873/**
9874 * xmlPedanticParserDefault:
9875 * @val: int 0 or 1
9876 *
9877 * Set and return the previous value for enabling pedantic warnings.
9878 *
9879 * Returns the last value for 0 for no substitution, 1 for substitution.
9880 */
9881
9882int
9883xmlPedanticParserDefault(int val) {
9884 int old = xmlPedanticParserDefaultValue;
9885
9886 xmlPedanticParserDefaultValue = val;
9887 return(old);
9888}
9889
9890/**
9891 * xmlSubstituteEntitiesDefault:
9892 * @val: int 0 or 1
9893 *
9894 * Set and return the previous value for default entity support.
9895 * Initially the parser always keep entity references instead of substituting
9896 * entity values in the output. This function has to be used to change the
9897 * default parser behaviour
9898 * SAX::subtituteEntities() has to be used for changing that on a file by
9899 * file basis.
9900 *
9901 * Returns the last value for 0 for no substitution, 1 for substitution.
9902 */
9903
9904int
9905xmlSubstituteEntitiesDefault(int val) {
9906 int old = xmlSubstituteEntitiesDefaultValue;
9907
9908 xmlSubstituteEntitiesDefaultValue = val;
9909 return(old);
9910}
9911
9912/**
9913 * xmlKeepBlanksDefault:
9914 * @val: int 0 or 1
9915 *
9916 * Set and return the previous value for default blanks text nodes support.
9917 * The 1.x version of the parser used an heuristic to try to detect
9918 * ignorable white spaces. As a result the SAX callback was generating
9919 * ignorableWhitespace() callbacks instead of characters() one, and when
9920 * using the DOM output text nodes containing those blanks were not generated.
9921 * The 2.x and later version will switch to the XML standard way and
9922 * ignorableWhitespace() are only generated when running the parser in
9923 * validating mode and when the current element doesn't allow CDATA or
9924 * mixed content.
9925 * This function is provided as a way to force the standard behaviour
9926 * on 1.X libs and to switch back to the old mode for compatibility when
9927 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9928 * by using xmlIsBlankNode() commodity function to detect the "empty"
9929 * nodes generated.
9930 * This value also affect autogeneration of indentation when saving code
9931 * if blanks sections are kept, indentation is not generated.
9932 *
9933 * Returns the last value for 0 for no substitution, 1 for substitution.
9934 */
9935
9936int
9937xmlKeepBlanksDefault(int val) {
9938 int old = xmlKeepBlanksDefaultValue;
9939
9940 xmlKeepBlanksDefaultValue = val;
9941 xmlIndentTreeOutput = !val;
9942 return(old);
9943}
9944