blob: 94589a758589f32bdd790bdc5ef38d3da28bcbf7 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizes, counted in xmlChar.
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * allocated by xmlStringDecodeEntities(); XML_PARSER_BUFFER_SIZE is the
 * free margin that function keeps at the end of that buffer.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * Global default values used by the parser.
 *
 * On VMS the two longest identifiers are defined under a truncated
 * spelling; the macros below map the full names onto the short ones so
 * that a single set of definitions serves every platform.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

int xmlGetWarningsDefaultValue = 1;
/* when non-zero, entity/input handling traces go to xmlGenericError() */
int xmlParserDebugEntities = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * NULL terminated list of the XML prefixed PIs allowed by W3C specs
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlSAXHandlerPtr sax,
114 void *user_data, int depth, const xmlChar *URL,
115 const xmlChar *ID, xmlNodePtr *list, void *private);
Owen Taylor3473f882001-02-23 17:55:21 +0000116
117/************************************************************************
118 * *
119 * Parser stacks related functions and macros *
120 * *
121 ************************************************************************/
122
123xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
124 const xmlChar ** str);
125
126/*
127 * Generic function for accessing stacks in the Parser Context
128 */
129
130#define PUSH_AND_POP(scope, type, name) \
131scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
132 if (ctxt->name##Nr >= ctxt->name##Max) { \
133 ctxt->name##Max *= 2; \
134 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
135 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
136 if (ctxt->name##Tab == NULL) { \
137 xmlGenericError(xmlGenericErrorContext, \
138 "realloc failed !\n"); \
139 return(0); \
140 } \
141 } \
142 ctxt->name##Tab[ctxt->name##Nr] = value; \
143 ctxt->name = value; \
144 return(ctxt->name##Nr++); \
145} \
146scope type name##Pop(xmlParserCtxtPtr ctxt) { \
147 type ret; \
148 if (ctxt->name##Nr <= 0) return(0); \
149 ctxt->name##Nr--; \
150 if (ctxt->name##Nr > 0) \
151 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
152 else \
153 ctxt->name = NULL; \
154 ret = ctxt->name##Tab[ctxt->name##Nr]; \
155 ctxt->name##Tab[ctxt->name##Nr] = 0; \
156 return(ret); \
157} \
158
159/*
160 * Those macros actually generate the functions
161 */
162PUSH_AND_POP(extern, xmlParserInputPtr, input)
163PUSH_AND_POP(extern, xmlNodePtr, node)
164PUSH_AND_POP(extern, xmlChar*, name)
165
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000166static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000167 if (ctxt->spaceNr >= ctxt->spaceMax) {
168 ctxt->spaceMax *= 2;
169 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
170 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
171 if (ctxt->spaceTab == NULL) {
172 xmlGenericError(xmlGenericErrorContext,
173 "realloc failed !\n");
174 return(0);
175 }
176 }
177 ctxt->spaceTab[ctxt->spaceNr] = val;
178 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
179 return(ctxt->spaceNr++);
180}
181
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000182static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000183 int ret;
184 if (ctxt->spaceNr <= 0) return(0);
185 ctxt->spaceNr--;
186 if (ctxt->spaceNr > 0)
187 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
188 else
189 ctxt->space = NULL;
190 ret = ctxt->spaceTab[ctxt->spaceNr];
191 ctxt->spaceTab[ctxt->spaceNr] = -1;
192 return(ret);
193}
194
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value. This should be used internally by the
 *           parser only to compare to ASCII values otherwise it would
 *           break when running with UTF-8 encoding.
 *   RAW     same as CUR but looks only at the input buffer: it evaluates
 *           to -1 while a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up finished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar, growing the input if its end
 *           is reached.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *           the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare if or a bare
 * compound statement. Use them only as a complete statement terminated
 * by ';' and never as the unbraced body of an if that has an else branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000281
282/**
283 * xmlSkipBlankChars:
284 * @ctxt: the XML parser context
285 *
286 * skip all blanks character found at that point in the input streams.
287 * It pops up finished entities in the process if allowable at that point.
288 *
289 * Returns the number of space chars skipped
290 */
291
292int
293xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000294 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000295
Daniel Veillard02141ea2001-04-30 11:46:40 +0000296 if (ctxt->token != 0) {
297 if (!IS_BLANK(ctxt->token))
298 return(0);
299 ctxt->token = 0;
300 res++;
301 }
Owen Taylor3473f882001-02-23 17:55:21 +0000302 /*
303 * It's Okay to use CUR/NEXT here since all the blanks are on
304 * the ASCII range.
305 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000306 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
307 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000308 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000309 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000310 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000311 cur = ctxt->input->cur;
312 while (IS_BLANK(*cur)) {
313 if (*cur == '\n') {
314 ctxt->input->line++; ctxt->input->col = 1;
315 }
316 cur++;
317 res++;
318 if (*cur == 0) {
319 ctxt->input->cur = cur;
320 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
321 cur = ctxt->input->cur;
322 }
323 }
324 ctxt->input->cur = cur;
325 } else {
326 int cur;
327 do {
328 cur = CUR;
329 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
330 NEXT;
331 cur = CUR;
332 res++;
333 }
334 while ((cur == 0) && (ctxt->inputNr > 1) &&
335 (ctxt->instate != XML_PARSER_COMMENT)) {
336 xmlPopInput(ctxt);
337 cur = CUR;
338 }
339 /*
340 * Need to handle support of entities branching here
341 */
342 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
343 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
344 }
Owen Taylor3473f882001-02-23 17:55:21 +0000345 return(res);
346}
347
348/************************************************************************
349 * *
350 * Commodity functions to handle entities *
351 * *
352 ************************************************************************/
353
354/**
355 * xmlPopInput:
356 * @ctxt: an XML parser context
357 *
358 * xmlPopInput: the current input pointed by ctxt->input came to an end
359 * pop it and return the next char.
360 *
361 * Returns the current xmlChar in the parser context
362 */
363xmlChar
364xmlPopInput(xmlParserCtxtPtr ctxt) {
365 if (ctxt->inputNr == 1) return(0); /* End of main Input */
366 if (xmlParserDebugEntities)
367 xmlGenericError(xmlGenericErrorContext,
368 "Popping input %d\n", ctxt->inputNr);
369 xmlFreeInputStream(inputPop(ctxt));
370 if ((*ctxt->input->cur == 0) &&
371 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
372 return(xmlPopInput(ctxt));
373 return(CUR);
374}
375
376/**
377 * xmlPushInput:
378 * @ctxt: an XML parser context
379 * @input: an XML parser input fragment (entity, XML fragment ...).
380 *
381 * xmlPushInput: switch to a new input stream which is stacked on top
382 * of the previous one(s).
383 */
384void
385xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
386 if (input == NULL) return;
387
388 if (xmlParserDebugEntities) {
389 if ((ctxt->input != NULL) && (ctxt->input->filename))
390 xmlGenericError(xmlGenericErrorContext,
391 "%s(%d): ", ctxt->input->filename,
392 ctxt->input->line);
393 xmlGenericError(xmlGenericErrorContext,
394 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
395 }
396 inputPush(ctxt, input);
397 GROW;
398}
399
400/**
401 * xmlParseCharRef:
402 * @ctxt: an XML parser context
403 *
404 * parse Reference declarations
405 *
406 * [66] CharRef ::= '&#' [0-9]+ ';' |
407 * '&#x' [0-9a-fA-F]+ ';'
408 *
409 * [ WFC: Legal Character ]
410 * Characters referred to using character references must match the
411 * production for Char.
412 *
413 * Returns the value parsed (as an int), 0 in case of error
414 */
415int
416xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000417 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000418 int count = 0;
419
420 if (ctxt->token != 0) {
421 val = ctxt->token;
422 ctxt->token = 0;
423 return(val);
424 }
425 /*
426 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
427 */
428 if ((RAW == '&') && (NXT(1) == '#') &&
429 (NXT(2) == 'x')) {
430 SKIP(3);
431 GROW;
432 while (RAW != ';') { /* loop blocked by count */
433 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
434 val = val * 16 + (CUR - '0');
435 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
436 val = val * 16 + (CUR - 'a') + 10;
437 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
438 val = val * 16 + (CUR - 'A') + 10;
439 else {
440 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
442 ctxt->sax->error(ctxt->userData,
443 "xmlParseCharRef: invalid hexadecimal value\n");
444 ctxt->wellFormed = 0;
445 ctxt->disableSAX = 1;
446 val = 0;
447 break;
448 }
449 NEXT;
450 count++;
451 }
452 if (RAW == ';') {
453 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
454 ctxt->nbChars ++;
455 ctxt->input->cur++;
456 }
457 } else if ((RAW == '&') && (NXT(1) == '#')) {
458 SKIP(2);
459 GROW;
460 while (RAW != ';') { /* loop blocked by count */
461 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
462 val = val * 10 + (CUR - '0');
463 else {
464 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
466 ctxt->sax->error(ctxt->userData,
467 "xmlParseCharRef: invalid decimal value\n");
468 ctxt->wellFormed = 0;
469 ctxt->disableSAX = 1;
470 val = 0;
471 break;
472 }
473 NEXT;
474 count++;
475 }
476 if (RAW == ';') {
477 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
478 ctxt->nbChars ++;
479 ctxt->input->cur++;
480 }
481 } else {
482 ctxt->errNo = XML_ERR_INVALID_CHARREF;
483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
484 ctxt->sax->error(ctxt->userData,
485 "xmlParseCharRef: invalid value\n");
486 ctxt->wellFormed = 0;
487 ctxt->disableSAX = 1;
488 }
489
490 /*
491 * [ WFC: Legal Character ]
492 * Characters referred to using character references must match the
493 * production for Char.
494 */
495 if (IS_CHAR(val)) {
496 return(val);
497 } else {
498 ctxt->errNo = XML_ERR_INVALID_CHAR;
499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
500 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
501 val);
502 ctxt->wellFormed = 0;
503 ctxt->disableSAX = 1;
504 }
505 return(0);
506}
507
508/**
509 * xmlParseStringCharRef:
510 * @ctxt: an XML parser context
511 * @str: a pointer to an index in the string
512 *
513 * parse Reference declarations, variant parsing from a string rather
514 * than an an input flow.
515 *
516 * [66] CharRef ::= '&#' [0-9]+ ';' |
517 * '&#x' [0-9a-fA-F]+ ';'
518 *
519 * [ WFC: Legal Character ]
520 * Characters referred to using character references must match the
521 * production for Char.
522 *
523 * Returns the value parsed (as an int), 0 in case of error, str will be
524 * updated to the current value of the index
525 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000526static int
Owen Taylor3473f882001-02-23 17:55:21 +0000527xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
528 const xmlChar *ptr;
529 xmlChar cur;
530 int val = 0;
531
532 if ((str == NULL) || (*str == NULL)) return(0);
533 ptr = *str;
534 cur = *ptr;
535 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
536 ptr += 3;
537 cur = *ptr;
538 while (cur != ';') { /* Non input consuming loop */
539 if ((cur >= '0') && (cur <= '9'))
540 val = val * 16 + (cur - '0');
541 else if ((cur >= 'a') && (cur <= 'f'))
542 val = val * 16 + (cur - 'a') + 10;
543 else if ((cur >= 'A') && (cur <= 'F'))
544 val = val * 16 + (cur - 'A') + 10;
545 else {
546 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
548 ctxt->sax->error(ctxt->userData,
549 "xmlParseStringCharRef: invalid hexadecimal value\n");
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 val = 0;
553 break;
554 }
555 ptr++;
556 cur = *ptr;
557 }
558 if (cur == ';')
559 ptr++;
560 } else if ((cur == '&') && (ptr[1] == '#')){
561 ptr += 2;
562 cur = *ptr;
563 while (cur != ';') { /* Non input consuming loops */
564 if ((cur >= '0') && (cur <= '9'))
565 val = val * 10 + (cur - '0');
566 else {
567 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseStringCharRef: invalid decimal value\n");
571 ctxt->wellFormed = 0;
572 ctxt->disableSAX = 1;
573 val = 0;
574 break;
575 }
576 ptr++;
577 cur = *ptr;
578 }
579 if (cur == ';')
580 ptr++;
581 } else {
582 ctxt->errNo = XML_ERR_INVALID_CHARREF;
583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
584 ctxt->sax->error(ctxt->userData,
585 "xmlParseCharRef: invalid value\n");
586 ctxt->wellFormed = 0;
587 ctxt->disableSAX = 1;
588 return(0);
589 }
590 *str = ptr;
591
592 /*
593 * [ WFC: Legal Character ]
594 * Characters referred to using character references must match the
595 * production for Char.
596 */
597 if (IS_CHAR(val)) {
598 return(val);
599 } else {
600 ctxt->errNo = XML_ERR_INVALID_CHAR;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "CharRef: invalid xmlChar value %d\n", val);
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 }
607 return(0);
608}
609
610/**
611 * xmlParserHandlePEReference:
612 * @ctxt: the parser context
613 *
614 * [69] PEReference ::= '%' Name ';'
615 *
616 * [ WFC: No Recursion ]
617 * A parsed entity must not contain a recursive
618 * reference to itself, either directly or indirectly.
619 *
620 * [ WFC: Entity Declared ]
621 * In a document without any DTD, a document with only an internal DTD
622 * subset which contains no parameter entity references, or a document
623 * with "standalone='yes'", ... ... The declaration of a parameter
624 * entity must precede any reference to it...
625 *
626 * [ VC: Entity Declared ]
627 * In a document with an external subset or external parameter entities
628 * with "standalone='no'", ... ... The declaration of a parameter entity
629 * must precede any reference to it...
630 *
631 * [ WFC: In DTD ]
632 * Parameter-entity references may only appear in the DTD.
633 * NOTE: misleading but this is handled.
634 *
635 * A PEReference may have been detected in the current input stream
636 * the handling is done accordingly to
637 * http://www.w3.org/TR/REC-xml#entproc
638 * i.e.
639 * - Included in literal in entity values
640 * - Included as Paraemeter Entity reference within DTDs
641 */
642void
643xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
644 xmlChar *name;
645 xmlEntityPtr entity = NULL;
646 xmlParserInputPtr input;
647
648 if (ctxt->token != 0) {
649 return;
650 }
651 if (RAW != '%') return;
652 switch(ctxt->instate) {
653 case XML_PARSER_CDATA_SECTION:
654 return;
655 case XML_PARSER_COMMENT:
656 return;
657 case XML_PARSER_START_TAG:
658 return;
659 case XML_PARSER_END_TAG:
660 return;
661 case XML_PARSER_EOF:
662 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
664 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
665 ctxt->wellFormed = 0;
666 ctxt->disableSAX = 1;
667 return;
668 case XML_PARSER_PROLOG:
669 case XML_PARSER_START:
670 case XML_PARSER_MISC:
671 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
673 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
674 ctxt->wellFormed = 0;
675 ctxt->disableSAX = 1;
676 return;
677 case XML_PARSER_ENTITY_DECL:
678 case XML_PARSER_CONTENT:
679 case XML_PARSER_ATTRIBUTE_VALUE:
680 case XML_PARSER_PI:
681 case XML_PARSER_SYSTEM_LITERAL:
682 /* we just ignore it there */
683 return;
684 case XML_PARSER_EPILOG:
685 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
688 ctxt->wellFormed = 0;
689 ctxt->disableSAX = 1;
690 return;
691 case XML_PARSER_ENTITY_VALUE:
692 /*
693 * NOTE: in the case of entity values, we don't do the
694 * substitution here since we need the literal
695 * entity value to be able to save the internal
696 * subset of the document.
697 * This will be handled by xmlStringDecodeEntities
698 */
699 return;
700 case XML_PARSER_DTD:
701 /*
702 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
703 * In the internal DTD subset, parameter-entity references
704 * can occur only where markup declarations can occur, not
705 * within markup declarations.
706 * In that case this is handled in xmlParseMarkupDecl
707 */
708 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
709 return;
710 break;
711 case XML_PARSER_IGNORE:
712 return;
713 }
714
715 NEXT;
716 name = xmlParseName(ctxt);
717 if (xmlParserDebugEntities)
718 xmlGenericError(xmlGenericErrorContext,
719 "PE Reference: %s\n", name);
720 if (name == NULL) {
721 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
724 ctxt->wellFormed = 0;
725 ctxt->disableSAX = 1;
726 } else {
727 if (RAW == ';') {
728 NEXT;
729 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
730 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
731 if (entity == NULL) {
732
733 /*
734 * [ WFC: Entity Declared ]
735 * In a document without any DTD, a document with only an
736 * internal DTD subset which contains no parameter entity
737 * references, or a document with "standalone='yes'", ...
738 * ... The declaration of a parameter entity must precede
739 * any reference to it...
740 */
741 if ((ctxt->standalone == 1) ||
742 ((ctxt->hasExternalSubset == 0) &&
743 (ctxt->hasPErefs == 0))) {
744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
745 ctxt->sax->error(ctxt->userData,
746 "PEReference: %%%s; not found\n", name);
747 ctxt->wellFormed = 0;
748 ctxt->disableSAX = 1;
749 } else {
750 /*
751 * [ VC: Entity Declared ]
752 * In a document with an external subset or external
753 * parameter entities with "standalone='no'", ...
754 * ... The declaration of a parameter entity must precede
755 * any reference to it...
756 */
757 if ((!ctxt->disableSAX) &&
758 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
759 ctxt->vctxt.error(ctxt->vctxt.userData,
760 "PEReference: %%%s; not found\n", name);
761 } else if ((!ctxt->disableSAX) &&
762 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
763 ctxt->sax->warning(ctxt->userData,
764 "PEReference: %%%s; not found\n", name);
765 ctxt->valid = 0;
766 }
767 } else {
768 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
769 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
770 /*
771 * handle the extra spaces added before and after
772 * c.f. http://www.w3.org/TR/REC-xml#as-PE
773 * this is done independantly.
774 */
775 input = xmlNewEntityInputStream(ctxt, entity);
776 xmlPushInput(ctxt, input);
777 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
778 (RAW == '<') && (NXT(1) == '?') &&
779 (NXT(2) == 'x') && (NXT(3) == 'm') &&
780 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
781 xmlParseTextDecl(ctxt);
782 }
783 if (ctxt->token == 0)
784 ctxt->token = ' ';
785 } else {
786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
787 ctxt->sax->error(ctxt->userData,
788 "xmlHandlePEReference: %s is not a parameter entity\n",
789 name);
790 ctxt->wellFormed = 0;
791 ctxt->disableSAX = 1;
792 }
793 }
794 } else {
795 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData,
798 "xmlHandlePEReference: expecting ';'\n");
799 ctxt->wellFormed = 0;
800 ctxt->disableSAX = 1;
801 }
802 xmlFree(name);
803 }
804}
805
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in the enclosing function: an xmlChar pointer
 * named @buffer and its allocated size named @buffer_size. The size is
 * doubled and the buffer reallocated accordingly.
 * If the reallocation fails the macro reports it with perror(), releases
 * the old buffer (the result goes through a temporary so it is not lost)
 * and makes the ENCLOSING FUNCTION return NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown_ = (xmlChar *)                                       \
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));        \
    if (grown_ == NULL) {                                               \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = grown_;                                                    \
    buffer##_size *= 2;                                                 \
}
818
819/**
820 * xmlStringDecodeEntities:
821 * @ctxt: the parser context
822 * @str: the input string
823 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
824 * @end: an end marker xmlChar, 0 if none
825 * @end2: an end marker xmlChar, 0 if none
826 * @end3: an end marker xmlChar, 0 if none
827 *
828 * Takes a entity string content and process to do the adequate subtitutions.
829 *
830 * [67] Reference ::= EntityRef | CharRef
831 *
832 * [69] PEReference ::= '%' Name ';'
833 *
834 * Returns A newly allocated string with the substitution done. The caller
835 * must deallocate it !
836 */
837xmlChar *
838xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
839 xmlChar end, xmlChar end2, xmlChar end3) {
840 xmlChar *buffer = NULL;
841 int buffer_size = 0;
842
843 xmlChar *current = NULL;
844 xmlEntityPtr ent;
845 int c,l;
846 int nbchars = 0;
847
848 if (str == NULL)
849 return(NULL);
850
851 if (ctxt->depth > 40) {
852 ctxt->errNo = XML_ERR_ENTITY_LOOP;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData,
855 "Detected entity reference loop\n");
856 ctxt->wellFormed = 0;
857 ctxt->disableSAX = 1;
858 return(NULL);
859 }
860
861 /*
862 * allocate a translation buffer.
863 */
864 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
865 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
866 if (buffer == NULL) {
867 perror("xmlDecodeEntities: malloc failed");
868 return(NULL);
869 }
870
871 /*
872 * Ok loop until we reach one of the ending char or a size limit.
873 * we are operating on already parsed values.
874 */
875 c = CUR_SCHAR(str, l);
876 while ((c != 0) && (c != end) && /* non input consuming loop */
877 (c != end2) && (c != end3)) {
878
879 if (c == 0) break;
880 if ((c == '&') && (str[1] == '#')) {
881 int val = xmlParseStringCharRef(ctxt, &str);
882 if (val != 0) {
883 COPY_BUF(0,buffer,nbchars,val);
884 }
885 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
886 if (xmlParserDebugEntities)
887 xmlGenericError(xmlGenericErrorContext,
888 "String decoding Entity Reference: %.30s\n",
889 str);
890 ent = xmlParseStringEntityRef(ctxt, &str);
891 if ((ent != NULL) &&
892 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
893 if (ent->content != NULL) {
894 COPY_BUF(0,buffer,nbchars,ent->content[0]);
895 } else {
896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
897 ctxt->sax->error(ctxt->userData,
898 "internal error entity has no content\n");
899 }
900 } else if ((ent != NULL) && (ent->content != NULL)) {
901 xmlChar *rep;
902
903 ctxt->depth++;
904 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
905 0, 0, 0);
906 ctxt->depth--;
907 if (rep != NULL) {
908 current = rep;
909 while (*current != 0) { /* non input consuming loop */
910 buffer[nbchars++] = *current++;
911 if (nbchars >
912 buffer_size - XML_PARSER_BUFFER_SIZE) {
913 growBuffer(buffer);
914 }
915 }
916 xmlFree(rep);
917 }
918 } else if (ent != NULL) {
919 int i = xmlStrlen(ent->name);
920 const xmlChar *cur = ent->name;
921
922 buffer[nbchars++] = '&';
923 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
924 growBuffer(buffer);
925 }
926 for (;i > 0;i--)
927 buffer[nbchars++] = *cur++;
928 buffer[nbchars++] = ';';
929 }
930 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
931 if (xmlParserDebugEntities)
932 xmlGenericError(xmlGenericErrorContext,
933 "String decoding PE Reference: %.30s\n", str);
934 ent = xmlParseStringPEReference(ctxt, &str);
935 if (ent != NULL) {
936 xmlChar *rep;
937
938 ctxt->depth++;
939 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
940 0, 0, 0);
941 ctxt->depth--;
942 if (rep != NULL) {
943 current = rep;
944 while (*current != 0) { /* non input consuming loop */
945 buffer[nbchars++] = *current++;
946 if (nbchars >
947 buffer_size - XML_PARSER_BUFFER_SIZE) {
948 growBuffer(buffer);
949 }
950 }
951 xmlFree(rep);
952 }
953 }
954 } else {
955 COPY_BUF(l,buffer,nbchars,c);
956 str += l;
957 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
958 growBuffer(buffer);
959 }
960 }
961 c = CUR_SCHAR(str, l);
962 }
963 buffer[nbchars++] = 0;
964 return(buffer);
965}
966
967
968/************************************************************************
969 * *
970 * Commodity functions to handle xmlChars *
971 * *
972 ************************************************************************/
973
974/**
975 * xmlStrndup:
976 * @cur: the input xmlChar *
977 * @len: the len of @cur
978 *
979 * a strndup for array of xmlChar's
980 *
981 * Returns a new xmlChar * or NULL
982 */
983xmlChar *
984xmlStrndup(const xmlChar *cur, int len) {
985 xmlChar *ret;
986
987 if ((cur == NULL) || (len < 0)) return(NULL);
988 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
989 if (ret == NULL) {
990 xmlGenericError(xmlGenericErrorContext,
991 "malloc of %ld byte failed\n",
992 (len + 1) * (long)sizeof(xmlChar));
993 return(NULL);
994 }
995 memcpy(ret, cur, len * sizeof(xmlChar));
996 ret[len] = 0;
997 return(ret);
998}
999
1000/**
1001 * xmlStrdup:
1002 * @cur: the input xmlChar *
1003 *
1004 * a strdup for array of xmlChar's. Since they are supposed to be
1005 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1006 * a termination mark of '0'.
1007 *
1008 * Returns a new xmlChar * or NULL
1009 */
1010xmlChar *
1011xmlStrdup(const xmlChar *cur) {
1012 const xmlChar *p = cur;
1013
1014 if (cur == NULL) return(NULL);
1015 while (*p != 0) p++; /* non input consuming */
1016 return(xmlStrndup(cur, p - cur));
1017}
1018
1019/**
1020 * xmlCharStrndup:
1021 * @cur: the input char *
1022 * @len: the len of @cur
1023 *
1024 * a strndup for char's to xmlChar's
1025 *
1026 * Returns a new xmlChar * or NULL
1027 */
1028
1029xmlChar *
1030xmlCharStrndup(const char *cur, int len) {
1031 int i;
1032 xmlChar *ret;
1033
1034 if ((cur == NULL) || (len < 0)) return(NULL);
1035 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1036 if (ret == NULL) {
1037 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1038 (len + 1) * (long)sizeof(xmlChar));
1039 return(NULL);
1040 }
1041 for (i = 0;i < len;i++)
1042 ret[i] = (xmlChar) cur[i];
1043 ret[len] = 0;
1044 return(ret);
1045}
1046
1047/**
1048 * xmlCharStrdup:
1049 * @cur: the input char *
1050 * @len: the len of @cur
1051 *
1052 * a strdup for char's to xmlChar's
1053 *
1054 * Returns a new xmlChar * or NULL
1055 */
1056
1057xmlChar *
1058xmlCharStrdup(const char *cur) {
1059 const char *p = cur;
1060
1061 if (cur == NULL) return(NULL);
1062 while (*p != '\0') p++; /* non input consuming */
1063 return(xmlCharStrndup(cur, p - cur));
1064}
1065
1066/**
1067 * xmlStrcmp:
1068 * @str1: the first xmlChar *
1069 * @str2: the second xmlChar *
1070 *
1071 * a strcmp for xmlChar's
1072 *
1073 * Returns the integer result of the comparison
1074 */
1075
1076int
1077xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1078 register int tmp;
1079
1080 if (str1 == str2) return(0);
1081 if (str1 == NULL) return(-1);
1082 if (str2 == NULL) return(1);
1083 do {
1084 tmp = *str1++ - *str2;
1085 if (tmp != 0) return(tmp);
1086 } while (*str2++ != 0);
1087 return 0;
1088}
1089
1090/**
1091 * xmlStrEqual:
1092 * @str1: the first xmlChar *
1093 * @str2: the second xmlChar *
1094 *
1095 * Check if both string are equal of have same content
1096 * Should be a bit more readable and faster than xmlStrEqual()
1097 *
1098 * Returns 1 if they are equal, 0 if they are different
1099 */
1100
1101int
1102xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1103 if (str1 == str2) return(1);
1104 if (str1 == NULL) return(0);
1105 if (str2 == NULL) return(0);
1106 do {
1107 if (*str1++ != *str2) return(0);
1108 } while (*str2++);
1109 return(1);
1110}
1111
1112/**
1113 * xmlStrncmp:
1114 * @str1: the first xmlChar *
1115 * @str2: the second xmlChar *
1116 * @len: the max comparison length
1117 *
1118 * a strncmp for xmlChar's
1119 *
1120 * Returns the integer result of the comparison
1121 */
1122
1123int
1124xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1125 register int tmp;
1126
1127 if (len <= 0) return(0);
1128 if (str1 == str2) return(0);
1129 if (str1 == NULL) return(-1);
1130 if (str2 == NULL) return(1);
1131 do {
1132 tmp = *str1++ - *str2;
1133 if (tmp != 0 || --len == 0) return(tmp);
1134 } while (*str2++ != 0);
1135 return 0;
1136}
1137
1138static xmlChar casemap[256] = {
1139 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1140 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1141 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1142 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1143 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1144 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1145 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1146 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1147 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1148 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1149 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1150 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1151 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1152 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1153 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1154 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1155 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1156 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1157 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1158 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1159 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1160 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1161 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1162 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1163 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1164 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1165 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1166 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1167 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1168 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1169 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1170 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1171};
1172
1173/**
1174 * xmlStrcasecmp:
1175 * @str1: the first xmlChar *
1176 * @str2: the second xmlChar *
1177 *
1178 * a strcasecmp for xmlChar's
1179 *
1180 * Returns the integer result of the comparison
1181 */
1182
1183int
1184xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1185 register int tmp;
1186
1187 if (str1 == str2) return(0);
1188 if (str1 == NULL) return(-1);
1189 if (str2 == NULL) return(1);
1190 do {
1191 tmp = casemap[*str1++] - casemap[*str2];
1192 if (tmp != 0) return(tmp);
1193 } while (*str2++ != 0);
1194 return 0;
1195}
1196
1197/**
1198 * xmlStrncasecmp:
1199 * @str1: the first xmlChar *
1200 * @str2: the second xmlChar *
1201 * @len: the max comparison length
1202 *
1203 * a strncasecmp for xmlChar's
1204 *
1205 * Returns the integer result of the comparison
1206 */
1207
1208int
1209xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1210 register int tmp;
1211
1212 if (len <= 0) return(0);
1213 if (str1 == str2) return(0);
1214 if (str1 == NULL) return(-1);
1215 if (str2 == NULL) return(1);
1216 do {
1217 tmp = casemap[*str1++] - casemap[*str2];
1218 if (tmp != 0 || --len == 0) return(tmp);
1219 } while (*str2++ != 0);
1220 return 0;
1221}
1222
1223/**
1224 * xmlStrchr:
1225 * @str: the xmlChar * array
1226 * @val: the xmlChar to search
1227 *
1228 * a strchr for xmlChar's
1229 *
1230 * Returns the xmlChar * for the first occurence or NULL.
1231 */
1232
1233const xmlChar *
1234xmlStrchr(const xmlChar *str, xmlChar val) {
1235 if (str == NULL) return(NULL);
1236 while (*str != 0) { /* non input consuming */
1237 if (*str == val) return((xmlChar *) str);
1238 str++;
1239 }
1240 return(NULL);
1241}
1242
1243/**
1244 * xmlStrstr:
1245 * @str: the xmlChar * array (haystack)
1246 * @val: the xmlChar to search (needle)
1247 *
1248 * a strstr for xmlChar's
1249 *
1250 * Returns the xmlChar * for the first occurence or NULL.
1251 */
1252
1253const xmlChar *
1254xmlStrstr(const xmlChar *str, xmlChar *val) {
1255 int n;
1256
1257 if (str == NULL) return(NULL);
1258 if (val == NULL) return(NULL);
1259 n = xmlStrlen(val);
1260
1261 if (n == 0) return(str);
1262 while (*str != 0) { /* non input consuming */
1263 if (*str == *val) {
1264 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1265 }
1266 str++;
1267 }
1268 return(NULL);
1269}
1270
1271/**
1272 * xmlStrcasestr:
1273 * @str: the xmlChar * array (haystack)
1274 * @val: the xmlChar to search (needle)
1275 *
1276 * a case-ignoring strstr for xmlChar's
1277 *
1278 * Returns the xmlChar * for the first occurence or NULL.
1279 */
1280
1281const xmlChar *
1282xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1283 int n;
1284
1285 if (str == NULL) return(NULL);
1286 if (val == NULL) return(NULL);
1287 n = xmlStrlen(val);
1288
1289 if (n == 0) return(str);
1290 while (*str != 0) { /* non input consuming */
1291 if (casemap[*str] == casemap[*val])
1292 if (!xmlStrncasecmp(str, val, n)) return(str);
1293 str++;
1294 }
1295 return(NULL);
1296}
1297
1298/**
1299 * xmlStrsub:
1300 * @str: the xmlChar * array (haystack)
1301 * @start: the index of the first char (zero based)
1302 * @len: the length of the substring
1303 *
1304 * Extract a substring of a given string
1305 *
1306 * Returns the xmlChar * for the first occurence or NULL.
1307 */
1308
1309xmlChar *
1310xmlStrsub(const xmlChar *str, int start, int len) {
1311 int i;
1312
1313 if (str == NULL) return(NULL);
1314 if (start < 0) return(NULL);
1315 if (len < 0) return(NULL);
1316
1317 for (i = 0;i < start;i++) {
1318 if (*str == 0) return(NULL);
1319 str++;
1320 }
1321 if (*str == 0) return(NULL);
1322 return(xmlStrndup(str, len));
1323}
1324
1325/**
1326 * xmlStrlen:
1327 * @str: the xmlChar * array
1328 *
1329 * length of a xmlChar's string
1330 *
1331 * Returns the number of xmlChar contained in the ARRAY.
1332 */
1333
1334int
1335xmlStrlen(const xmlChar *str) {
1336 int len = 0;
1337
1338 if (str == NULL) return(0);
1339 while (*str != 0) { /* non input consuming */
1340 str++;
1341 len++;
1342 }
1343 return(len);
1344}
1345
1346/**
1347 * xmlStrncat:
1348 * @cur: the original xmlChar * array
1349 * @add: the xmlChar * array added
1350 * @len: the length of @add
1351 *
1352 * a strncat for array of xmlChar's, it will extend cur with the len
1353 * first bytes of @add.
1354 *
1355 * Returns a new xmlChar *, the original @cur is reallocated if needed
1356 * and should not be freed
1357 */
1358
1359xmlChar *
1360xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1361 int size;
1362 xmlChar *ret;
1363
1364 if ((add == NULL) || (len == 0))
1365 return(cur);
1366 if (cur == NULL)
1367 return(xmlStrndup(add, len));
1368
1369 size = xmlStrlen(cur);
1370 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1371 if (ret == NULL) {
1372 xmlGenericError(xmlGenericErrorContext,
1373 "xmlStrncat: realloc of %ld byte failed\n",
1374 (size + len + 1) * (long)sizeof(xmlChar));
1375 return(cur);
1376 }
1377 memcpy(&ret[size], add, len * sizeof(xmlChar));
1378 ret[size + len] = 0;
1379 return(ret);
1380}
1381
1382/**
1383 * xmlStrcat:
1384 * @cur: the original xmlChar * array
1385 * @add: the xmlChar * array added
1386 *
1387 * a strcat for array of xmlChar's. Since they are supposed to be
1388 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1389 * a termination mark of '0'.
1390 *
1391 * Returns a new xmlChar * containing the concatenated string.
1392 */
1393xmlChar *
1394xmlStrcat(xmlChar *cur, const xmlChar *add) {
1395 const xmlChar *p = add;
1396
1397 if (add == NULL) return(cur);
1398 if (cur == NULL)
1399 return(xmlStrdup(add));
1400
1401 while (*p != 0) p++; /* non input consuming */
1402 return(xmlStrncat(cur, add, p - add));
1403}
1404
1405/************************************************************************
1406 * *
1407 * Commodity functions, cleanup needed ? *
1408 * *
1409 ************************************************************************/
1410
1411/**
1412 * areBlanks:
1413 * @ctxt: an XML parser context
1414 * @str: a xmlChar *
1415 * @len: the size of @str
1416 *
1417 * Is this a sequence of blank chars that one can ignore ?
1418 *
1419 * Returns 1 if ignorable 0 otherwise.
1420 */
1421
1422static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1423 int i, ret;
1424 xmlNodePtr lastChild;
1425
Daniel Veillard2f362242001-03-02 17:36:21 +00001426 if (ctxt->keepBlanks)
1427 return(0);
1428
Owen Taylor3473f882001-02-23 17:55:21 +00001429 /*
1430 * Check for xml:space value.
1431 */
1432 if (*(ctxt->space) == 1)
1433 return(0);
1434
1435 /*
1436 * Check that the string is made of blanks
1437 */
1438 for (i = 0;i < len;i++)
1439 if (!(IS_BLANK(str[i]))) return(0);
1440
1441 /*
1442 * Look if the element is mixed content in the Dtd if available
1443 */
1444 if (ctxt->myDoc != NULL) {
1445 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1446 if (ret == 0) return(1);
1447 if (ret == 1) return(0);
1448 }
1449
1450 /*
1451 * Otherwise, heuristic :-\
1452 */
Owen Taylor3473f882001-02-23 17:55:21 +00001453 if (RAW != '<') return(0);
1454 if (ctxt->node == NULL) return(0);
1455 if ((ctxt->node->children == NULL) &&
1456 (RAW == '<') && (NXT(1) == '/')) return(0);
1457
1458 lastChild = xmlGetLastChild(ctxt->node);
1459 if (lastChild == NULL) {
1460 if (ctxt->node->content != NULL) return(0);
1461 } else if (xmlNodeIsText(lastChild))
1462 return(0);
1463 else if ((ctxt->node->children != NULL) &&
1464 (xmlNodeIsText(ctxt->node->children)))
1465 return(0);
1466 return(1);
1467}
1468
1469/*
1470 * Forward declarations for recursive behaviour.
1471 */
1472void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1473void xmlParseReference(xmlParserCtxtPtr ctxt);
1474
1475/************************************************************************
1476 * *
1477 * Extra stuff for namespace support *
1478 * Relates to http://www.w3.org/TR/WD-xml-names *
1479 * *
1480 ************************************************************************/
1481
1482/**
1483 * xmlSplitQName:
1484 * @ctxt: an XML parser context
1485 * @name: an XML parser context
1486 * @prefix: a xmlChar **
1487 *
1488 * parse an UTF8 encoded XML qualified name string
1489 *
1490 * [NS 5] QName ::= (Prefix ':')? LocalPart
1491 *
1492 * [NS 6] Prefix ::= NCName
1493 *
1494 * [NS 7] LocalPart ::= NCName
1495 *
1496 * Returns the local part, and prefix is updated
1497 * to get the Prefix if any.
1498 */
1499
1500xmlChar *
1501xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1502 xmlChar buf[XML_MAX_NAMELEN + 5];
1503 xmlChar *buffer = NULL;
1504 int len = 0;
1505 int max = XML_MAX_NAMELEN;
1506 xmlChar *ret = NULL;
1507 const xmlChar *cur = name;
1508 int c;
1509
1510 *prefix = NULL;
1511
1512#ifndef XML_XML_NAMESPACE
1513 /* xml: prefix is not really a namespace */
1514 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1515 (cur[2] == 'l') && (cur[3] == ':'))
1516 return(xmlStrdup(name));
1517#endif
1518
1519 /* nasty but valid */
1520 if (cur[0] == ':')
1521 return(xmlStrdup(name));
1522
1523 c = *cur++;
1524 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1525 buf[len++] = c;
1526 c = *cur++;
1527 }
1528 if (len >= max) {
1529 /*
1530 * Okay someone managed to make a huge name, so he's ready to pay
1531 * for the processing speed.
1532 */
1533 max = len * 2;
1534
1535 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1536 if (buffer == NULL) {
1537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1538 ctxt->sax->error(ctxt->userData,
1539 "xmlSplitQName: out of memory\n");
1540 return(NULL);
1541 }
1542 memcpy(buffer, buf, len);
1543 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1544 if (len + 10 > max) {
1545 max *= 2;
1546 buffer = (xmlChar *) xmlRealloc(buffer,
1547 max * sizeof(xmlChar));
1548 if (buffer == NULL) {
1549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1550 ctxt->sax->error(ctxt->userData,
1551 "xmlSplitQName: out of memory\n");
1552 return(NULL);
1553 }
1554 }
1555 buffer[len++] = c;
1556 c = *cur++;
1557 }
1558 buffer[len] = 0;
1559 }
1560
1561 if (buffer == NULL)
1562 ret = xmlStrndup(buf, len);
1563 else {
1564 ret = buffer;
1565 buffer = NULL;
1566 max = XML_MAX_NAMELEN;
1567 }
1568
1569
1570 if (c == ':') {
1571 c = *cur++;
1572 if (c == 0) return(ret);
1573 *prefix = ret;
1574 len = 0;
1575
1576 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1577 buf[len++] = c;
1578 c = *cur++;
1579 }
1580 if (len >= max) {
1581 /*
1582 * Okay someone managed to make a huge name, so he's ready to pay
1583 * for the processing speed.
1584 */
1585 max = len * 2;
1586
1587 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1588 if (buffer == NULL) {
1589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1590 ctxt->sax->error(ctxt->userData,
1591 "xmlSplitQName: out of memory\n");
1592 return(NULL);
1593 }
1594 memcpy(buffer, buf, len);
1595 while (c != 0) { /* tested bigname2.xml */
1596 if (len + 10 > max) {
1597 max *= 2;
1598 buffer = (xmlChar *) xmlRealloc(buffer,
1599 max * sizeof(xmlChar));
1600 if (buffer == NULL) {
1601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602 ctxt->sax->error(ctxt->userData,
1603 "xmlSplitQName: out of memory\n");
1604 return(NULL);
1605 }
1606 }
1607 buffer[len++] = c;
1608 c = *cur++;
1609 }
1610 buffer[len] = 0;
1611 }
1612
1613 if (buffer == NULL)
1614 ret = xmlStrndup(buf, len);
1615 else {
1616 ret = buffer;
1617 }
1618 }
1619
1620 return(ret);
1621}
1622
1623/************************************************************************
1624 * *
1625 * The parser itself *
1626 * Relates to http://www.w3.org/TR/REC-xml *
1627 * *
1628 ************************************************************************/
1629
Daniel Veillard76d66f42001-05-16 21:05:17 +00001630static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001631/**
1632 * xmlParseName:
1633 * @ctxt: an XML parser context
1634 *
1635 * parse an XML name.
1636 *
1637 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1638 * CombiningChar | Extender
1639 *
1640 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1641 *
1642 * [6] Names ::= Name (S Name)*
1643 *
1644 * Returns the Name parsed or NULL
1645 */
1646
1647xmlChar *
1648xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001649 const xmlChar *in;
1650 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001651 int count = 0;
1652
1653 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001654
1655 /*
1656 * Accelerator for simple ASCII names
1657 */
1658 in = ctxt->input->cur;
1659 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1660 ((*in >= 0x41) && (*in <= 0x5A)) ||
1661 (*in == '_') || (*in == ':')) {
1662 in++;
1663 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1664 ((*in >= 0x41) && (*in <= 0x5A)) ||
1665 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001666 (*in == '_') || (*in == '-') ||
1667 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001668 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001669 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001670 count = in - ctxt->input->cur;
1671 ret = xmlStrndup(ctxt->input->cur, count);
1672 ctxt->input->cur = in;
1673 return(ret);
1674 }
1675 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001676 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001677}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001678
Daniel Veillard76d66f42001-05-16 21:05:17 +00001679static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001680xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1681 xmlChar buf[XML_MAX_NAMELEN + 5];
1682 int len = 0, l;
1683 int c;
1684 int count = 0;
1685
1686 /*
1687 * Handler for more complex cases
1688 */
1689 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001690 c = CUR_CHAR(l);
1691 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1692 (!IS_LETTER(c) && (c != '_') &&
1693 (c != ':'))) {
1694 return(NULL);
1695 }
1696
1697 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1698 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1699 (c == '.') || (c == '-') ||
1700 (c == '_') || (c == ':') ||
1701 (IS_COMBINING(c)) ||
1702 (IS_EXTENDER(c)))) {
1703 if (count++ > 100) {
1704 count = 0;
1705 GROW;
1706 }
1707 COPY_BUF(l,buf,len,c);
1708 NEXTL(l);
1709 c = CUR_CHAR(l);
1710 if (len >= XML_MAX_NAMELEN) {
1711 /*
1712 * Okay someone managed to make a huge name, so he's ready to pay
1713 * for the processing speed.
1714 */
1715 xmlChar *buffer;
1716 int max = len * 2;
1717
1718 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1719 if (buffer == NULL) {
1720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1721 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001722 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001723 return(NULL);
1724 }
1725 memcpy(buffer, buf, len);
1726 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1727 (c == '.') || (c == '-') ||
1728 (c == '_') || (c == ':') ||
1729 (IS_COMBINING(c)) ||
1730 (IS_EXTENDER(c))) {
1731 if (count++ > 100) {
1732 count = 0;
1733 GROW;
1734 }
1735 if (len + 10 > max) {
1736 max *= 2;
1737 buffer = (xmlChar *) xmlRealloc(buffer,
1738 max * sizeof(xmlChar));
1739 if (buffer == NULL) {
1740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001742 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001743 return(NULL);
1744 }
1745 }
1746 COPY_BUF(l,buffer,len,c);
1747 NEXTL(l);
1748 c = CUR_CHAR(l);
1749 }
1750 buffer[len] = 0;
1751 return(buffer);
1752 }
1753 }
1754 return(xmlStrndup(buf, len));
1755}
1756
1757/**
1758 * xmlParseStringName:
1759 * @ctxt: an XML parser context
1760 * @str: a pointer to the string pointer (IN/OUT)
1761 *
1762 * parse an XML name.
1763 *
1764 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1765 * CombiningChar | Extender
1766 *
1767 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1768 *
1769 * [6] Names ::= Name (S Name)*
1770 *
1771 * Returns the Name parsed or NULL. The str pointer
1772 * is updated to the current location in the string.
1773 */
1774
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001775static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001776xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1777 xmlChar buf[XML_MAX_NAMELEN + 5];
1778 const xmlChar *cur = *str;
1779 int len = 0, l;
1780 int c;
1781
1782 c = CUR_SCHAR(cur, l);
1783 if (!IS_LETTER(c) && (c != '_') &&
1784 (c != ':')) {
1785 return(NULL);
1786 }
1787
1788 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1789 (c == '.') || (c == '-') ||
1790 (c == '_') || (c == ':') ||
1791 (IS_COMBINING(c)) ||
1792 (IS_EXTENDER(c))) {
1793 COPY_BUF(l,buf,len,c);
1794 cur += l;
1795 c = CUR_SCHAR(cur, l);
1796 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1797 /*
1798 * Okay someone managed to make a huge name, so he's ready to pay
1799 * for the processing speed.
1800 */
1801 xmlChar *buffer;
1802 int max = len * 2;
1803
1804 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1805 if (buffer == NULL) {
1806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1807 ctxt->sax->error(ctxt->userData,
1808 "xmlParseStringName: out of memory\n");
1809 return(NULL);
1810 }
1811 memcpy(buffer, buf, len);
1812 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1813 (c == '.') || (c == '-') ||
1814 (c == '_') || (c == ':') ||
1815 (IS_COMBINING(c)) ||
1816 (IS_EXTENDER(c))) {
1817 if (len + 10 > max) {
1818 max *= 2;
1819 buffer = (xmlChar *) xmlRealloc(buffer,
1820 max * sizeof(xmlChar));
1821 if (buffer == NULL) {
1822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1823 ctxt->sax->error(ctxt->userData,
1824 "xmlParseStringName: out of memory\n");
1825 return(NULL);
1826 }
1827 }
1828 COPY_BUF(l,buffer,len,c);
1829 cur += l;
1830 c = CUR_SCHAR(cur, l);
1831 }
1832 buffer[len] = 0;
1833 *str = cur;
1834 return(buffer);
1835 }
1836 }
1837 *str = cur;
1838 return(xmlStrndup(buf, len));
1839}
1840
1841/**
1842 * xmlParseNmtoken:
1843 * @ctxt: an XML parser context
1844 *
1845 * parse an XML Nmtoken.
1846 *
1847 * [7] Nmtoken ::= (NameChar)+
1848 *
1849 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1850 *
1851 * Returns the Nmtoken parsed or NULL
1852 */
1853
1854xmlChar *
1855xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1856 xmlChar buf[XML_MAX_NAMELEN + 5];
1857 int len = 0, l;
1858 int c;
1859 int count = 0;
1860
1861 GROW;
1862 c = CUR_CHAR(l);
1863
1864 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1865 (c == '.') || (c == '-') ||
1866 (c == '_') || (c == ':') ||
1867 (IS_COMBINING(c)) ||
1868 (IS_EXTENDER(c))) {
1869 if (count++ > 100) {
1870 count = 0;
1871 GROW;
1872 }
1873 COPY_BUF(l,buf,len,c);
1874 NEXTL(l);
1875 c = CUR_CHAR(l);
1876 if (len >= XML_MAX_NAMELEN) {
1877 /*
1878 * Okay someone managed to make a huge token, so he's ready to pay
1879 * for the processing speed.
1880 */
1881 xmlChar *buffer;
1882 int max = len * 2;
1883
1884 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1885 if (buffer == NULL) {
1886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1887 ctxt->sax->error(ctxt->userData,
1888 "xmlParseNmtoken: out of memory\n");
1889 return(NULL);
1890 }
1891 memcpy(buffer, buf, len);
1892 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1893 (c == '.') || (c == '-') ||
1894 (c == '_') || (c == ':') ||
1895 (IS_COMBINING(c)) ||
1896 (IS_EXTENDER(c))) {
1897 if (count++ > 100) {
1898 count = 0;
1899 GROW;
1900 }
1901 if (len + 10 > max) {
1902 max *= 2;
1903 buffer = (xmlChar *) xmlRealloc(buffer,
1904 max * sizeof(xmlChar));
1905 if (buffer == NULL) {
1906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1907 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001908 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001909 return(NULL);
1910 }
1911 }
1912 COPY_BUF(l,buffer,len,c);
1913 NEXTL(l);
1914 c = CUR_CHAR(l);
1915 }
1916 buffer[len] = 0;
1917 return(buffer);
1918 }
1919 }
1920 if (len == 0)
1921 return(NULL);
1922 return(xmlStrndup(buf, len));
1923}
1924
1925/**
1926 * xmlParseEntityValue:
1927 * @ctxt: an XML parser context
1928 * @orig: if non-NULL store a copy of the original entity value
1929 *
1930 * parse a value for ENTITY declarations
1931 *
1932 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1933 * "'" ([^%&'] | PEReference | Reference)* "'"
1934 *
1935 * Returns the EntityValue parsed with reference substitued or NULL
1936 */
1937
1938xmlChar *
1939xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1940 xmlChar *buf = NULL;
1941 int len = 0;
1942 int size = XML_PARSER_BUFFER_SIZE;
1943 int c, l;
1944 xmlChar stop;
1945 xmlChar *ret = NULL;
1946 const xmlChar *cur = NULL;
1947 xmlParserInputPtr input;
1948
1949 if (RAW == '"') stop = '"';
1950 else if (RAW == '\'') stop = '\'';
1951 else {
1952 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1954 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1955 ctxt->wellFormed = 0;
1956 ctxt->disableSAX = 1;
1957 return(NULL);
1958 }
1959 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1960 if (buf == NULL) {
1961 xmlGenericError(xmlGenericErrorContext,
1962 "malloc of %d byte failed\n", size);
1963 return(NULL);
1964 }
1965
1966 /*
1967 * The content of the entity definition is copied in a buffer.
1968 */
1969
1970 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1971 input = ctxt->input;
1972 GROW;
1973 NEXT;
1974 c = CUR_CHAR(l);
1975 /*
1976 * NOTE: 4.4.5 Included in Literal
1977 * When a parameter entity reference appears in a literal entity
1978 * value, ... a single or double quote character in the replacement
1979 * text is always treated as a normal data character and will not
1980 * terminate the literal.
1981 * In practice it means we stop the loop only when back at parsing
1982 * the initial entity and the quote is found
1983 */
1984 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1985 (ctxt->input != input))) {
1986 if (len + 5 >= size) {
1987 size *= 2;
1988 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1989 if (buf == NULL) {
1990 xmlGenericError(xmlGenericErrorContext,
1991 "realloc of %d byte failed\n", size);
1992 return(NULL);
1993 }
1994 }
1995 COPY_BUF(l,buf,len,c);
1996 NEXTL(l);
1997 /*
1998 * Pop-up of finished entities.
1999 */
2000 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2001 xmlPopInput(ctxt);
2002
2003 GROW;
2004 c = CUR_CHAR(l);
2005 if (c == 0) {
2006 GROW;
2007 c = CUR_CHAR(l);
2008 }
2009 }
2010 buf[len] = 0;
2011
2012 /*
2013 * Raise problem w.r.t. '&' and '%' being used in non-entities
2014 * reference constructs. Note Charref will be handled in
2015 * xmlStringDecodeEntities()
2016 */
2017 cur = buf;
2018 while (*cur != 0) { /* non input consuming */
2019 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2020 xmlChar *name;
2021 xmlChar tmp = *cur;
2022
2023 cur++;
2024 name = xmlParseStringName(ctxt, &cur);
2025 if ((name == NULL) || (*cur != ';')) {
2026 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2028 ctxt->sax->error(ctxt->userData,
2029 "EntityValue: '%c' forbidden except for entities references\n",
2030 tmp);
2031 ctxt->wellFormed = 0;
2032 ctxt->disableSAX = 1;
2033 }
2034 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2035 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2037 ctxt->sax->error(ctxt->userData,
2038 "EntityValue: PEReferences forbidden in internal subset\n",
2039 tmp);
2040 ctxt->wellFormed = 0;
2041 ctxt->disableSAX = 1;
2042 }
2043 if (name != NULL)
2044 xmlFree(name);
2045 }
2046 cur++;
2047 }
2048
2049 /*
2050 * Then PEReference entities are substituted.
2051 */
2052 if (c != stop) {
2053 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2055 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2056 ctxt->wellFormed = 0;
2057 ctxt->disableSAX = 1;
2058 xmlFree(buf);
2059 } else {
2060 NEXT;
2061 /*
2062 * NOTE: 4.4.7 Bypassed
2063 * When a general entity reference appears in the EntityValue in
2064 * an entity declaration, it is bypassed and left as is.
2065 * so XML_SUBSTITUTE_REF is not set here.
2066 */
2067 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2068 0, 0, 0);
2069 if (orig != NULL)
2070 *orig = buf;
2071 else
2072 xmlFree(buf);
2073 }
2074
2075 return(ret);
2076}
2077
2078/**
2079 * xmlParseAttValue:
2080 * @ctxt: an XML parser context
2081 *
2082 * parse a value for an attribute
2083 * Note: the parser won't do substitution of entities here, this
2084 * will be handled later in xmlStringGetNodeList
2085 *
2086 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2087 * "'" ([^<&'] | Reference)* "'"
2088 *
2089 * 3.3.3 Attribute-Value Normalization:
2090 * Before the value of an attribute is passed to the application or
2091 * checked for validity, the XML processor must normalize it as follows:
2092 * - a character reference is processed by appending the referenced
2093 * character to the attribute value
2094 * - an entity reference is processed by recursively processing the
2095 * replacement text of the entity
2096 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2097 * appending #x20 to the normalized value, except that only a single
2098 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2099 * parsed entity or the literal entity value of an internal parsed entity
2100 * - other characters are processed by appending them to the normalized value
2101 * If the declared value is not CDATA, then the XML processor must further
2102 * process the normalized attribute value by discarding any leading and
2103 * trailing space (#x20) characters, and by replacing sequences of space
2104 * (#x20) characters by a single space (#x20) character.
2105 * All attributes for which no declaration has been read should be treated
2106 * by a non-validating parser as if declared CDATA.
2107 *
2108 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2109 */
2110
2111xmlChar *
2112xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2113 xmlChar limit = 0;
2114 xmlChar *buf = NULL;
2115 int len = 0;
2116 int buf_size = 0;
2117 int c, l;
2118 xmlChar *current = NULL;
2119 xmlEntityPtr ent;
2120
2121
2122 SHRINK;
2123 if (NXT(0) == '"') {
2124 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2125 limit = '"';
2126 NEXT;
2127 } else if (NXT(0) == '\'') {
2128 limit = '\'';
2129 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2130 NEXT;
2131 } else {
2132 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2134 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2135 ctxt->wellFormed = 0;
2136 ctxt->disableSAX = 1;
2137 return(NULL);
2138 }
2139
2140 /*
2141 * allocate a translation buffer.
2142 */
2143 buf_size = XML_PARSER_BUFFER_SIZE;
2144 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2145 if (buf == NULL) {
2146 perror("xmlParseAttValue: malloc failed");
2147 return(NULL);
2148 }
2149
2150 /*
2151 * Ok loop until we reach one of the ending char or a size limit.
2152 */
2153 c = CUR_CHAR(l);
2154 while (((NXT(0) != limit) && /* checked */
2155 (c != '<')) || (ctxt->token != 0)) {
2156 if (c == 0) break;
2157 if (ctxt->token == '&') {
2158 /*
2159 * The reparsing will be done in xmlStringGetNodeList()
2160 * called by the attribute() function in SAX.c
2161 */
2162 static xmlChar buffer[6] = "&#38;";
2163
2164 if (len > buf_size - 10) {
2165 growBuffer(buf);
2166 }
2167 current = &buffer[0];
2168 while (*current != 0) { /* non input consuming */
2169 buf[len++] = *current++;
2170 }
2171 ctxt->token = 0;
2172 } else if (c == '&') {
2173 if (NXT(1) == '#') {
2174 int val = xmlParseCharRef(ctxt);
2175 if (val == '&') {
2176 /*
2177 * The reparsing will be done in xmlStringGetNodeList()
2178 * called by the attribute() function in SAX.c
2179 */
2180 static xmlChar buffer[6] = "&#38;";
2181
2182 if (len > buf_size - 10) {
2183 growBuffer(buf);
2184 }
2185 current = &buffer[0];
2186 while (*current != 0) { /* non input consuming */
2187 buf[len++] = *current++;
2188 }
2189 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002190 if (len > buf_size - 10) {
2191 growBuffer(buf);
2192 }
Owen Taylor3473f882001-02-23 17:55:21 +00002193 len += xmlCopyChar(0, &buf[len], val);
2194 }
2195 } else {
2196 ent = xmlParseEntityRef(ctxt);
2197 if ((ent != NULL) &&
2198 (ctxt->replaceEntities != 0)) {
2199 xmlChar *rep;
2200
2201 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2202 rep = xmlStringDecodeEntities(ctxt, ent->content,
2203 XML_SUBSTITUTE_REF, 0, 0, 0);
2204 if (rep != NULL) {
2205 current = rep;
2206 while (*current != 0) { /* non input consuming */
2207 buf[len++] = *current++;
2208 if (len > buf_size - 10) {
2209 growBuffer(buf);
2210 }
2211 }
2212 xmlFree(rep);
2213 }
2214 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002215 if (len > buf_size - 10) {
2216 growBuffer(buf);
2217 }
Owen Taylor3473f882001-02-23 17:55:21 +00002218 if (ent->content != NULL)
2219 buf[len++] = ent->content[0];
2220 }
2221 } else if (ent != NULL) {
2222 int i = xmlStrlen(ent->name);
2223 const xmlChar *cur = ent->name;
2224
2225 /*
2226 * This may look absurd but is needed to detect
2227 * entities problems
2228 */
2229 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2230 (ent->content != NULL)) {
2231 xmlChar *rep;
2232 rep = xmlStringDecodeEntities(ctxt, ent->content,
2233 XML_SUBSTITUTE_REF, 0, 0, 0);
2234 if (rep != NULL)
2235 xmlFree(rep);
2236 }
2237
2238 /*
2239 * Just output the reference
2240 */
2241 buf[len++] = '&';
2242 if (len > buf_size - i - 10) {
2243 growBuffer(buf);
2244 }
2245 for (;i > 0;i--)
2246 buf[len++] = *cur++;
2247 buf[len++] = ';';
2248 }
2249 }
2250 } else {
2251 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2252 COPY_BUF(l,buf,len,0x20);
2253 if (len > buf_size - 10) {
2254 growBuffer(buf);
2255 }
2256 } else {
2257 COPY_BUF(l,buf,len,c);
2258 if (len > buf_size - 10) {
2259 growBuffer(buf);
2260 }
2261 }
2262 NEXTL(l);
2263 }
2264 GROW;
2265 c = CUR_CHAR(l);
2266 }
2267 buf[len++] = 0;
2268 if (RAW == '<') {
2269 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2271 ctxt->sax->error(ctxt->userData,
2272 "Unescaped '<' not allowed in attributes values\n");
2273 ctxt->wellFormed = 0;
2274 ctxt->disableSAX = 1;
2275 } else if (RAW != limit) {
2276 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2278 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2279 ctxt->wellFormed = 0;
2280 ctxt->disableSAX = 1;
2281 } else
2282 NEXT;
2283 return(buf);
2284}
2285
2286/**
2287 * xmlParseSystemLiteral:
2288 * @ctxt: an XML parser context
2289 *
2290 * parse an XML Literal
2291 *
2292 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2293 *
2294 * Returns the SystemLiteral parsed or NULL
2295 */
2296
2297xmlChar *
2298xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2299 xmlChar *buf = NULL;
2300 int len = 0;
2301 int size = XML_PARSER_BUFFER_SIZE;
2302 int cur, l;
2303 xmlChar stop;
2304 int state = ctxt->instate;
2305 int count = 0;
2306
2307 SHRINK;
2308 if (RAW == '"') {
2309 NEXT;
2310 stop = '"';
2311 } else if (RAW == '\'') {
2312 NEXT;
2313 stop = '\'';
2314 } else {
2315 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2317 ctxt->sax->error(ctxt->userData,
2318 "SystemLiteral \" or ' expected\n");
2319 ctxt->wellFormed = 0;
2320 ctxt->disableSAX = 1;
2321 return(NULL);
2322 }
2323
2324 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2325 if (buf == NULL) {
2326 xmlGenericError(xmlGenericErrorContext,
2327 "malloc of %d byte failed\n", size);
2328 return(NULL);
2329 }
2330 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2331 cur = CUR_CHAR(l);
2332 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2333 if (len + 5 >= size) {
2334 size *= 2;
2335 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2336 if (buf == NULL) {
2337 xmlGenericError(xmlGenericErrorContext,
2338 "realloc of %d byte failed\n", size);
2339 ctxt->instate = (xmlParserInputState) state;
2340 return(NULL);
2341 }
2342 }
2343 count++;
2344 if (count > 50) {
2345 GROW;
2346 count = 0;
2347 }
2348 COPY_BUF(l,buf,len,cur);
2349 NEXTL(l);
2350 cur = CUR_CHAR(l);
2351 if (cur == 0) {
2352 GROW;
2353 SHRINK;
2354 cur = CUR_CHAR(l);
2355 }
2356 }
2357 buf[len] = 0;
2358 ctxt->instate = (xmlParserInputState) state;
2359 if (!IS_CHAR(cur)) {
2360 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2362 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2363 ctxt->wellFormed = 0;
2364 ctxt->disableSAX = 1;
2365 } else {
2366 NEXT;
2367 }
2368 return(buf);
2369}
2370
2371/**
2372 * xmlParsePubidLiteral:
2373 * @ctxt: an XML parser context
2374 *
2375 * parse an XML public literal
2376 *
2377 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2378 *
2379 * Returns the PubidLiteral parsed or NULL.
2380 */
2381
2382xmlChar *
2383xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2384 xmlChar *buf = NULL;
2385 int len = 0;
2386 int size = XML_PARSER_BUFFER_SIZE;
2387 xmlChar cur;
2388 xmlChar stop;
2389 int count = 0;
2390
2391 SHRINK;
2392 if (RAW == '"') {
2393 NEXT;
2394 stop = '"';
2395 } else if (RAW == '\'') {
2396 NEXT;
2397 stop = '\'';
2398 } else {
2399 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2401 ctxt->sax->error(ctxt->userData,
2402 "SystemLiteral \" or ' expected\n");
2403 ctxt->wellFormed = 0;
2404 ctxt->disableSAX = 1;
2405 return(NULL);
2406 }
2407 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2408 if (buf == NULL) {
2409 xmlGenericError(xmlGenericErrorContext,
2410 "malloc of %d byte failed\n", size);
2411 return(NULL);
2412 }
2413 cur = CUR;
2414 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2415 if (len + 1 >= size) {
2416 size *= 2;
2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2418 if (buf == NULL) {
2419 xmlGenericError(xmlGenericErrorContext,
2420 "realloc of %d byte failed\n", size);
2421 return(NULL);
2422 }
2423 }
2424 buf[len++] = cur;
2425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
2430 NEXT;
2431 cur = CUR;
2432 if (cur == 0) {
2433 GROW;
2434 SHRINK;
2435 cur = CUR;
2436 }
2437 }
2438 buf[len] = 0;
2439 if (cur != stop) {
2440 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2443 ctxt->wellFormed = 0;
2444 ctxt->disableSAX = 1;
2445 } else {
2446 NEXT;
2447 }
2448 return(buf);
2449}
2450
/* Slow path of xmlParseCharData(), defined right after it. */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * The function first tries a fast path which scans the input buffer in
 * place and hands runs of plain ASCII text directly to the SAX handlers;
 * anything it cannot handle is delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;          /* scanning cursor inside the input buffer */
    int nbchar = 0;             /* size of the run found by the fast path */
    /* position at entry, written back before the slow path is invoked */
    int line = ctxt->input->line;
    int col = ctxt->input->col;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if ((ctxt->token == 0) && (!cdata)) {
        in = ctxt->input->cur;
        do {
get_more:
            /* skip over 0x20..0x7F (except '<' and '&') and tabs */
            while (((*in >= 0x20) && (*in != '<') &&
                    (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
                in++;
            /* line feeds stay part of the run, only the line count moves */
            if (*in == 0xA) {
                ctxt->input->line++;
                in++;
                while (*in == 0xA) {
                    ctxt->input->line++;
                    in++;
                }
                goto get_more;
            }
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                /*
                 * Deliver the run straight from the input buffer.
                 * NOTE(review): unlike xmlParseCharDataComplex() this
                 * path tests neither ctxt->sax against NULL nor
                 * ctxt->disableSAX before calling the handlers.
                 */
                if (IS_BLANK(*ctxt->input->cur)) {
                    /* the run starts with a blank: may be ignorable */
                    const xmlChar *tmp = ctxt->input->cur;
                    /* the input position is advanced before areBlanks() runs */
                    ctxt->input->cur = in;
                    if (areBlanks(ctxt, tmp, nbchar)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                    }
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: the next run starts on the LF, so the
                     * CR is never delivered. The continue jumps to the
                     * do/while condition, evaluated on the byte after
                     * the LF.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++;
                    continue; /* while */
                }
                /* lone CR: step back, it is not consumed here */
                in--;
            }
            /* markup or a reference starts here: the caller takes over */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while ((*in >= 0x20) && (*in <= 0x7F));
        nbchar = 0;
    }
    /*
     * Fall back to the generic routine.
     * NOTE(review): line and col are reset to the values captured at
     * entry even when the fast path above consumed input and bumped the
     * line counter - confirm this is intended.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
2544
2545void
2546xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002547 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2548 int nbchar = 0;
2549 int cur, l;
2550 int count = 0;
2551
2552 SHRINK;
2553 GROW;
2554 cur = CUR_CHAR(l);
2555 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2556 ((cur != '&') || (ctxt->token == '&')) &&
2557 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2558 if ((cur == ']') && (NXT(1) == ']') &&
2559 (NXT(2) == '>')) {
2560 if (cdata) break;
2561 else {
2562 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2564 ctxt->sax->error(ctxt->userData,
2565 "Sequence ']]>' not allowed in content\n");
2566 /* Should this be relaxed ??? I see a "must here */
2567 ctxt->wellFormed = 0;
2568 ctxt->disableSAX = 1;
2569 }
2570 }
2571 COPY_BUF(l,buf,nbchar,cur);
2572 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2573 /*
2574 * Ok the segment is to be consumed as chars.
2575 */
2576 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2577 if (areBlanks(ctxt, buf, nbchar)) {
2578 if (ctxt->sax->ignorableWhitespace != NULL)
2579 ctxt->sax->ignorableWhitespace(ctxt->userData,
2580 buf, nbchar);
2581 } else {
2582 if (ctxt->sax->characters != NULL)
2583 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2584 }
2585 }
2586 nbchar = 0;
2587 }
2588 count++;
2589 if (count > 50) {
2590 GROW;
2591 count = 0;
2592 }
2593 NEXTL(l);
2594 cur = CUR_CHAR(l);
2595 }
2596 if (nbchar != 0) {
2597 /*
2598 * Ok the segment is to be consumed as chars.
2599 */
2600 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2601 if (areBlanks(ctxt, buf, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2604 } else {
2605 if (ctxt->sax->characters != NULL)
2606 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2607 }
2608 }
2609 }
2610}
2611
2612/**
2613 * xmlParseExternalID:
2614 * @ctxt: an XML parser context
2615 * @publicID: a xmlChar** receiving PubidLiteral
2616 * @strict: indicate whether we should restrict parsing to only
2617 * production [75], see NOTE below
2618 *
2619 * Parse an External ID or a Public ID
2620 *
2621 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2622 * 'PUBLIC' S PubidLiteral S SystemLiteral
2623 *
2624 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2625 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2626 *
2627 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2628 *
2629 * Returns the function returns SystemLiteral and in the second
2630 * case publicID receives PubidLiteral, is strict is off
2631 * it is possible to return NULL and have publicID set.
2632 */
2633
2634xmlChar *
2635xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2636 xmlChar *URI = NULL;
2637
2638 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002639
2640 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002641 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2642 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2643 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2644 SKIP(6);
2645 if (!IS_BLANK(CUR)) {
2646 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2648 ctxt->sax->error(ctxt->userData,
2649 "Space required after 'SYSTEM'\n");
2650 ctxt->wellFormed = 0;
2651 ctxt->disableSAX = 1;
2652 }
2653 SKIP_BLANKS;
2654 URI = xmlParseSystemLiteral(ctxt);
2655 if (URI == NULL) {
2656 ctxt->errNo = XML_ERR_URI_REQUIRED;
2657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2658 ctxt->sax->error(ctxt->userData,
2659 "xmlParseExternalID: SYSTEM, no URI\n");
2660 ctxt->wellFormed = 0;
2661 ctxt->disableSAX = 1;
2662 }
2663 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2664 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2665 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2666 SKIP(6);
2667 if (!IS_BLANK(CUR)) {
2668 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2670 ctxt->sax->error(ctxt->userData,
2671 "Space required after 'PUBLIC'\n");
2672 ctxt->wellFormed = 0;
2673 ctxt->disableSAX = 1;
2674 }
2675 SKIP_BLANKS;
2676 *publicID = xmlParsePubidLiteral(ctxt);
2677 if (*publicID == NULL) {
2678 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2680 ctxt->sax->error(ctxt->userData,
2681 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2682 ctxt->wellFormed = 0;
2683 ctxt->disableSAX = 1;
2684 }
2685 if (strict) {
2686 /*
2687 * We don't handle [83] so "S SystemLiteral" is required.
2688 */
2689 if (!IS_BLANK(CUR)) {
2690 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2692 ctxt->sax->error(ctxt->userData,
2693 "Space required after the Public Identifier\n");
2694 ctxt->wellFormed = 0;
2695 ctxt->disableSAX = 1;
2696 }
2697 } else {
2698 /*
2699 * We handle [83] so we return immediately, if
2700 * "S SystemLiteral" is not detected. From a purely parsing
2701 * point of view that's a nice mess.
2702 */
2703 const xmlChar *ptr;
2704 GROW;
2705
2706 ptr = CUR_PTR;
2707 if (!IS_BLANK(*ptr)) return(NULL);
2708
2709 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2710 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2711 }
2712 SKIP_BLANKS;
2713 URI = xmlParseSystemLiteral(ctxt);
2714 if (URI == NULL) {
2715 ctxt->errNo = XML_ERR_URI_REQUIRED;
2716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2717 ctxt->sax->error(ctxt->userData,
2718 "xmlParseExternalID: PUBLIC, no URI\n");
2719 ctxt->wellFormed = 0;
2720 ctxt->disableSAX = 1;
2721 }
2722 }
2723 return(URI);
2724}
2725
2726/**
2727 * xmlParseComment:
2728 * @ctxt: an XML parser context
2729 *
2730 * Skip an XML (SGML) comment <!-- .... -->
2731 * The spec says that "For compatibility, the string "--" (double-hyphen)
2732 * must not occur within comments. "
2733 *
2734 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2735 */
2736void
2737xmlParseComment(xmlParserCtxtPtr ctxt) {
2738 xmlChar *buf = NULL;
2739 int len;
2740 int size = XML_PARSER_BUFFER_SIZE;
2741 int q, ql;
2742 int r, rl;
2743 int cur, l;
2744 xmlParserInputState state;
2745 xmlParserInputPtr input = ctxt->input;
2746 int count = 0;
2747
2748 /*
2749 * Check that there is a comment right here.
2750 */
2751 if ((RAW != '<') || (NXT(1) != '!') ||
2752 (NXT(2) != '-') || (NXT(3) != '-')) return;
2753
2754 state = ctxt->instate;
2755 ctxt->instate = XML_PARSER_COMMENT;
2756 SHRINK;
2757 SKIP(4);
2758 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2759 if (buf == NULL) {
2760 xmlGenericError(xmlGenericErrorContext,
2761 "malloc of %d byte failed\n", size);
2762 ctxt->instate = state;
2763 return;
2764 }
2765 q = CUR_CHAR(ql);
2766 NEXTL(ql);
2767 r = CUR_CHAR(rl);
2768 NEXTL(rl);
2769 cur = CUR_CHAR(l);
2770 len = 0;
2771 while (IS_CHAR(cur) && /* checked */
2772 ((cur != '>') ||
2773 (r != '-') || (q != '-'))) {
2774 if ((r == '-') && (q == '-') && (len > 1)) {
2775 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777 ctxt->sax->error(ctxt->userData,
2778 "Comment must not contain '--' (double-hyphen)`\n");
2779 ctxt->wellFormed = 0;
2780 ctxt->disableSAX = 1;
2781 }
2782 if (len + 5 >= size) {
2783 size *= 2;
2784 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2785 if (buf == NULL) {
2786 xmlGenericError(xmlGenericErrorContext,
2787 "realloc of %d byte failed\n", size);
2788 ctxt->instate = state;
2789 return;
2790 }
2791 }
2792 COPY_BUF(ql,buf,len,q);
2793 q = r;
2794 ql = rl;
2795 r = cur;
2796 rl = l;
2797
2798 count++;
2799 if (count > 50) {
2800 GROW;
2801 count = 0;
2802 }
2803 NEXTL(l);
2804 cur = CUR_CHAR(l);
2805 if (cur == 0) {
2806 SHRINK;
2807 GROW;
2808 cur = CUR_CHAR(l);
2809 }
2810 }
2811 buf[len] = 0;
2812 if (!IS_CHAR(cur)) {
2813 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2815 ctxt->sax->error(ctxt->userData,
2816 "Comment not terminated \n<!--%.50s\n", buf);
2817 ctxt->wellFormed = 0;
2818 ctxt->disableSAX = 1;
2819 xmlFree(buf);
2820 } else {
2821 if (input != ctxt->input) {
2822 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2824 ctxt->sax->error(ctxt->userData,
2825"Comment doesn't start and stop in the same entity\n");
2826 ctxt->wellFormed = 0;
2827 ctxt->disableSAX = 1;
2828 }
2829 NEXT;
2830 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2831 (!ctxt->disableSAX))
2832 ctxt->sax->comment(ctxt->userData, buf);
2833 xmlFree(buf);
2834 }
2835 ctxt->instate = state;
2836}
2837
2838/**
2839 * xmlParsePITarget:
2840 * @ctxt: an XML parser context
2841 *
2842 * parse the name of a PI
2843 *
2844 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2845 *
2846 * Returns the PITarget name or NULL
2847 */
2848
2849xmlChar *
2850xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2851 xmlChar *name;
2852
2853 name = xmlParseName(ctxt);
2854 if ((name != NULL) &&
2855 ((name[0] == 'x') || (name[0] == 'X')) &&
2856 ((name[1] == 'm') || (name[1] == 'M')) &&
2857 ((name[2] == 'l') || (name[2] == 'L'))) {
2858 int i;
2859 if ((name[0] == 'x') && (name[1] == 'm') &&
2860 (name[2] == 'l') && (name[3] == 0)) {
2861 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2863 ctxt->sax->error(ctxt->userData,
2864 "XML declaration allowed only at the start of the document\n");
2865 ctxt->wellFormed = 0;
2866 ctxt->disableSAX = 1;
2867 return(name);
2868 } else if (name[3] == 0) {
2869 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2871 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2872 ctxt->wellFormed = 0;
2873 ctxt->disableSAX = 1;
2874 return(name);
2875 }
2876 for (i = 0;;i++) {
2877 if (xmlW3CPIs[i] == NULL) break;
2878 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2879 return(name);
2880 }
2881 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2882 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2883 ctxt->sax->warning(ctxt->userData,
2884 "xmlParsePItarget: invalid name prefix 'xml'\n");
2885 }
2886 }
2887 return(name);
2888}
2889
2890/**
2891 * xmlParsePI:
2892 * @ctxt: an XML parser context
2893 *
2894 * parse an XML Processing Instruction.
2895 *
2896 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2897 *
2898 * The processing is transfered to SAX once parsed.
2899 */
2900
2901void
2902xmlParsePI(xmlParserCtxtPtr ctxt) {
2903 xmlChar *buf = NULL;
2904 int len = 0;
2905 int size = XML_PARSER_BUFFER_SIZE;
2906 int cur, l;
2907 xmlChar *target;
2908 xmlParserInputState state;
2909 int count = 0;
2910
2911 if ((RAW == '<') && (NXT(1) == '?')) {
2912 xmlParserInputPtr input = ctxt->input;
2913 state = ctxt->instate;
2914 ctxt->instate = XML_PARSER_PI;
2915 /*
2916 * this is a Processing Instruction.
2917 */
2918 SKIP(2);
2919 SHRINK;
2920
2921 /*
2922 * Parse the target name and check for special support like
2923 * namespace.
2924 */
2925 target = xmlParsePITarget(ctxt);
2926 if (target != NULL) {
2927 if ((RAW == '?') && (NXT(1) == '>')) {
2928 if (input != ctxt->input) {
2929 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2931 ctxt->sax->error(ctxt->userData,
2932 "PI declaration doesn't start and stop in the same entity\n");
2933 ctxt->wellFormed = 0;
2934 ctxt->disableSAX = 1;
2935 }
2936 SKIP(2);
2937
2938 /*
2939 * SAX: PI detected.
2940 */
2941 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2942 (ctxt->sax->processingInstruction != NULL))
2943 ctxt->sax->processingInstruction(ctxt->userData,
2944 target, NULL);
2945 ctxt->instate = state;
2946 xmlFree(target);
2947 return;
2948 }
2949 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2950 if (buf == NULL) {
2951 xmlGenericError(xmlGenericErrorContext,
2952 "malloc of %d byte failed\n", size);
2953 ctxt->instate = state;
2954 return;
2955 }
2956 cur = CUR;
2957 if (!IS_BLANK(cur)) {
2958 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2960 ctxt->sax->error(ctxt->userData,
2961 "xmlParsePI: PI %s space expected\n", target);
2962 ctxt->wellFormed = 0;
2963 ctxt->disableSAX = 1;
2964 }
2965 SKIP_BLANKS;
2966 cur = CUR_CHAR(l);
2967 while (IS_CHAR(cur) && /* checked */
2968 ((cur != '?') || (NXT(1) != '>'))) {
2969 if (len + 5 >= size) {
2970 size *= 2;
2971 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2972 if (buf == NULL) {
2973 xmlGenericError(xmlGenericErrorContext,
2974 "realloc of %d byte failed\n", size);
2975 ctxt->instate = state;
2976 return;
2977 }
2978 }
2979 count++;
2980 if (count > 50) {
2981 GROW;
2982 count = 0;
2983 }
2984 COPY_BUF(l,buf,len,cur);
2985 NEXTL(l);
2986 cur = CUR_CHAR(l);
2987 if (cur == 0) {
2988 SHRINK;
2989 GROW;
2990 cur = CUR_CHAR(l);
2991 }
2992 }
2993 buf[len] = 0;
2994 if (cur != '?') {
2995 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParsePI: PI %s never end ...\n", target);
2999 ctxt->wellFormed = 0;
3000 ctxt->disableSAX = 1;
3001 } else {
3002 if (input != ctxt->input) {
3003 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3005 ctxt->sax->error(ctxt->userData,
3006 "PI declaration doesn't start and stop in the same entity\n");
3007 ctxt->wellFormed = 0;
3008 ctxt->disableSAX = 1;
3009 }
3010 SKIP(2);
3011
3012 /*
3013 * SAX: PI detected.
3014 */
3015 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3016 (ctxt->sax->processingInstruction != NULL))
3017 ctxt->sax->processingInstruction(ctxt->userData,
3018 target, buf);
3019 }
3020 xmlFree(buf);
3021 xmlFree(target);
3022 } else {
3023 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3025 ctxt->sax->error(ctxt->userData,
3026 "xmlParsePI : no target name\n");
3027 ctxt->wellFormed = 0;
3028 ctxt->disableSAX = 1;
3029 }
3030 ctxt->instate = state;
3031 }
3032}
3033
3034/**
3035 * xmlParseNotationDecl:
3036 * @ctxt: an XML parser context
3037 *
3038 * parse a notation declaration
3039 *
3040 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3041 *
3042 * Hence there is actually 3 choices:
3043 * 'PUBLIC' S PubidLiteral
3044 * 'PUBLIC' S PubidLiteral S SystemLiteral
3045 * and 'SYSTEM' S SystemLiteral
3046 *
3047 * See the NOTE on xmlParseExternalID().
3048 */
3049
3050void
3051xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3052 xmlChar *name;
3053 xmlChar *Pubid;
3054 xmlChar *Systemid;
3055
3056 if ((RAW == '<') && (NXT(1) == '!') &&
3057 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3058 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3059 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3060 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3061 xmlParserInputPtr input = ctxt->input;
3062 SHRINK;
3063 SKIP(10);
3064 if (!IS_BLANK(CUR)) {
3065 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068 "Space required after '<!NOTATION'\n");
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 return;
3072 }
3073 SKIP_BLANKS;
3074
Daniel Veillard76d66f42001-05-16 21:05:17 +00003075 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003076 if (name == NULL) {
3077 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "NOTATION: Name expected here\n");
3081 ctxt->wellFormed = 0;
3082 ctxt->disableSAX = 1;
3083 return;
3084 }
3085 if (!IS_BLANK(CUR)) {
3086 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3088 ctxt->sax->error(ctxt->userData,
3089 "Space required after the NOTATION name'\n");
3090 ctxt->wellFormed = 0;
3091 ctxt->disableSAX = 1;
3092 return;
3093 }
3094 SKIP_BLANKS;
3095
3096 /*
3097 * Parse the IDs.
3098 */
3099 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3100 SKIP_BLANKS;
3101
3102 if (RAW == '>') {
3103 if (input != ctxt->input) {
3104 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3106 ctxt->sax->error(ctxt->userData,
3107"Notation declaration doesn't start and stop in the same entity\n");
3108 ctxt->wellFormed = 0;
3109 ctxt->disableSAX = 1;
3110 }
3111 NEXT;
3112 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3113 (ctxt->sax->notationDecl != NULL))
3114 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3115 } else {
3116 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3118 ctxt->sax->error(ctxt->userData,
3119 "'>' required to close NOTATION declaration\n");
3120 ctxt->wellFormed = 0;
3121 ctxt->disableSAX = 1;
3122 }
3123 xmlFree(name);
3124 if (Systemid != NULL) xmlFree(Systemid);
3125 if (Pubid != NULL) xmlFree(Pubid);
3126 }
3127}
3128
3129/**
3130 * xmlParseEntityDecl:
3131 * @ctxt: an XML parser context
3132 *
3133 * parse <!ENTITY declarations
3134 *
3135 * [70] EntityDecl ::= GEDecl | PEDecl
3136 *
3137 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3138 *
3139 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3140 *
3141 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3142 *
3143 * [74] PEDef ::= EntityValue | ExternalID
3144 *
3145 * [76] NDataDecl ::= S 'NDATA' S Name
3146 *
3147 * [ VC: Notation Declared ]
3148 * The Name must match the declared name of a notation.
3149 */
3150
3151void
3152xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3153 xmlChar *name = NULL;
3154 xmlChar *value = NULL;
3155 xmlChar *URI = NULL, *literal = NULL;
3156 xmlChar *ndata = NULL;
3157 int isParameter = 0;
3158 xmlChar *orig = NULL;
3159
3160 GROW;
3161 if ((RAW == '<') && (NXT(1) == '!') &&
3162 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3163 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3164 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3165 xmlParserInputPtr input = ctxt->input;
3166 ctxt->instate = XML_PARSER_ENTITY_DECL;
3167 SHRINK;
3168 SKIP(8);
3169 if (!IS_BLANK(CUR)) {
3170 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3172 ctxt->sax->error(ctxt->userData,
3173 "Space required after '<!ENTITY'\n");
3174 ctxt->wellFormed = 0;
3175 ctxt->disableSAX = 1;
3176 }
3177 SKIP_BLANKS;
3178
3179 if (RAW == '%') {
3180 NEXT;
3181 if (!IS_BLANK(CUR)) {
3182 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184 ctxt->sax->error(ctxt->userData,
3185 "Space required after '%'\n");
3186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 }
3189 SKIP_BLANKS;
3190 isParameter = 1;
3191 }
3192
Daniel Veillard76d66f42001-05-16 21:05:17 +00003193 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003194 if (name == NULL) {
3195 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3198 ctxt->wellFormed = 0;
3199 ctxt->disableSAX = 1;
3200 return;
3201 }
3202 if (!IS_BLANK(CUR)) {
3203 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt->userData,
3206 "Space required after the entity name\n");
3207 ctxt->wellFormed = 0;
3208 ctxt->disableSAX = 1;
3209 }
3210 SKIP_BLANKS;
3211
3212 /*
3213 * handle the various case of definitions...
3214 */
3215 if (isParameter) {
3216 if ((RAW == '"') || (RAW == '\'')) {
3217 value = xmlParseEntityValue(ctxt, &orig);
3218 if (value) {
3219 if ((ctxt->sax != NULL) &&
3220 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3221 ctxt->sax->entityDecl(ctxt->userData, name,
3222 XML_INTERNAL_PARAMETER_ENTITY,
3223 NULL, NULL, value);
3224 }
3225 } else {
3226 URI = xmlParseExternalID(ctxt, &literal, 1);
3227 if ((URI == NULL) && (literal == NULL)) {
3228 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3230 ctxt->sax->error(ctxt->userData,
3231 "Entity value required\n");
3232 ctxt->wellFormed = 0;
3233 ctxt->disableSAX = 1;
3234 }
3235 if (URI) {
3236 xmlURIPtr uri;
3237
3238 uri = xmlParseURI((const char *) URI);
3239 if (uri == NULL) {
3240 ctxt->errNo = XML_ERR_INVALID_URI;
3241 if ((ctxt->sax != NULL) &&
3242 (!ctxt->disableSAX) &&
3243 (ctxt->sax->error != NULL))
3244 ctxt->sax->error(ctxt->userData,
3245 "Invalid URI: %s\n", URI);
3246 ctxt->wellFormed = 0;
3247 } else {
3248 if (uri->fragment != NULL) {
3249 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3250 if ((ctxt->sax != NULL) &&
3251 (!ctxt->disableSAX) &&
3252 (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData,
3254 "Fragment not allowed: %s\n", URI);
3255 ctxt->wellFormed = 0;
3256 } else {
3257 if ((ctxt->sax != NULL) &&
3258 (!ctxt->disableSAX) &&
3259 (ctxt->sax->entityDecl != NULL))
3260 ctxt->sax->entityDecl(ctxt->userData, name,
3261 XML_EXTERNAL_PARAMETER_ENTITY,
3262 literal, URI, NULL);
3263 }
3264 xmlFreeURI(uri);
3265 }
3266 }
3267 }
3268 } else {
3269 if ((RAW == '"') || (RAW == '\'')) {
3270 value = xmlParseEntityValue(ctxt, &orig);
3271 if ((ctxt->sax != NULL) &&
3272 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3273 ctxt->sax->entityDecl(ctxt->userData, name,
3274 XML_INTERNAL_GENERAL_ENTITY,
3275 NULL, NULL, value);
3276 } else {
3277 URI = xmlParseExternalID(ctxt, &literal, 1);
3278 if ((URI == NULL) && (literal == NULL)) {
3279 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3281 ctxt->sax->error(ctxt->userData,
3282 "Entity value required\n");
3283 ctxt->wellFormed = 0;
3284 ctxt->disableSAX = 1;
3285 }
3286 if (URI) {
3287 xmlURIPtr uri;
3288
3289 uri = xmlParseURI((const char *)URI);
3290 if (uri == NULL) {
3291 ctxt->errNo = XML_ERR_INVALID_URI;
3292 if ((ctxt->sax != NULL) &&
3293 (!ctxt->disableSAX) &&
3294 (ctxt->sax->error != NULL))
3295 ctxt->sax->error(ctxt->userData,
3296 "Invalid URI: %s\n", URI);
3297 ctxt->wellFormed = 0;
3298 } else {
3299 if (uri->fragment != NULL) {
3300 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3301 if ((ctxt->sax != NULL) &&
3302 (!ctxt->disableSAX) &&
3303 (ctxt->sax->error != NULL))
3304 ctxt->sax->error(ctxt->userData,
3305 "Fragment not allowed: %s\n", URI);
3306 ctxt->wellFormed = 0;
3307 }
3308 xmlFreeURI(uri);
3309 }
3310 }
3311 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3312 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3314 ctxt->sax->error(ctxt->userData,
3315 "Space required before 'NDATA'\n");
3316 ctxt->wellFormed = 0;
3317 ctxt->disableSAX = 1;
3318 }
3319 SKIP_BLANKS;
3320 if ((RAW == 'N') && (NXT(1) == 'D') &&
3321 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3322 (NXT(4) == 'A')) {
3323 SKIP(5);
3324 if (!IS_BLANK(CUR)) {
3325 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3327 ctxt->sax->error(ctxt->userData,
3328 "Space required after 'NDATA'\n");
3329 ctxt->wellFormed = 0;
3330 ctxt->disableSAX = 1;
3331 }
3332 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003333 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003334 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3335 (ctxt->sax->unparsedEntityDecl != NULL))
3336 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3337 literal, URI, ndata);
3338 } else {
3339 if ((ctxt->sax != NULL) &&
3340 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3341 ctxt->sax->entityDecl(ctxt->userData, name,
3342 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3343 literal, URI, NULL);
3344 }
3345 }
3346 }
3347 SKIP_BLANKS;
3348 if (RAW != '>') {
3349 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData,
3352 "xmlParseEntityDecl: entity %s not terminated\n", name);
3353 ctxt->wellFormed = 0;
3354 ctxt->disableSAX = 1;
3355 } else {
3356 if (input != ctxt->input) {
3357 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3359 ctxt->sax->error(ctxt->userData,
3360"Entity declaration doesn't start and stop in the same entity\n");
3361 ctxt->wellFormed = 0;
3362 ctxt->disableSAX = 1;
3363 }
3364 NEXT;
3365 }
3366 if (orig != NULL) {
3367 /*
3368 * Ugly mechanism to save the raw entity value.
3369 */
3370 xmlEntityPtr cur = NULL;
3371
3372 if (isParameter) {
3373 if ((ctxt->sax != NULL) &&
3374 (ctxt->sax->getParameterEntity != NULL))
3375 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3376 } else {
3377 if ((ctxt->sax != NULL) &&
3378 (ctxt->sax->getEntity != NULL))
3379 cur = ctxt->sax->getEntity(ctxt->userData, name);
3380 }
3381 if (cur != NULL) {
3382 if (cur->orig != NULL)
3383 xmlFree(orig);
3384 else
3385 cur->orig = orig;
3386 } else
3387 xmlFree(orig);
3388 }
3389 if (name != NULL) xmlFree(name);
3390 if (value != NULL) xmlFree(value);
3391 if (URI != NULL) xmlFree(URI);
3392 if (literal != NULL) xmlFree(literal);
3393 if (ndata != NULL) xmlFree(ndata);
3394 }
3395}
3396
3397/**
3398 * xmlParseDefaultDecl:
3399 * @ctxt: an XML parser context
3400 * @value: Receive a possible fixed default value for the attribute
3401 *
3402 * Parse an attribute default declaration
3403 *
3404 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3405 *
3406 * [ VC: Required Attribute ]
3407 * if the default declaration is the keyword #REQUIRED, then the
3408 * attribute must be specified for all elements of the type in the
3409 * attribute-list declaration.
3410 *
3411 * [ VC: Attribute Default Legal ]
3412 * The declared default value must meet the lexical constraints of
3413 * the declared attribute type c.f. xmlValidateAttributeDecl()
3414 *
3415 * [ VC: Fixed Attribute Default ]
3416 * if an attribute has a default value declared with the #FIXED
3417 * keyword, instances of that attribute must match the default value.
3418 *
3419 * [ WFC: No < in Attribute Values ]
3420 * handled in xmlParseAttValue()
3421 *
3422 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3423 * or XML_ATTRIBUTE_FIXED.
3424 */
3425
3426int
3427xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3428 int val;
3429 xmlChar *ret;
3430
3431 *value = NULL;
3432 if ((RAW == '#') && (NXT(1) == 'R') &&
3433 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3434 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3435 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3436 (NXT(8) == 'D')) {
3437 SKIP(9);
3438 return(XML_ATTRIBUTE_REQUIRED);
3439 }
3440 if ((RAW == '#') && (NXT(1) == 'I') &&
3441 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3442 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3443 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3444 SKIP(8);
3445 return(XML_ATTRIBUTE_IMPLIED);
3446 }
3447 val = XML_ATTRIBUTE_NONE;
3448 if ((RAW == '#') && (NXT(1) == 'F') &&
3449 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3450 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3451 SKIP(6);
3452 val = XML_ATTRIBUTE_FIXED;
3453 if (!IS_BLANK(CUR)) {
3454 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3456 ctxt->sax->error(ctxt->userData,
3457 "Space required after '#FIXED'\n");
3458 ctxt->wellFormed = 0;
3459 ctxt->disableSAX = 1;
3460 }
3461 SKIP_BLANKS;
3462 }
3463 ret = xmlParseAttValue(ctxt);
3464 ctxt->instate = XML_PARSER_DTD;
3465 if (ret == NULL) {
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Attribute default value declaration error\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 } else
3472 *value = ret;
3473 return(val);
3474}
3475
3476/**
3477 * xmlParseNotationType:
3478 * @ctxt: an XML parser context
3479 *
3480 * parse an Notation attribute type.
3481 *
3482 * Note: the leading 'NOTATION' S part has already being parsed...
3483 *
3484 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3485 *
3486 * [ VC: Notation Attributes ]
3487 * Values of this type must match one of the notation names included
3488 * in the declaration; all notation names in the declaration must be declared.
3489 *
3490 * Returns: the notation attribute tree built while parsing
3491 */
3492
3493xmlEnumerationPtr
3494xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3495 xmlChar *name;
3496 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3497
3498 if (RAW != '(') {
3499 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3501 ctxt->sax->error(ctxt->userData,
3502 "'(' required to start 'NOTATION'\n");
3503 ctxt->wellFormed = 0;
3504 ctxt->disableSAX = 1;
3505 return(NULL);
3506 }
3507 SHRINK;
3508 do {
3509 NEXT;
3510 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003511 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003512 if (name == NULL) {
3513 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3515 ctxt->sax->error(ctxt->userData,
3516 "Name expected in NOTATION declaration\n");
3517 ctxt->wellFormed = 0;
3518 ctxt->disableSAX = 1;
3519 return(ret);
3520 }
3521 cur = xmlCreateEnumeration(name);
3522 xmlFree(name);
3523 if (cur == NULL) return(ret);
3524 if (last == NULL) ret = last = cur;
3525 else {
3526 last->next = cur;
3527 last = cur;
3528 }
3529 SKIP_BLANKS;
3530 } while (RAW == '|');
3531 if (RAW != ')') {
3532 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3534 ctxt->sax->error(ctxt->userData,
3535 "')' required to finish NOTATION declaration\n");
3536 ctxt->wellFormed = 0;
3537 ctxt->disableSAX = 1;
3538 if ((last != NULL) && (last != ret))
3539 xmlFreeEnumeration(last);
3540 return(ret);
3541 }
3542 NEXT;
3543 return(ret);
3544}
3545
3546/**
3547 * xmlParseEnumerationType:
3548 * @ctxt: an XML parser context
3549 *
3550 * parse an Enumeration attribute type.
3551 *
3552 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3553 *
3554 * [ VC: Enumeration ]
3555 * Values of this type must match one of the Nmtoken tokens in
3556 * the declaration
3557 *
3558 * Returns: the enumeration attribute tree built while parsing
3559 */
3560
3561xmlEnumerationPtr
3562xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3563 xmlChar *name;
3564 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3565
3566 if (RAW != '(') {
3567 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3569 ctxt->sax->error(ctxt->userData,
3570 "'(' required to start ATTLIST enumeration\n");
3571 ctxt->wellFormed = 0;
3572 ctxt->disableSAX = 1;
3573 return(NULL);
3574 }
3575 SHRINK;
3576 do {
3577 NEXT;
3578 SKIP_BLANKS;
3579 name = xmlParseNmtoken(ctxt);
3580 if (name == NULL) {
3581 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3583 ctxt->sax->error(ctxt->userData,
3584 "NmToken expected in ATTLIST enumeration\n");
3585 ctxt->wellFormed = 0;
3586 ctxt->disableSAX = 1;
3587 return(ret);
3588 }
3589 cur = xmlCreateEnumeration(name);
3590 xmlFree(name);
3591 if (cur == NULL) return(ret);
3592 if (last == NULL) ret = last = cur;
3593 else {
3594 last->next = cur;
3595 last = cur;
3596 }
3597 SKIP_BLANKS;
3598 } while (RAW == '|');
3599 if (RAW != ')') {
3600 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3602 ctxt->sax->error(ctxt->userData,
3603 "')' required to finish ATTLIST enumeration\n");
3604 ctxt->wellFormed = 0;
3605 ctxt->disableSAX = 1;
3606 return(ret);
3607 }
3608 NEXT;
3609 return(ret);
3610}
3611
3612/**
3613 * xmlParseEnumeratedType:
3614 * @ctxt: an XML parser context
3615 * @tree: the enumeration tree built while parsing
3616 *
3617 * parse an Enumerated attribute type.
3618 *
3619 * [57] EnumeratedType ::= NotationType | Enumeration
3620 *
3621 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3622 *
3623 *
3624 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3625 */
3626
3627int
3628xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3629 if ((RAW == 'N') && (NXT(1) == 'O') &&
3630 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3631 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3632 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3633 SKIP(8);
3634 if (!IS_BLANK(CUR)) {
3635 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Space required after 'NOTATION'\n");
3639 ctxt->wellFormed = 0;
3640 ctxt->disableSAX = 1;
3641 return(0);
3642 }
3643 SKIP_BLANKS;
3644 *tree = xmlParseNotationType(ctxt);
3645 if (*tree == NULL) return(0);
3646 return(XML_ATTRIBUTE_NOTATION);
3647 }
3648 *tree = xmlParseEnumerationType(ctxt);
3649 if (*tree == NULL) return(0);
3650 return(XML_ATTRIBUTE_ENUMERATION);
3651}
3652
3653/**
3654 * xmlParseAttributeType:
3655 * @ctxt: an XML parser context
3656 * @tree: the enumeration tree built while parsing
3657 *
3658 * parse the Attribute list def for an element
3659 *
3660 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3661 *
3662 * [55] StringType ::= 'CDATA'
3663 *
3664 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3665 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3666 *
3667 * Validity constraints for attribute values syntax are checked in
3668 * xmlValidateAttributeValue()
3669 *
3670 * [ VC: ID ]
3671 * Values of type ID must match the Name production. A name must not
3672 * appear more than once in an XML document as a value of this type;
3673 * i.e., ID values must uniquely identify the elements which bear them.
3674 *
3675 * [ VC: One ID per Element Type ]
3676 * No element type may have more than one ID attribute specified.
3677 *
3678 * [ VC: ID Attribute Default ]
3679 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3680 *
3681 * [ VC: IDREF ]
3682 * Values of type IDREF must match the Name production, and values
3683 * of type IDREFS must match Names; each IDREF Name must match the value
3684 * of an ID attribute on some element in the XML document; i.e. IDREF
3685 * values must match the value of some ID attribute.
3686 *
3687 * [ VC: Entity Name ]
3688 * Values of type ENTITY must match the Name production, values
3689 * of type ENTITIES must match Names; each Entity Name must match the
3690 * name of an unparsed entity declared in the DTD.
3691 *
3692 * [ VC: Name Token ]
3693 * Values of type NMTOKEN must match the Nmtoken production; values
3694 * of type NMTOKENS must match Nmtokens.
3695 *
3696 * Returns the attribute type
3697 */
3698int
3699xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3700 SHRINK;
3701 if ((RAW == 'C') && (NXT(1) == 'D') &&
3702 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3703 (NXT(4) == 'A')) {
3704 SKIP(5);
3705 return(XML_ATTRIBUTE_CDATA);
3706 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3707 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3708 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3709 SKIP(6);
3710 return(XML_ATTRIBUTE_IDREFS);
3711 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3712 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3713 (NXT(4) == 'F')) {
3714 SKIP(5);
3715 return(XML_ATTRIBUTE_IDREF);
3716 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3717 SKIP(2);
3718 return(XML_ATTRIBUTE_ID);
3719 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3720 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3721 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3722 SKIP(6);
3723 return(XML_ATTRIBUTE_ENTITY);
3724 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3725 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3726 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3727 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3728 SKIP(8);
3729 return(XML_ATTRIBUTE_ENTITIES);
3730 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3731 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3732 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3733 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3734 SKIP(8);
3735 return(XML_ATTRIBUTE_NMTOKENS);
3736 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3737 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3738 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3739 (NXT(6) == 'N')) {
3740 SKIP(7);
3741 return(XML_ATTRIBUTE_NMTOKEN);
3742 }
3743 return(xmlParseEnumeratedType(ctxt, tree));
3744}
3745
3746/**
3747 * xmlParseAttributeListDecl:
3748 * @ctxt: an XML parser context
3749 *
3750 * : parse the Attribute list def for an element
3751 *
3752 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3753 *
3754 * [53] AttDef ::= S Name S AttType S DefaultDecl
3755 *
3756 */
3757void
3758xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3759 xmlChar *elemName;
3760 xmlChar *attrName;
3761 xmlEnumerationPtr tree;
3762
3763 if ((RAW == '<') && (NXT(1) == '!') &&
3764 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3765 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3766 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3767 (NXT(8) == 'T')) {
3768 xmlParserInputPtr input = ctxt->input;
3769
3770 SKIP(9);
3771 if (!IS_BLANK(CUR)) {
3772 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3774 ctxt->sax->error(ctxt->userData,
3775 "Space required after '<!ATTLIST'\n");
3776 ctxt->wellFormed = 0;
3777 ctxt->disableSAX = 1;
3778 }
3779 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003780 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 if (elemName == NULL) {
3782 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3784 ctxt->sax->error(ctxt->userData,
3785 "ATTLIST: no name for Element\n");
3786 ctxt->wellFormed = 0;
3787 ctxt->disableSAX = 1;
3788 return;
3789 }
3790 SKIP_BLANKS;
3791 GROW;
3792 while (RAW != '>') {
3793 const xmlChar *check = CUR_PTR;
3794 int type;
3795 int def;
3796 xmlChar *defaultValue = NULL;
3797
3798 GROW;
3799 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003800 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003801 if (attrName == NULL) {
3802 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3803 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3804 ctxt->sax->error(ctxt->userData,
3805 "ATTLIST: no name for Attribute\n");
3806 ctxt->wellFormed = 0;
3807 ctxt->disableSAX = 1;
3808 break;
3809 }
3810 GROW;
3811 if (!IS_BLANK(CUR)) {
3812 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3814 ctxt->sax->error(ctxt->userData,
3815 "Space required after the attribute name\n");
3816 ctxt->wellFormed = 0;
3817 ctxt->disableSAX = 1;
3818 if (attrName != NULL)
3819 xmlFree(attrName);
3820 if (defaultValue != NULL)
3821 xmlFree(defaultValue);
3822 break;
3823 }
3824 SKIP_BLANKS;
3825
3826 type = xmlParseAttributeType(ctxt, &tree);
3827 if (type <= 0) {
3828 if (attrName != NULL)
3829 xmlFree(attrName);
3830 if (defaultValue != NULL)
3831 xmlFree(defaultValue);
3832 break;
3833 }
3834
3835 GROW;
3836 if (!IS_BLANK(CUR)) {
3837 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3839 ctxt->sax->error(ctxt->userData,
3840 "Space required after the attribute type\n");
3841 ctxt->wellFormed = 0;
3842 ctxt->disableSAX = 1;
3843 if (attrName != NULL)
3844 xmlFree(attrName);
3845 if (defaultValue != NULL)
3846 xmlFree(defaultValue);
3847 if (tree != NULL)
3848 xmlFreeEnumeration(tree);
3849 break;
3850 }
3851 SKIP_BLANKS;
3852
3853 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3854 if (def <= 0) {
3855 if (attrName != NULL)
3856 xmlFree(attrName);
3857 if (defaultValue != NULL)
3858 xmlFree(defaultValue);
3859 if (tree != NULL)
3860 xmlFreeEnumeration(tree);
3861 break;
3862 }
3863
3864 GROW;
3865 if (RAW != '>') {
3866 if (!IS_BLANK(CUR)) {
3867 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3869 ctxt->sax->error(ctxt->userData,
3870 "Space required after the attribute default value\n");
3871 ctxt->wellFormed = 0;
3872 ctxt->disableSAX = 1;
3873 if (attrName != NULL)
3874 xmlFree(attrName);
3875 if (defaultValue != NULL)
3876 xmlFree(defaultValue);
3877 if (tree != NULL)
3878 xmlFreeEnumeration(tree);
3879 break;
3880 }
3881 SKIP_BLANKS;
3882 }
3883 if (check == CUR_PTR) {
3884 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3886 ctxt->sax->error(ctxt->userData,
3887 "xmlParseAttributeListDecl: detected internal error\n");
3888 if (attrName != NULL)
3889 xmlFree(attrName);
3890 if (defaultValue != NULL)
3891 xmlFree(defaultValue);
3892 if (tree != NULL)
3893 xmlFreeEnumeration(tree);
3894 break;
3895 }
3896 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3897 (ctxt->sax->attributeDecl != NULL))
3898 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3899 type, def, defaultValue, tree);
3900 if (attrName != NULL)
3901 xmlFree(attrName);
3902 if (defaultValue != NULL)
3903 xmlFree(defaultValue);
3904 GROW;
3905 }
3906 if (RAW == '>') {
3907 if (input != ctxt->input) {
3908 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3910 ctxt->sax->error(ctxt->userData,
3911"Attribute list declaration doesn't start and stop in the same entity\n");
3912 ctxt->wellFormed = 0;
3913 ctxt->disableSAX = 1;
3914 }
3915 NEXT;
3916 }
3917
3918 xmlFree(elemName);
3919 }
3920}
3921
3922/**
3923 * xmlParseElementMixedContentDecl:
3924 * @ctxt: an XML parser context
3925 *
3926 * parse the declaration for a Mixed Element content
3927 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3928 *
3929 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3930 * '(' S? '#PCDATA' S? ')'
3931 *
3932 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3933 *
3934 * [ VC: No Duplicate Types ]
3935 * The same name must not appear more than once in a single
3936 * mixed-content declaration.
3937 *
3938 * returns: the list of the xmlElementContentPtr describing the element choices
3939 */
3940xmlElementContentPtr
3941xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3942 xmlElementContentPtr ret = NULL, cur = NULL, n;
3943 xmlChar *elem = NULL;
3944
3945 GROW;
3946 if ((RAW == '#') && (NXT(1) == 'P') &&
3947 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3948 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3949 (NXT(6) == 'A')) {
3950 SKIP(7);
3951 SKIP_BLANKS;
3952 SHRINK;
3953 if (RAW == ')') {
3954 ctxt->entity = ctxt->input;
3955 NEXT;
3956 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3957 if (RAW == '*') {
3958 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3959 NEXT;
3960 }
3961 return(ret);
3962 }
3963 if ((RAW == '(') || (RAW == '|')) {
3964 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3965 if (ret == NULL) return(NULL);
3966 }
3967 while (RAW == '|') {
3968 NEXT;
3969 if (elem == NULL) {
3970 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3971 if (ret == NULL) return(NULL);
3972 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003973 if (cur != NULL)
3974 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003975 cur = ret;
3976 } else {
3977 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3978 if (n == NULL) return(NULL);
3979 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003980 if (n->c1 != NULL)
3981 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003982 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003983 if (n != NULL)
3984 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003985 cur = n;
3986 xmlFree(elem);
3987 }
3988 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003989 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003990 if (elem == NULL) {
3991 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3993 ctxt->sax->error(ctxt->userData,
3994 "xmlParseElementMixedContentDecl : Name expected\n");
3995 ctxt->wellFormed = 0;
3996 ctxt->disableSAX = 1;
3997 xmlFreeElementContent(cur);
3998 return(NULL);
3999 }
4000 SKIP_BLANKS;
4001 GROW;
4002 }
4003 if ((RAW == ')') && (NXT(1) == '*')) {
4004 if (elem != NULL) {
4005 cur->c2 = xmlNewElementContent(elem,
4006 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004007 if (cur->c2 != NULL)
4008 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 xmlFree(elem);
4010 }
4011 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4012 ctxt->entity = ctxt->input;
4013 SKIP(2);
4014 } else {
4015 if (elem != NULL) xmlFree(elem);
4016 xmlFreeElementContent(ret);
4017 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4019 ctxt->sax->error(ctxt->userData,
4020 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4021 ctxt->wellFormed = 0;
4022 ctxt->disableSAX = 1;
4023 return(NULL);
4024 }
4025
4026 } else {
4027 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4029 ctxt->sax->error(ctxt->userData,
4030 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4031 ctxt->wellFormed = 0;
4032 ctxt->disableSAX = 1;
4033 }
4034 return(ret);
4035}
4036
4037/**
4038 * xmlParseElementChildrenContentDecl:
4039 * @ctxt: an XML parser context
4040 *
4041 * parse the declaration for a Mixed Element content
4042 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4043 *
4044 *
4045 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4046 *
4047 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4048 *
4049 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4050 *
4051 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4052 *
4053 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4054 * TODO Parameter-entity replacement text must be properly nested
4055 * with parenthetized groups. That is to say, if either of the
4056 * opening or closing parentheses in a choice, seq, or Mixed
4057 * construct is contained in the replacement text for a parameter
4058 * entity, both must be contained in the same replacement text. For
4059 * interoperability, if a parameter-entity reference appears in a
4060 * choice, seq, or Mixed construct, its replacement text should not
4061 * be empty, and neither the first nor last non-blank character of
4062 * the replacement text should be a connector (| or ,).
4063 *
4064 * returns: the tree of xmlElementContentPtr describing the element
4065 * hierarchy.
4066 */
4067xmlElementContentPtr
4068#ifdef VMS
4069xmlParseElementChildrenContentD
4070#else
4071xmlParseElementChildrenContentDecl
4072#endif
4073(xmlParserCtxtPtr ctxt) {
4074 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4075 xmlChar *elem;
4076 xmlChar type = 0;
4077
4078 SKIP_BLANKS;
4079 GROW;
4080 if (RAW == '(') {
4081 /* Recurse on first child */
4082 NEXT;
4083 SKIP_BLANKS;
4084 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4085 SKIP_BLANKS;
4086 GROW;
4087 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004088 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004089 if (elem == NULL) {
4090 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092 ctxt->sax->error(ctxt->userData,
4093 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4094 ctxt->wellFormed = 0;
4095 ctxt->disableSAX = 1;
4096 return(NULL);
4097 }
4098 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4099 GROW;
4100 if (RAW == '?') {
4101 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4102 NEXT;
4103 } else if (RAW == '*') {
4104 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4105 NEXT;
4106 } else if (RAW == '+') {
4107 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4108 NEXT;
4109 } else {
4110 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4111 }
4112 xmlFree(elem);
4113 GROW;
4114 }
4115 SKIP_BLANKS;
4116 SHRINK;
4117 while (RAW != ')') {
4118 /*
4119 * Each loop we parse one separator and one element.
4120 */
4121 if (RAW == ',') {
4122 if (type == 0) type = CUR;
4123
4124 /*
4125 * Detect "Name | Name , Name" error
4126 */
4127 else if (type != CUR) {
4128 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4130 ctxt->sax->error(ctxt->userData,
4131 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4132 type);
4133 ctxt->wellFormed = 0;
4134 ctxt->disableSAX = 1;
4135 if ((op != NULL) && (op != ret))
4136 xmlFreeElementContent(op);
4137 if ((last != NULL) && (last != ret) &&
4138 (last != ret->c1) && (last != ret->c2))
4139 xmlFreeElementContent(last);
4140 if (ret != NULL)
4141 xmlFreeElementContent(ret);
4142 return(NULL);
4143 }
4144 NEXT;
4145
4146 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4147 if (op == NULL) {
4148 xmlFreeElementContent(ret);
4149 return(NULL);
4150 }
4151 if (last == NULL) {
4152 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004153 if (ret != NULL)
4154 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004155 ret = cur = op;
4156 } else {
4157 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004158 if (op != NULL)
4159 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004160 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004161 if (last != NULL)
4162 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004163 cur =op;
4164 last = NULL;
4165 }
4166 } else if (RAW == '|') {
4167 if (type == 0) type = CUR;
4168
4169 /*
4170 * Detect "Name , Name | Name" error
4171 */
4172 else if (type != CUR) {
4173 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4175 ctxt->sax->error(ctxt->userData,
4176 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4177 type);
4178 ctxt->wellFormed = 0;
4179 ctxt->disableSAX = 1;
4180 if ((op != NULL) && (op != ret) && (op != last))
4181 xmlFreeElementContent(op);
4182 if ((last != NULL) && (last != ret) &&
4183 (last != ret->c1) && (last != ret->c2))
4184 xmlFreeElementContent(last);
4185 if (ret != NULL)
4186 xmlFreeElementContent(ret);
4187 return(NULL);
4188 }
4189 NEXT;
4190
4191 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4192 if (op == NULL) {
4193 if ((op != NULL) && (op != ret))
4194 xmlFreeElementContent(op);
4195 if ((last != NULL) && (last != ret) &&
4196 (last != ret->c1) && (last != ret->c2))
4197 xmlFreeElementContent(last);
4198 if (ret != NULL)
4199 xmlFreeElementContent(ret);
4200 return(NULL);
4201 }
4202 if (last == NULL) {
4203 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004204 if (ret != NULL)
4205 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004206 ret = cur = op;
4207 } else {
4208 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004209 if (op != NULL)
4210 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004212 if (last != NULL)
4213 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 cur =op;
4215 last = NULL;
4216 }
4217 } else {
4218 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4220 ctxt->sax->error(ctxt->userData,
4221 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4222 ctxt->wellFormed = 0;
4223 ctxt->disableSAX = 1;
4224 if ((op != NULL) && (op != ret))
4225 xmlFreeElementContent(op);
4226 if ((last != NULL) && (last != ret) &&
4227 (last != ret->c1) && (last != ret->c2))
4228 xmlFreeElementContent(last);
4229 if (ret != NULL)
4230 xmlFreeElementContent(ret);
4231 return(NULL);
4232 }
4233 GROW;
4234 SKIP_BLANKS;
4235 GROW;
4236 if (RAW == '(') {
4237 /* Recurse on second child */
4238 NEXT;
4239 SKIP_BLANKS;
4240 last = xmlParseElementChildrenContentDecl(ctxt);
4241 SKIP_BLANKS;
4242 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004243 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004244 if (elem == NULL) {
4245 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4247 ctxt->sax->error(ctxt->userData,
4248 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4249 ctxt->wellFormed = 0;
4250 ctxt->disableSAX = 1;
4251 if ((op != NULL) && (op != ret))
4252 xmlFreeElementContent(op);
4253 if ((last != NULL) && (last != ret) &&
4254 (last != ret->c1) && (last != ret->c2))
4255 xmlFreeElementContent(last);
4256 if (ret != NULL)
4257 xmlFreeElementContent(ret);
4258 return(NULL);
4259 }
4260 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4261 xmlFree(elem);
4262 if (RAW == '?') {
4263 last->ocur = XML_ELEMENT_CONTENT_OPT;
4264 NEXT;
4265 } else if (RAW == '*') {
4266 last->ocur = XML_ELEMENT_CONTENT_MULT;
4267 NEXT;
4268 } else if (RAW == '+') {
4269 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4270 NEXT;
4271 } else {
4272 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4273 }
4274 }
4275 SKIP_BLANKS;
4276 GROW;
4277 }
4278 if ((cur != NULL) && (last != NULL)) {
4279 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004280 if (last != NULL)
4281 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004282 }
4283 ctxt->entity = ctxt->input;
4284 NEXT;
4285 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004286 if (ret != NULL)
4287 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 NEXT;
4289 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004290 if (ret != NULL)
4291 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004292 NEXT;
4293 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004294 if (ret != NULL)
4295 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004296 NEXT;
4297 }
4298 return(ret);
4299}
4300
4301/**
4302 * xmlParseElementContentDecl:
4303 * @ctxt: an XML parser context
4304 * @name: the name of the element being defined.
4305 * @result: the Element Content pointer will be stored here if any
4306 *
4307 * parse the declaration for an Element content either Mixed or Children,
4308 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4309 *
4310 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4311 *
4312 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4313 */
4314
4315int
4316xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4317 xmlElementContentPtr *result) {
4318
4319 xmlElementContentPtr tree = NULL;
4320 xmlParserInputPtr input = ctxt->input;
4321 int res;
4322
4323 *result = NULL;
4324
4325 if (RAW != '(') {
4326 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4328 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004329 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004330 ctxt->wellFormed = 0;
4331 ctxt->disableSAX = 1;
4332 return(-1);
4333 }
4334 NEXT;
4335 GROW;
4336 SKIP_BLANKS;
4337 if ((RAW == '#') && (NXT(1) == 'P') &&
4338 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4339 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4340 (NXT(6) == 'A')) {
4341 tree = xmlParseElementMixedContentDecl(ctxt);
4342 res = XML_ELEMENT_TYPE_MIXED;
4343 } else {
4344 tree = xmlParseElementChildrenContentDecl(ctxt);
4345 res = XML_ELEMENT_TYPE_ELEMENT;
4346 }
4347 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4348 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4350 ctxt->sax->error(ctxt->userData,
4351"Element content declaration doesn't start and stop in the same entity\n");
4352 ctxt->wellFormed = 0;
4353 ctxt->disableSAX = 1;
4354 }
4355 SKIP_BLANKS;
4356 *result = tree;
4357 return(res);
4358}
4359
4360/**
4361 * xmlParseElementDecl:
4362 * @ctxt: an XML parser context
4363 *
4364 * parse an Element declaration.
4365 *
4366 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4367 *
4368 * [ VC: Unique Element Type Declaration ]
4369 * No element type may be declared more than once
4370 *
4371 * Returns the type of the element, or -1 in case of error
4372 */
4373int
4374xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4375 xmlChar *name;
4376 int ret = -1;
4377 xmlElementContentPtr content = NULL;
4378
4379 GROW;
4380 if ((RAW == '<') && (NXT(1) == '!') &&
4381 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4382 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4383 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4384 (NXT(8) == 'T')) {
4385 xmlParserInputPtr input = ctxt->input;
4386
4387 SKIP(9);
4388 if (!IS_BLANK(CUR)) {
4389 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4391 ctxt->sax->error(ctxt->userData,
4392 "Space required after 'ELEMENT'\n");
4393 ctxt->wellFormed = 0;
4394 ctxt->disableSAX = 1;
4395 }
4396 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004397 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004398 if (name == NULL) {
4399 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4401 ctxt->sax->error(ctxt->userData,
4402 "xmlParseElementDecl: no name for Element\n");
4403 ctxt->wellFormed = 0;
4404 ctxt->disableSAX = 1;
4405 return(-1);
4406 }
4407 while ((RAW == 0) && (ctxt->inputNr > 1))
4408 xmlPopInput(ctxt);
4409 if (!IS_BLANK(CUR)) {
4410 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4412 ctxt->sax->error(ctxt->userData,
4413 "Space required after the element name\n");
4414 ctxt->wellFormed = 0;
4415 ctxt->disableSAX = 1;
4416 }
4417 SKIP_BLANKS;
4418 if ((RAW == 'E') && (NXT(1) == 'M') &&
4419 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4420 (NXT(4) == 'Y')) {
4421 SKIP(5);
4422 /*
4423 * Element must always be empty.
4424 */
4425 ret = XML_ELEMENT_TYPE_EMPTY;
4426 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4427 (NXT(2) == 'Y')) {
4428 SKIP(3);
4429 /*
4430 * Element is a generic container.
4431 */
4432 ret = XML_ELEMENT_TYPE_ANY;
4433 } else if (RAW == '(') {
4434 ret = xmlParseElementContentDecl(ctxt, name, &content);
4435 } else {
4436 /*
4437 * [ WFC: PEs in Internal Subset ] error handling.
4438 */
4439 if ((RAW == '%') && (ctxt->external == 0) &&
4440 (ctxt->inputNr == 1)) {
4441 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4443 ctxt->sax->error(ctxt->userData,
4444 "PEReference: forbidden within markup decl in internal subset\n");
4445 } else {
4446 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4448 ctxt->sax->error(ctxt->userData,
4449 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4450 }
4451 ctxt->wellFormed = 0;
4452 ctxt->disableSAX = 1;
4453 if (name != NULL) xmlFree(name);
4454 return(-1);
4455 }
4456
4457 SKIP_BLANKS;
4458 /*
4459 * Pop-up of finished entities.
4460 */
4461 while ((RAW == 0) && (ctxt->inputNr > 1))
4462 xmlPopInput(ctxt);
4463 SKIP_BLANKS;
4464
4465 if (RAW != '>') {
4466 ctxt->errNo = XML_ERR_GT_REQUIRED;
4467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4468 ctxt->sax->error(ctxt->userData,
4469 "xmlParseElementDecl: expected '>' at the end\n");
4470 ctxt->wellFormed = 0;
4471 ctxt->disableSAX = 1;
4472 } else {
4473 if (input != ctxt->input) {
4474 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4476 ctxt->sax->error(ctxt->userData,
4477"Element declaration doesn't start and stop in the same entity\n");
4478 ctxt->wellFormed = 0;
4479 ctxt->disableSAX = 1;
4480 }
4481
4482 NEXT;
4483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4484 (ctxt->sax->elementDecl != NULL))
4485 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4486 content);
4487 }
4488 if (content != NULL) {
4489 xmlFreeElementContent(content);
4490 }
4491 if (name != NULL) {
4492 xmlFree(name);
4493 }
4494 }
4495 return(ret);
4496}
4497
4498/**
4499 * xmlParseMarkupDecl:
4500 * @ctxt: an XML parser context
4501 *
4502 * parse Markup declarations
4503 *
4504 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4505 * NotationDecl | PI | Comment
4506 *
4507 * [ VC: Proper Declaration/PE Nesting ]
4508 * Parameter-entity replacement text must be properly nested with
4509 * markup declarations. That is to say, if either the first character
4510 * or the last character of a markup declaration (markupdecl above) is
4511 * contained in the replacement text for a parameter-entity reference,
4512 * both must be contained in the same replacement text.
4513 *
4514 * [ WFC: PEs in Internal Subset ]
4515 * In the internal DTD subset, parameter-entity references can occur
4516 * only where markup declarations can occur, not within markup declarations.
4517 * (This does not apply to references that occur in external parameter
4518 * entities or to the external subset.)
4519 */
4520void
4521xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4522 GROW;
4523 xmlParseElementDecl(ctxt);
4524 xmlParseAttributeListDecl(ctxt);
4525 xmlParseEntityDecl(ctxt);
4526 xmlParseNotationDecl(ctxt);
4527 xmlParsePI(ctxt);
4528 xmlParseComment(ctxt);
4529 /*
4530 * This is only for internal subset. On external entities,
4531 * the replacement is done before parsing stage
4532 */
4533 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4534 xmlParsePEReference(ctxt);
4535 ctxt->instate = XML_PARSER_DTD;
4536}
4537
4538/**
4539 * xmlParseTextDecl:
4540 * @ctxt: an XML parser context
4541 *
4542 * parse an XML declaration header for external entities
4543 *
4544 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4545 *
4546 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4547 */
4548
4549void
4550xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4551 xmlChar *version;
4552
4553 /*
4554 * We know that '<?xml' is here.
4555 */
4556 if ((RAW == '<') && (NXT(1) == '?') &&
4557 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4558 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4559 SKIP(5);
4560 } else {
4561 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4563 ctxt->sax->error(ctxt->userData,
4564 "Text declaration '<?xml' required\n");
4565 ctxt->wellFormed = 0;
4566 ctxt->disableSAX = 1;
4567
4568 return;
4569 }
4570
4571 if (!IS_BLANK(CUR)) {
4572 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4574 ctxt->sax->error(ctxt->userData,
4575 "Space needed after '<?xml'\n");
4576 ctxt->wellFormed = 0;
4577 ctxt->disableSAX = 1;
4578 }
4579 SKIP_BLANKS;
4580
4581 /*
4582 * We may have the VersionInfo here.
4583 */
4584 version = xmlParseVersionInfo(ctxt);
4585 if (version == NULL)
4586 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4587 ctxt->input->version = version;
4588
4589 /*
4590 * We must have the encoding declaration
4591 */
4592 if (!IS_BLANK(CUR)) {
4593 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4596 ctxt->wellFormed = 0;
4597 ctxt->disableSAX = 1;
4598 }
4599 xmlParseEncodingDecl(ctxt);
4600 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4601 /*
4602 * The XML REC instructs us to stop parsing right here
4603 */
4604 return;
4605 }
4606
4607 SKIP_BLANKS;
4608 if ((RAW == '?') && (NXT(1) == '>')) {
4609 SKIP(2);
4610 } else if (RAW == '>') {
4611 /* Deprecated old WD ... */
4612 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4614 ctxt->sax->error(ctxt->userData,
4615 "XML declaration must end-up with '?>'\n");
4616 ctxt->wellFormed = 0;
4617 ctxt->disableSAX = 1;
4618 NEXT;
4619 } else {
4620 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4622 ctxt->sax->error(ctxt->userData,
4623 "parsing XML declaration: '?>' expected\n");
4624 ctxt->wellFormed = 0;
4625 ctxt->disableSAX = 1;
4626 MOVETO_ENDTAG(CUR_PTR);
4627 NEXT;
4628 }
4629}
4630
4631/*
4632 * xmlParseConditionalSections
4633 * @ctxt: an XML parser context
4634 *
4635 * [61] conditionalSect ::= includeSect | ignoreSect
4636 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4637 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4638 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4639 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4640 */
4641
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004642static void
Owen Taylor3473f882001-02-23 17:55:21 +00004643xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4644 SKIP(3);
4645 SKIP_BLANKS;
4646 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4647 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4648 (NXT(6) == 'E')) {
4649 SKIP(7);
4650 SKIP_BLANKS;
4651 if (RAW != '[') {
4652 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4654 ctxt->sax->error(ctxt->userData,
4655 "XML conditional section '[' expected\n");
4656 ctxt->wellFormed = 0;
4657 ctxt->disableSAX = 1;
4658 } else {
4659 NEXT;
4660 }
4661 if (xmlParserDebugEntities) {
4662 if ((ctxt->input != NULL) && (ctxt->input->filename))
4663 xmlGenericError(xmlGenericErrorContext,
4664 "%s(%d): ", ctxt->input->filename,
4665 ctxt->input->line);
4666 xmlGenericError(xmlGenericErrorContext,
4667 "Entering INCLUDE Conditional Section\n");
4668 }
4669
4670 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4671 (NXT(2) != '>'))) {
4672 const xmlChar *check = CUR_PTR;
4673 int cons = ctxt->input->consumed;
4674 int tok = ctxt->token;
4675
4676 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4677 xmlParseConditionalSections(ctxt);
4678 } else if (IS_BLANK(CUR)) {
4679 NEXT;
4680 } else if (RAW == '%') {
4681 xmlParsePEReference(ctxt);
4682 } else
4683 xmlParseMarkupDecl(ctxt);
4684
4685 /*
4686 * Pop-up of finished entities.
4687 */
4688 while ((RAW == 0) && (ctxt->inputNr > 1))
4689 xmlPopInput(ctxt);
4690
4691 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4692 (tok == ctxt->token)) {
4693 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4695 ctxt->sax->error(ctxt->userData,
4696 "Content error in the external subset\n");
4697 ctxt->wellFormed = 0;
4698 ctxt->disableSAX = 1;
4699 break;
4700 }
4701 }
4702 if (xmlParserDebugEntities) {
4703 if ((ctxt->input != NULL) && (ctxt->input->filename))
4704 xmlGenericError(xmlGenericErrorContext,
4705 "%s(%d): ", ctxt->input->filename,
4706 ctxt->input->line);
4707 xmlGenericError(xmlGenericErrorContext,
4708 "Leaving INCLUDE Conditional Section\n");
4709 }
4710
4711 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4712 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4713 int state;
4714 int instate;
4715 int depth = 0;
4716
4717 SKIP(6);
4718 SKIP_BLANKS;
4719 if (RAW != '[') {
4720 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4722 ctxt->sax->error(ctxt->userData,
4723 "XML conditional section '[' expected\n");
4724 ctxt->wellFormed = 0;
4725 ctxt->disableSAX = 1;
4726 } else {
4727 NEXT;
4728 }
4729 if (xmlParserDebugEntities) {
4730 if ((ctxt->input != NULL) && (ctxt->input->filename))
4731 xmlGenericError(xmlGenericErrorContext,
4732 "%s(%d): ", ctxt->input->filename,
4733 ctxt->input->line);
4734 xmlGenericError(xmlGenericErrorContext,
4735 "Entering IGNORE Conditional Section\n");
4736 }
4737
4738 /*
4739 * Parse up to the end of the conditionnal section
4740 * But disable SAX event generating DTD building in the meantime
4741 */
4742 state = ctxt->disableSAX;
4743 instate = ctxt->instate;
4744 ctxt->disableSAX = 1;
4745 ctxt->instate = XML_PARSER_IGNORE;
4746
4747 while (depth >= 0) {
4748 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4749 depth++;
4750 SKIP(3);
4751 continue;
4752 }
4753 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4754 if (--depth >= 0) SKIP(3);
4755 continue;
4756 }
4757 NEXT;
4758 continue;
4759 }
4760
4761 ctxt->disableSAX = state;
4762 ctxt->instate = instate;
4763
4764 if (xmlParserDebugEntities) {
4765 if ((ctxt->input != NULL) && (ctxt->input->filename))
4766 xmlGenericError(xmlGenericErrorContext,
4767 "%s(%d): ", ctxt->input->filename,
4768 ctxt->input->line);
4769 xmlGenericError(xmlGenericErrorContext,
4770 "Leaving IGNORE Conditional Section\n");
4771 }
4772
4773 } else {
4774 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4776 ctxt->sax->error(ctxt->userData,
4777 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4778 ctxt->wellFormed = 0;
4779 ctxt->disableSAX = 1;
4780 }
4781
4782 if (RAW == 0)
4783 SHRINK;
4784
4785 if (RAW == 0) {
4786 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4788 ctxt->sax->error(ctxt->userData,
4789 "XML conditional section not closed\n");
4790 ctxt->wellFormed = 0;
4791 ctxt->disableSAX = 1;
4792 } else {
4793 SKIP(3);
4794 }
4795}
4796
4797/**
4798 * xmlParseExternalSubset:
4799 * @ctxt: an XML parser context
4800 * @ExternalID: the external identifier
4801 * @SystemID: the system identifier (or URL)
4802 *
4803 * parse Markup declarations from an external subset
4804 *
4805 * [30] extSubset ::= textDecl? extSubsetDecl
4806 *
4807 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4808 */
4809void
4810xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4811 const xmlChar *SystemID) {
4812 GROW;
4813 if ((RAW == '<') && (NXT(1) == '?') &&
4814 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4815 (NXT(4) == 'l')) {
4816 xmlParseTextDecl(ctxt);
4817 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4818 /*
4819 * The XML REC instructs us to stop parsing right here
4820 */
4821 ctxt->instate = XML_PARSER_EOF;
4822 return;
4823 }
4824 }
4825 if (ctxt->myDoc == NULL) {
4826 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4827 }
4828 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4829 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4830
4831 ctxt->instate = XML_PARSER_DTD;
4832 ctxt->external = 1;
4833 while (((RAW == '<') && (NXT(1) == '?')) ||
4834 ((RAW == '<') && (NXT(1) == '!')) ||
4835 IS_BLANK(CUR)) {
4836 const xmlChar *check = CUR_PTR;
4837 int cons = ctxt->input->consumed;
4838 int tok = ctxt->token;
4839
4840 GROW;
4841 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4842 xmlParseConditionalSections(ctxt);
4843 } else if (IS_BLANK(CUR)) {
4844 NEXT;
4845 } else if (RAW == '%') {
4846 xmlParsePEReference(ctxt);
4847 } else
4848 xmlParseMarkupDecl(ctxt);
4849
4850 /*
4851 * Pop-up of finished entities.
4852 */
4853 while ((RAW == 0) && (ctxt->inputNr > 1))
4854 xmlPopInput(ctxt);
4855
4856 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4857 (tok == ctxt->token)) {
4858 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4860 ctxt->sax->error(ctxt->userData,
4861 "Content error in the external subset\n");
4862 ctxt->wellFormed = 0;
4863 ctxt->disableSAX = 1;
4864 break;
4865 }
4866 }
4867
4868 if (RAW != 0) {
4869 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4871 ctxt->sax->error(ctxt->userData,
4872 "Extra content at the end of the document\n");
4873 ctxt->wellFormed = 0;
4874 ctxt->disableSAX = 1;
4875 }
4876
4877}
4878
4879/**
4880 * xmlParseReference:
4881 * @ctxt: an XML parser context
4882 *
4883 * parse and handle entity references in content, depending on the SAX
4884 * interface, this may end-up in a call to character() if this is a
4885 * CharRef, a predefined entity, if there is no reference() callback.
4886 * or if the parser was asked to switch to that mode.
4887 *
4888 * [67] Reference ::= EntityRef | CharRef
4889 */
4890void
4891xmlParseReference(xmlParserCtxtPtr ctxt) {
4892 xmlEntityPtr ent;
4893 xmlChar *val;
4894 if (RAW != '&') return;
4895
4896 if (NXT(1) == '#') {
4897 int i = 0;
4898 xmlChar out[10];
4899 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004900 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004901
4902 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4903 /*
4904 * So we are using non-UTF-8 buffers
4905 * Check that the char fit on 8bits, if not
4906 * generate a CharRef.
4907 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004908 if (value <= 0xFF) {
4909 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004910 out[1] = 0;
4911 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4912 (!ctxt->disableSAX))
4913 ctxt->sax->characters(ctxt->userData, out, 1);
4914 } else {
4915 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004916 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004917 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004918 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4920 (!ctxt->disableSAX))
4921 ctxt->sax->reference(ctxt->userData, out);
4922 }
4923 } else {
4924 /*
4925 * Just encode the value in UTF-8
4926 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004927 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004928 out[i] = 0;
4929 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4930 (!ctxt->disableSAX))
4931 ctxt->sax->characters(ctxt->userData, out, i);
4932 }
4933 } else {
4934 ent = xmlParseEntityRef(ctxt);
4935 if (ent == NULL) return;
4936 if ((ent->name != NULL) &&
4937 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4938 xmlNodePtr list = NULL;
4939 int ret;
4940
4941
4942 /*
4943 * The first reference to the entity trigger a parsing phase
4944 * where the ent->children is filled with the result from
4945 * the parsing.
4946 */
4947 if (ent->children == NULL) {
4948 xmlChar *value;
4949 value = ent->content;
4950
4951 /*
4952 * Check that this entity is well formed
4953 */
4954 if ((value != NULL) &&
4955 (value[1] == 0) && (value[0] == '<') &&
4956 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4957 /*
4958 * DONE: get definite answer on this !!!
4959 * Lots of entity decls are used to declare a single
4960 * char
4961 * <!ENTITY lt "<">
4962 * Which seems to be valid since
4963 * 2.4: The ampersand character (&) and the left angle
4964 * bracket (<) may appear in their literal form only
4965 * when used ... They are also legal within the literal
4966 * entity value of an internal entity declaration;i
4967 * see "4.3.2 Well-Formed Parsed Entities".
4968 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4969 * Looking at the OASIS test suite and James Clark
4970 * tests, this is broken. However the XML REC uses
4971 * it. Is the XML REC not well-formed ????
4972 * This is a hack to avoid this problem
4973 *
4974 * ANSWER: since lt gt amp .. are already defined,
4975 * this is a redefinition and hence the fact that the
4976 * contentis not well balanced is not a Wf error, this
4977 * is lousy but acceptable.
4978 */
4979 list = xmlNewDocText(ctxt->myDoc, value);
4980 if (list != NULL) {
4981 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4982 (ent->children == NULL)) {
4983 ent->children = list;
4984 ent->last = list;
4985 list->parent = (xmlNodePtr) ent;
4986 } else {
4987 xmlFreeNodeList(list);
4988 }
4989 } else if (list != NULL) {
4990 xmlFreeNodeList(list);
4991 }
4992 } else {
4993 /*
4994 * 4.3.2: An internal general parsed entity is well-formed
4995 * if its replacement text matches the production labeled
4996 * content.
4997 */
4998 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4999 ctxt->depth++;
5000 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5001 ctxt->sax, NULL, ctxt->depth,
5002 value, &list);
5003 ctxt->depth--;
5004 } else if (ent->etype ==
5005 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5006 ctxt->depth++;
Daniel Veillard257d9102001-05-08 10:41:44 +00005007 ret = xmlParseExternalEntityPrivate(ctxt->myDoc,
Owen Taylor3473f882001-02-23 17:55:21 +00005008 ctxt->sax, NULL, ctxt->depth,
Daniel Veillard257d9102001-05-08 10:41:44 +00005009 ent->URI, ent->ExternalID, &list,
5010 ctxt->_private);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 ctxt->depth--;
5012 } else {
5013 ret = -1;
5014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5015 ctxt->sax->error(ctxt->userData,
5016 "Internal: invalid entity type\n");
5017 }
5018 if (ret == XML_ERR_ENTITY_LOOP) {
5019 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5021 ctxt->sax->error(ctxt->userData,
5022 "Detected entity reference loop\n");
5023 ctxt->wellFormed = 0;
5024 ctxt->disableSAX = 1;
5025 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005026 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5027 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005028 (ent->children == NULL)) {
5029 ent->children = list;
5030 while (list != NULL) {
5031 list->parent = (xmlNodePtr) ent;
5032 if (list->next == NULL)
5033 ent->last = list;
5034 list = list->next;
5035 }
5036 } else {
5037 xmlFreeNodeList(list);
5038 }
5039 } else if (ret > 0) {
5040 ctxt->errNo = ret;
5041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5042 ctxt->sax->error(ctxt->userData,
5043 "Entity value required\n");
5044 ctxt->wellFormed = 0;
5045 ctxt->disableSAX = 1;
5046 } else if (list != NULL) {
5047 xmlFreeNodeList(list);
5048 }
5049 }
5050 }
5051 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5052 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5053 /*
5054 * Create a node.
5055 */
5056 ctxt->sax->reference(ctxt->userData, ent->name);
5057 return;
5058 } else if (ctxt->replaceEntities) {
5059 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5060 /*
5061 * Seems we are generating the DOM content, do
5062 * a simple tree copy
5063 */
5064 xmlNodePtr new;
5065 new = xmlCopyNodeList(ent->children);
5066
5067 xmlAddChildList(ctxt->node, new);
5068 /*
5069 * This is to avoid a nasty side effect, see
5070 * characters() in SAX.c
5071 */
5072 ctxt->nodemem = 0;
5073 ctxt->nodelen = 0;
5074 return;
5075 } else {
5076 /*
5077 * Probably running in SAX mode
5078 */
5079 xmlParserInputPtr input;
5080
5081 input = xmlNewEntityInputStream(ctxt, ent);
5082 xmlPushInput(ctxt, input);
5083 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5084 (RAW == '<') && (NXT(1) == '?') &&
5085 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5086 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5087 xmlParseTextDecl(ctxt);
5088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5089 /*
5090 * The XML REC instructs us to stop parsing right here
5091 */
5092 ctxt->instate = XML_PARSER_EOF;
5093 return;
5094 }
5095 if (input->standalone == 1) {
5096 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5098 ctxt->sax->error(ctxt->userData,
5099 "external parsed entities cannot be standalone\n");
5100 ctxt->wellFormed = 0;
5101 ctxt->disableSAX = 1;
5102 }
5103 }
5104 return;
5105 }
5106 }
5107 } else {
5108 val = ent->content;
5109 if (val == NULL) return;
5110 /*
5111 * inline the entity.
5112 */
5113 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5114 (!ctxt->disableSAX))
5115 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5116 }
5117 }
5118}
5119
5120/**
5121 * xmlParseEntityRef:
5122 * @ctxt: an XML parser context
5123 *
5124 * parse ENTITY references declarations
5125 *
5126 * [68] EntityRef ::= '&' Name ';'
5127 *
5128 * [ WFC: Entity Declared ]
5129 * In a document without any DTD, a document with only an internal DTD
5130 * subset which contains no parameter entity references, or a document
5131 * with "standalone='yes'", the Name given in the entity reference
5132 * must match that in an entity declaration, except that well-formed
5133 * documents need not declare any of the following entities: amp, lt,
5134 * gt, apos, quot. The declaration of a parameter entity must precede
5135 * any reference to it. Similarly, the declaration of a general entity
5136 * must precede any reference to it which appears in a default value in an
5137 * attribute-list declaration. Note that if entities are declared in the
5138 * external subset or in external parameter entities, a non-validating
5139 * processor is not obligated to read and process their declarations;
5140 * for such documents, the rule that an entity must be declared is a
5141 * well-formedness constraint only if standalone='yes'.
5142 *
5143 * [ WFC: Parsed Entity ]
5144 * An entity reference must not contain the name of an unparsed entity
5145 *
5146 * Returns the xmlEntityPtr if found, or NULL otherwise.
5147 */
5148xmlEntityPtr
5149xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5150 xmlChar *name;
5151 xmlEntityPtr ent = NULL;
5152
5153 GROW;
5154
5155 if (RAW == '&') {
5156 NEXT;
5157 name = xmlParseName(ctxt);
5158 if (name == NULL) {
5159 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161 ctxt->sax->error(ctxt->userData,
5162 "xmlParseEntityRef: no name\n");
5163 ctxt->wellFormed = 0;
5164 ctxt->disableSAX = 1;
5165 } else {
5166 if (RAW == ';') {
5167 NEXT;
5168 /*
5169 * Ask first SAX for entity resolution, otherwise try the
5170 * predefined set.
5171 */
5172 if (ctxt->sax != NULL) {
5173 if (ctxt->sax->getEntity != NULL)
5174 ent = ctxt->sax->getEntity(ctxt->userData, name);
5175 if (ent == NULL)
5176 ent = xmlGetPredefinedEntity(name);
5177 }
5178 /*
5179 * [ WFC: Entity Declared ]
5180 * In a document without any DTD, a document with only an
5181 * internal DTD subset which contains no parameter entity
5182 * references, or a document with "standalone='yes'", the
5183 * Name given in the entity reference must match that in an
5184 * entity declaration, except that well-formed documents
5185 * need not declare any of the following entities: amp, lt,
5186 * gt, apos, quot.
5187 * The declaration of a parameter entity must precede any
5188 * reference to it.
5189 * Similarly, the declaration of a general entity must
5190 * precede any reference to it which appears in a default
5191 * value in an attribute-list declaration. Note that if
5192 * entities are declared in the external subset or in
5193 * external parameter entities, a non-validating processor
5194 * is not obligated to read and process their declarations;
5195 * for such documents, the rule that an entity must be
5196 * declared is a well-formedness constraint only if
5197 * standalone='yes'.
5198 */
5199 if (ent == NULL) {
5200 if ((ctxt->standalone == 1) ||
5201 ((ctxt->hasExternalSubset == 0) &&
5202 (ctxt->hasPErefs == 0))) {
5203 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5205 ctxt->sax->error(ctxt->userData,
5206 "Entity '%s' not defined\n", name);
5207 ctxt->wellFormed = 0;
5208 ctxt->disableSAX = 1;
5209 } else {
5210 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5211 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5212 ctxt->sax->warning(ctxt->userData,
5213 "Entity '%s' not defined\n", name);
5214 }
5215 }
5216
5217 /*
5218 * [ WFC: Parsed Entity ]
5219 * An entity reference must not contain the name of an
5220 * unparsed entity
5221 */
5222 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5223 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5225 ctxt->sax->error(ctxt->userData,
5226 "Entity reference to unparsed entity %s\n", name);
5227 ctxt->wellFormed = 0;
5228 ctxt->disableSAX = 1;
5229 }
5230
5231 /*
5232 * [ WFC: No External Entity References ]
5233 * Attribute values cannot contain direct or indirect
5234 * entity references to external entities.
5235 */
5236 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5237 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5238 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5240 ctxt->sax->error(ctxt->userData,
5241 "Attribute references external entity '%s'\n", name);
5242 ctxt->wellFormed = 0;
5243 ctxt->disableSAX = 1;
5244 }
5245 /*
5246 * [ WFC: No < in Attribute Values ]
5247 * The replacement text of any entity referred to directly or
5248 * indirectly in an attribute value (other than "&lt;") must
5249 * not contain a <.
5250 */
5251 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5252 (ent != NULL) &&
5253 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5254 (ent->content != NULL) &&
5255 (xmlStrchr(ent->content, '<'))) {
5256 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5258 ctxt->sax->error(ctxt->userData,
5259 "'<' in entity '%s' is not allowed in attributes values\n", name);
5260 ctxt->wellFormed = 0;
5261 ctxt->disableSAX = 1;
5262 }
5263
5264 /*
5265 * Internal check, no parameter entities here ...
5266 */
5267 else {
5268 switch (ent->etype) {
5269 case XML_INTERNAL_PARAMETER_ENTITY:
5270 case XML_EXTERNAL_PARAMETER_ENTITY:
5271 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData,
5274 "Attempt to reference the parameter entity '%s'\n", name);
5275 ctxt->wellFormed = 0;
5276 ctxt->disableSAX = 1;
5277 break;
5278 default:
5279 break;
5280 }
5281 }
5282
5283 /*
5284 * [ WFC: No Recursion ]
5285 * A parsed entity must not contain a recursive reference
5286 * to itself, either directly or indirectly.
5287 * Done somewhere else
5288 */
5289
5290 } else {
5291 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5293 ctxt->sax->error(ctxt->userData,
5294 "xmlParseEntityRef: expecting ';'\n");
5295 ctxt->wellFormed = 0;
5296 ctxt->disableSAX = 1;
5297 }
5298 xmlFree(name);
5299 }
5300 }
5301 return(ent);
5302}
5303
5304/**
5305 * xmlParseStringEntityRef:
5306 * @ctxt: an XML parser context
5307 * @str: a pointer to an index in the string
5308 *
5309 * parse ENTITY references declarations, but this version parses it from
5310 * a string value.
5311 *
5312 * [68] EntityRef ::= '&' Name ';'
5313 *
5314 * [ WFC: Entity Declared ]
5315 * In a document without any DTD, a document with only an internal DTD
5316 * subset which contains no parameter entity references, or a document
5317 * with "standalone='yes'", the Name given in the entity reference
5318 * must match that in an entity declaration, except that well-formed
5319 * documents need not declare any of the following entities: amp, lt,
5320 * gt, apos, quot. The declaration of a parameter entity must precede
5321 * any reference to it. Similarly, the declaration of a general entity
5322 * must precede any reference to it which appears in a default value in an
5323 * attribute-list declaration. Note that if entities are declared in the
5324 * external subset or in external parameter entities, a non-validating
5325 * processor is not obligated to read and process their declarations;
5326 * for such documents, the rule that an entity must be declared is a
5327 * well-formedness constraint only if standalone='yes'.
5328 *
5329 * [ WFC: Parsed Entity ]
5330 * An entity reference must not contain the name of an unparsed entity
5331 *
5332 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5333 * is updated to the current location in the string.
5334 */
5335xmlEntityPtr
5336xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5337 xmlChar *name;
5338 const xmlChar *ptr;
5339 xmlChar cur;
5340 xmlEntityPtr ent = NULL;
5341
5342 if ((str == NULL) || (*str == NULL))
5343 return(NULL);
5344 ptr = *str;
5345 cur = *ptr;
5346 if (cur == '&') {
5347 ptr++;
5348 cur = *ptr;
5349 name = xmlParseStringName(ctxt, &ptr);
5350 if (name == NULL) {
5351 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5353 ctxt->sax->error(ctxt->userData,
5354 "xmlParseEntityRef: no name\n");
5355 ctxt->wellFormed = 0;
5356 ctxt->disableSAX = 1;
5357 } else {
5358 if (*ptr == ';') {
5359 ptr++;
5360 /*
5361 * Ask first SAX for entity resolution, otherwise try the
5362 * predefined set.
5363 */
5364 if (ctxt->sax != NULL) {
5365 if (ctxt->sax->getEntity != NULL)
5366 ent = ctxt->sax->getEntity(ctxt->userData, name);
5367 if (ent == NULL)
5368 ent = xmlGetPredefinedEntity(name);
5369 }
5370 /*
5371 * [ WFC: Entity Declared ]
5372 * In a document without any DTD, a document with only an
5373 * internal DTD subset which contains no parameter entity
5374 * references, or a document with "standalone='yes'", the
5375 * Name given in the entity reference must match that in an
5376 * entity declaration, except that well-formed documents
5377 * need not declare any of the following entities: amp, lt,
5378 * gt, apos, quot.
5379 * The declaration of a parameter entity must precede any
5380 * reference to it.
5381 * Similarly, the declaration of a general entity must
5382 * precede any reference to it which appears in a default
5383 * value in an attribute-list declaration. Note that if
5384 * entities are declared in the external subset or in
5385 * external parameter entities, a non-validating processor
5386 * is not obligated to read and process their declarations;
5387 * for such documents, the rule that an entity must be
5388 * declared is a well-formedness constraint only if
5389 * standalone='yes'.
5390 */
5391 if (ent == NULL) {
5392 if ((ctxt->standalone == 1) ||
5393 ((ctxt->hasExternalSubset == 0) &&
5394 (ctxt->hasPErefs == 0))) {
5395 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5397 ctxt->sax->error(ctxt->userData,
5398 "Entity '%s' not defined\n", name);
5399 ctxt->wellFormed = 0;
5400 ctxt->disableSAX = 1;
5401 } else {
5402 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5403 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5404 ctxt->sax->warning(ctxt->userData,
5405 "Entity '%s' not defined\n", name);
5406 }
5407 }
5408
5409 /*
5410 * [ WFC: Parsed Entity ]
5411 * An entity reference must not contain the name of an
5412 * unparsed entity
5413 */
5414 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5415 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5417 ctxt->sax->error(ctxt->userData,
5418 "Entity reference to unparsed entity %s\n", name);
5419 ctxt->wellFormed = 0;
5420 ctxt->disableSAX = 1;
5421 }
5422
5423 /*
5424 * [ WFC: No External Entity References ]
5425 * Attribute values cannot contain direct or indirect
5426 * entity references to external entities.
5427 */
5428 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5429 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5430 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5432 ctxt->sax->error(ctxt->userData,
5433 "Attribute references external entity '%s'\n", name);
5434 ctxt->wellFormed = 0;
5435 ctxt->disableSAX = 1;
5436 }
5437 /*
5438 * [ WFC: No < in Attribute Values ]
5439 * The replacement text of any entity referred to directly or
5440 * indirectly in an attribute value (other than "&lt;") must
5441 * not contain a <.
5442 */
5443 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5444 (ent != NULL) &&
5445 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5446 (ent->content != NULL) &&
5447 (xmlStrchr(ent->content, '<'))) {
5448 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5450 ctxt->sax->error(ctxt->userData,
5451 "'<' in entity '%s' is not allowed in attributes values\n", name);
5452 ctxt->wellFormed = 0;
5453 ctxt->disableSAX = 1;
5454 }
5455
5456 /*
5457 * Internal check, no parameter entities here ...
5458 */
5459 else {
5460 switch (ent->etype) {
5461 case XML_INTERNAL_PARAMETER_ENTITY:
5462 case XML_EXTERNAL_PARAMETER_ENTITY:
5463 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5465 ctxt->sax->error(ctxt->userData,
5466 "Attempt to reference the parameter entity '%s'\n", name);
5467 ctxt->wellFormed = 0;
5468 ctxt->disableSAX = 1;
5469 break;
5470 default:
5471 break;
5472 }
5473 }
5474
5475 /*
5476 * [ WFC: No Recursion ]
5477 * A parsed entity must not contain a recursive reference
5478 * to itself, either directly or indirectly.
5479 * Done somewhwere else
5480 */
5481
5482 } else {
5483 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5485 ctxt->sax->error(ctxt->userData,
5486 "xmlParseEntityRef: expecting ';'\n");
5487 ctxt->wellFormed = 0;
5488 ctxt->disableSAX = 1;
5489 }
5490 xmlFree(name);
5491 }
5492 }
5493 *str = ptr;
5494 return(ent);
5495}
5496
5497/**
5498 * xmlParsePEReference:
5499 * @ctxt: an XML parser context
5500 *
5501 * parse PEReference declarations
5502 * The entity content is handled directly by pushing it's content as
5503 * a new input stream.
5504 *
5505 * [69] PEReference ::= '%' Name ';'
5506 *
5507 * [ WFC: No Recursion ]
5508 * A parsed entity must not contain a recursive
5509 * reference to itself, either directly or indirectly.
5510 *
5511 * [ WFC: Entity Declared ]
5512 * In a document without any DTD, a document with only an internal DTD
5513 * subset which contains no parameter entity references, or a document
5514 * with "standalone='yes'", ... ... The declaration of a parameter
5515 * entity must precede any reference to it...
5516 *
5517 * [ VC: Entity Declared ]
5518 * In a document with an external subset or external parameter entities
5519 * with "standalone='no'", ... ... The declaration of a parameter entity
5520 * must precede any reference to it...
5521 *
5522 * [ WFC: In DTD ]
5523 * Parameter-entity references may only appear in the DTD.
5524 * NOTE: misleading but this is handled.
5525 */
5526void
5527xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5528 xmlChar *name;
5529 xmlEntityPtr entity = NULL;
5530 xmlParserInputPtr input;
5531
5532 if (RAW == '%') {
5533 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005534 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005535 if (name == NULL) {
5536 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "xmlParsePEReference: no name\n");
5540 ctxt->wellFormed = 0;
5541 ctxt->disableSAX = 1;
5542 } else {
5543 if (RAW == ';') {
5544 NEXT;
5545 if ((ctxt->sax != NULL) &&
5546 (ctxt->sax->getParameterEntity != NULL))
5547 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5548 name);
5549 if (entity == NULL) {
5550 /*
5551 * [ WFC: Entity Declared ]
5552 * In a document without any DTD, a document with only an
5553 * internal DTD subset which contains no parameter entity
5554 * references, or a document with "standalone='yes'", ...
5555 * ... The declaration of a parameter entity must precede
5556 * any reference to it...
5557 */
5558 if ((ctxt->standalone == 1) ||
5559 ((ctxt->hasExternalSubset == 0) &&
5560 (ctxt->hasPErefs == 0))) {
5561 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5562 if ((!ctxt->disableSAX) &&
5563 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5564 ctxt->sax->error(ctxt->userData,
5565 "PEReference: %%%s; not found\n", name);
5566 ctxt->wellFormed = 0;
5567 ctxt->disableSAX = 1;
5568 } else {
5569 /*
5570 * [ VC: Entity Declared ]
5571 * In a document with an external subset or external
5572 * parameter entities with "standalone='no'", ...
5573 * ... The declaration of a parameter entity must precede
5574 * any reference to it...
5575 */
5576 if ((!ctxt->disableSAX) &&
5577 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5578 ctxt->sax->warning(ctxt->userData,
5579 "PEReference: %%%s; not found\n", name);
5580 ctxt->valid = 0;
5581 }
5582 } else {
5583 /*
5584 * Internal checking in case the entity quest barfed
5585 */
5586 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5587 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5588 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5589 ctxt->sax->warning(ctxt->userData,
5590 "Internal: %%%s; is not a parameter entity\n", name);
5591 } else {
5592 /*
5593 * TODO !!!
5594 * handle the extra spaces added before and after
5595 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5596 */
5597 input = xmlNewEntityInputStream(ctxt, entity);
5598 xmlPushInput(ctxt, input);
5599 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5600 (RAW == '<') && (NXT(1) == '?') &&
5601 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5602 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5603 xmlParseTextDecl(ctxt);
5604 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5605 /*
5606 * The XML REC instructs us to stop parsing
5607 * right here
5608 */
5609 ctxt->instate = XML_PARSER_EOF;
5610 xmlFree(name);
5611 return;
5612 }
5613 }
5614 if (ctxt->token == 0)
5615 ctxt->token = ' ';
5616 }
5617 }
5618 ctxt->hasPErefs = 1;
5619 } else {
5620 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5621 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5622 ctxt->sax->error(ctxt->userData,
5623 "xmlParsePEReference: expecting ';'\n");
5624 ctxt->wellFormed = 0;
5625 ctxt->disableSAX = 1;
5626 }
5627 xmlFree(name);
5628 }
5629 }
5630}
5631
5632/**
5633 * xmlParseStringPEReference:
5634 * @ctxt: an XML parser context
5635 * @str: a pointer to an index in the string
5636 *
5637 * parse PEReference declarations
5638 *
5639 * [69] PEReference ::= '%' Name ';'
5640 *
5641 * [ WFC: No Recursion ]
5642 * A parsed entity must not contain a recursive
5643 * reference to itself, either directly or indirectly.
5644 *
5645 * [ WFC: Entity Declared ]
5646 * In a document without any DTD, a document with only an internal DTD
5647 * subset which contains no parameter entity references, or a document
5648 * with "standalone='yes'", ... ... The declaration of a parameter
5649 * entity must precede any reference to it...
5650 *
5651 * [ VC: Entity Declared ]
5652 * In a document with an external subset or external parameter entities
5653 * with "standalone='no'", ... ... The declaration of a parameter entity
5654 * must precede any reference to it...
5655 *
5656 * [ WFC: In DTD ]
5657 * Parameter-entity references may only appear in the DTD.
5658 * NOTE: misleading but this is handled.
5659 *
5660 * Returns the string of the entity content.
5661 * str is updated to the current value of the index
5662 */
5663xmlEntityPtr
5664xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5665 const xmlChar *ptr;
5666 xmlChar cur;
5667 xmlChar *name;
5668 xmlEntityPtr entity = NULL;
5669
5670 if ((str == NULL) || (*str == NULL)) return(NULL);
5671 ptr = *str;
5672 cur = *ptr;
5673 if (cur == '%') {
5674 ptr++;
5675 cur = *ptr;
5676 name = xmlParseStringName(ctxt, &ptr);
5677 if (name == NULL) {
5678 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5680 ctxt->sax->error(ctxt->userData,
5681 "xmlParseStringPEReference: no name\n");
5682 ctxt->wellFormed = 0;
5683 ctxt->disableSAX = 1;
5684 } else {
5685 cur = *ptr;
5686 if (cur == ';') {
5687 ptr++;
5688 cur = *ptr;
5689 if ((ctxt->sax != NULL) &&
5690 (ctxt->sax->getParameterEntity != NULL))
5691 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5692 name);
5693 if (entity == NULL) {
5694 /*
5695 * [ WFC: Entity Declared ]
5696 * In a document without any DTD, a document with only an
5697 * internal DTD subset which contains no parameter entity
5698 * references, or a document with "standalone='yes'", ...
5699 * ... The declaration of a parameter entity must precede
5700 * any reference to it...
5701 */
5702 if ((ctxt->standalone == 1) ||
5703 ((ctxt->hasExternalSubset == 0) &&
5704 (ctxt->hasPErefs == 0))) {
5705 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5707 ctxt->sax->error(ctxt->userData,
5708 "PEReference: %%%s; not found\n", name);
5709 ctxt->wellFormed = 0;
5710 ctxt->disableSAX = 1;
5711 } else {
5712 /*
5713 * [ VC: Entity Declared ]
5714 * In a document with an external subset or external
5715 * parameter entities with "standalone='no'", ...
5716 * ... The declaration of a parameter entity must
5717 * precede any reference to it...
5718 */
5719 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5720 ctxt->sax->warning(ctxt->userData,
5721 "PEReference: %%%s; not found\n", name);
5722 ctxt->valid = 0;
5723 }
5724 } else {
5725 /*
5726 * Internal checking in case the entity quest barfed
5727 */
5728 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5729 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5730 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5731 ctxt->sax->warning(ctxt->userData,
5732 "Internal: %%%s; is not a parameter entity\n", name);
5733 }
5734 }
5735 ctxt->hasPErefs = 1;
5736 } else {
5737 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData,
5740 "xmlParseStringPEReference: expecting ';'\n");
5741 ctxt->wellFormed = 0;
5742 ctxt->disableSAX = 1;
5743 }
5744 xmlFree(name);
5745 }
5746 }
5747 *str = ptr;
5748 return(entity);
5749}
5750
5751/**
5752 * xmlParseDocTypeDecl:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse a DOCTYPE declaration
5756 *
5757 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5758 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5759 *
5760 * [ VC: Root Element Type ]
5761 * The Name in the document type declaration must match the element
5762 * type of the root element.
5763 */
5764
5765void
5766xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5767 xmlChar *name = NULL;
5768 xmlChar *ExternalID = NULL;
5769 xmlChar *URI = NULL;
5770
5771 /*
5772 * We know that '<!DOCTYPE' has been detected.
5773 */
5774 SKIP(9);
5775
5776 SKIP_BLANKS;
5777
5778 /*
5779 * Parse the DOCTYPE name.
5780 */
5781 name = xmlParseName(ctxt);
5782 if (name == NULL) {
5783 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5785 ctxt->sax->error(ctxt->userData,
5786 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5787 ctxt->wellFormed = 0;
5788 ctxt->disableSAX = 1;
5789 }
5790 ctxt->intSubName = name;
5791
5792 SKIP_BLANKS;
5793
5794 /*
5795 * Check for SystemID and ExternalID
5796 */
5797 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5798
5799 if ((URI != NULL) || (ExternalID != NULL)) {
5800 ctxt->hasExternalSubset = 1;
5801 }
5802 ctxt->extSubURI = URI;
5803 ctxt->extSubSystem = ExternalID;
5804
5805 SKIP_BLANKS;
5806
5807 /*
5808 * Create and update the internal subset.
5809 */
5810 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5811 (!ctxt->disableSAX))
5812 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5813
5814 /*
5815 * Is there any internal subset declarations ?
5816 * they are handled separately in xmlParseInternalSubset()
5817 */
5818 if (RAW == '[')
5819 return;
5820
5821 /*
5822 * We should be at the end of the DOCTYPE declaration.
5823 */
5824 if (RAW != '>') {
5825 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5827 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5828 ctxt->wellFormed = 0;
5829 ctxt->disableSAX = 1;
5830 }
5831 NEXT;
5832}
5833
5834/**
5835 * xmlParseInternalsubset:
5836 * @ctxt: an XML parser context
5837 *
5838 * parse the internal subset declaration
5839 *
5840 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5841 */
5842
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005843static void
Owen Taylor3473f882001-02-23 17:55:21 +00005844xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5845 /*
5846 * Is there any DTD definition ?
5847 */
5848 if (RAW == '[') {
5849 ctxt->instate = XML_PARSER_DTD;
5850 NEXT;
5851 /*
5852 * Parse the succession of Markup declarations and
5853 * PEReferences.
5854 * Subsequence (markupdecl | PEReference | S)*
5855 */
5856 while (RAW != ']') {
5857 const xmlChar *check = CUR_PTR;
5858 int cons = ctxt->input->consumed;
5859
5860 SKIP_BLANKS;
5861 xmlParseMarkupDecl(ctxt);
5862 xmlParsePEReference(ctxt);
5863
5864 /*
5865 * Pop-up of finished entities.
5866 */
5867 while ((RAW == 0) && (ctxt->inputNr > 1))
5868 xmlPopInput(ctxt);
5869
5870 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5871 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5873 ctxt->sax->error(ctxt->userData,
5874 "xmlParseInternalSubset: error detected in Markup declaration\n");
5875 ctxt->wellFormed = 0;
5876 ctxt->disableSAX = 1;
5877 break;
5878 }
5879 }
5880 if (RAW == ']') {
5881 NEXT;
5882 SKIP_BLANKS;
5883 }
5884 }
5885
5886 /*
5887 * We should be at the end of the DOCTYPE declaration.
5888 */
5889 if (RAW != '>') {
5890 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5892 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5893 ctxt->wellFormed = 0;
5894 ctxt->disableSAX = 1;
5895 }
5896 NEXT;
5897}
5898
5899/**
5900 * xmlParseAttribute:
5901 * @ctxt: an XML parser context
5902 * @value: a xmlChar ** used to store the value of the attribute
5903 *
5904 * parse an attribute
5905 *
5906 * [41] Attribute ::= Name Eq AttValue
5907 *
5908 * [ WFC: No External Entity References ]
5909 * Attribute values cannot contain direct or indirect entity references
5910 * to external entities.
5911 *
5912 * [ WFC: No < in Attribute Values ]
5913 * The replacement text of any entity referred to directly or indirectly in
5914 * an attribute value (other than "&lt;") must not contain a <.
5915 *
5916 * [ VC: Attribute Value Type ]
5917 * The attribute must have been declared; the value must be of the type
5918 * declared for it.
5919 *
5920 * [25] Eq ::= S? '=' S?
5921 *
5922 * With namespace:
5923 *
5924 * [NS 11] Attribute ::= QName Eq AttValue
5925 *
5926 * Also the case QName == xmlns:??? is handled independently as a namespace
5927 * definition.
5928 *
5929 * Returns the attribute name, and the value in *value.
5930 */
5931
5932xmlChar *
5933xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5934 xmlChar *name, *val;
5935
5936 *value = NULL;
5937 name = xmlParseName(ctxt);
5938 if (name == NULL) {
5939 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5942 ctxt->wellFormed = 0;
5943 ctxt->disableSAX = 1;
5944 return(NULL);
5945 }
5946
5947 /*
5948 * read the value
5949 */
5950 SKIP_BLANKS;
5951 if (RAW == '=') {
5952 NEXT;
5953 SKIP_BLANKS;
5954 val = xmlParseAttValue(ctxt);
5955 ctxt->instate = XML_PARSER_CONTENT;
5956 } else {
5957 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "Specification mandate value for attribute %s\n", name);
5961 ctxt->wellFormed = 0;
5962 ctxt->disableSAX = 1;
5963 xmlFree(name);
5964 return(NULL);
5965 }
5966
5967 /*
5968 * Check that xml:lang conforms to the specification
5969 * No more registered as an error, just generate a warning now
5970 * since this was deprecated in XML second edition
5971 */
5972 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5973 if (!xmlCheckLanguageID(val)) {
5974 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5975 ctxt->sax->warning(ctxt->userData,
5976 "Malformed value for xml:lang : %s\n", val);
5977 }
5978 }
5979
5980 /*
5981 * Check that xml:space conforms to the specification
5982 */
5983 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5984 if (xmlStrEqual(val, BAD_CAST "default"))
5985 *(ctxt->space) = 0;
5986 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5987 *(ctxt->space) = 1;
5988 else {
5989 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5991 ctxt->sax->error(ctxt->userData,
5992"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5993 val);
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 }
5997 }
5998
5999 *value = val;
6000 return(name);
6001}
6002
6003/**
6004 * xmlParseStartTag:
6005 * @ctxt: an XML parser context
6006 *
6007 * parse a start of tag either for rule element or
6008 * EmptyElement. In both case we don't parse the tag closing chars.
6009 *
6010 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6011 *
6012 * [ WFC: Unique Att Spec ]
6013 * No attribute name may appear more than once in the same start-tag or
6014 * empty-element tag.
6015 *
6016 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6017 *
6018 * [ WFC: Unique Att Spec ]
6019 * No attribute name may appear more than once in the same start-tag or
6020 * empty-element tag.
6021 *
6022 * With namespace:
6023 *
6024 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6025 *
6026 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6027 *
6028 * Returns the element name parsed
6029 */
6030
6031xmlChar *
6032xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6033 xmlChar *name;
6034 xmlChar *attname;
6035 xmlChar *attvalue;
6036 const xmlChar **atts = NULL;
6037 int nbatts = 0;
6038 int maxatts = 0;
6039 int i;
6040
6041 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006042 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006043
6044 name = xmlParseName(ctxt);
6045 if (name == NULL) {
6046 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6048 ctxt->sax->error(ctxt->userData,
6049 "xmlParseStartTag: invalid element name\n");
6050 ctxt->wellFormed = 0;
6051 ctxt->disableSAX = 1;
6052 return(NULL);
6053 }
6054
6055 /*
6056 * Now parse the attributes, it ends up with the ending
6057 *
6058 * (S Attribute)* S?
6059 */
6060 SKIP_BLANKS;
6061 GROW;
6062
Daniel Veillard21a0f912001-02-25 19:54:14 +00006063 while ((RAW != '>') &&
6064 ((RAW != '/') || (NXT(1) != '>')) &&
6065 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006066 const xmlChar *q = CUR_PTR;
6067 int cons = ctxt->input->consumed;
6068
6069 attname = xmlParseAttribute(ctxt, &attvalue);
6070 if ((attname != NULL) && (attvalue != NULL)) {
6071 /*
6072 * [ WFC: Unique Att Spec ]
6073 * No attribute name may appear more than once in the same
6074 * start-tag or empty-element tag.
6075 */
6076 for (i = 0; i < nbatts;i += 2) {
6077 if (xmlStrEqual(atts[i], attname)) {
6078 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6080 ctxt->sax->error(ctxt->userData,
6081 "Attribute %s redefined\n",
6082 attname);
6083 ctxt->wellFormed = 0;
6084 ctxt->disableSAX = 1;
6085 xmlFree(attname);
6086 xmlFree(attvalue);
6087 goto failed;
6088 }
6089 }
6090
6091 /*
6092 * Add the pair to atts
6093 */
6094 if (atts == NULL) {
6095 maxatts = 10;
6096 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6097 if (atts == NULL) {
6098 xmlGenericError(xmlGenericErrorContext,
6099 "malloc of %ld byte failed\n",
6100 maxatts * (long)sizeof(xmlChar *));
6101 return(NULL);
6102 }
6103 } else if (nbatts + 4 > maxatts) {
6104 maxatts *= 2;
6105 atts = (const xmlChar **) xmlRealloc((void *) atts,
6106 maxatts * sizeof(xmlChar *));
6107 if (atts == NULL) {
6108 xmlGenericError(xmlGenericErrorContext,
6109 "realloc of %ld byte failed\n",
6110 maxatts * (long)sizeof(xmlChar *));
6111 return(NULL);
6112 }
6113 }
6114 atts[nbatts++] = attname;
6115 atts[nbatts++] = attvalue;
6116 atts[nbatts] = NULL;
6117 atts[nbatts + 1] = NULL;
6118 } else {
6119 if (attname != NULL)
6120 xmlFree(attname);
6121 if (attvalue != NULL)
6122 xmlFree(attvalue);
6123 }
6124
6125failed:
6126
6127 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6128 break;
6129 if (!IS_BLANK(RAW)) {
6130 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6132 ctxt->sax->error(ctxt->userData,
6133 "attributes construct error\n");
6134 ctxt->wellFormed = 0;
6135 ctxt->disableSAX = 1;
6136 }
6137 SKIP_BLANKS;
6138 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6139 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6141 ctxt->sax->error(ctxt->userData,
6142 "xmlParseStartTag: problem parsing attributes\n");
6143 ctxt->wellFormed = 0;
6144 ctxt->disableSAX = 1;
6145 break;
6146 }
6147 GROW;
6148 }
6149
6150 /*
6151 * SAX: Start of Element !
6152 */
6153 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6154 (!ctxt->disableSAX))
6155 ctxt->sax->startElement(ctxt->userData, name, atts);
6156
6157 if (atts != NULL) {
6158 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6159 xmlFree((void *) atts);
6160 }
6161 return(name);
6162}
6163
6164/**
6165 * xmlParseEndTag:
6166 * @ctxt: an XML parser context
6167 *
6168 * parse an end of tag
6169 *
6170 * [42] ETag ::= '</' Name S? '>'
6171 *
6172 * With namespace
6173 *
6174 * [NS 9] ETag ::= '</' QName S? '>'
6175 */
6176
6177void
6178xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6179 xmlChar *name;
6180 xmlChar *oldname;
6181
6182 GROW;
6183 if ((RAW != '<') || (NXT(1) != '/')) {
6184 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6186 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6187 ctxt->wellFormed = 0;
6188 ctxt->disableSAX = 1;
6189 return;
6190 }
6191 SKIP(2);
6192
6193 name = xmlParseName(ctxt);
6194
6195 /*
6196 * We should definitely be at the ending "S? '>'" part
6197 */
6198 GROW;
6199 SKIP_BLANKS;
6200 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6201 ctxt->errNo = XML_ERR_GT_REQUIRED;
6202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6203 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6204 ctxt->wellFormed = 0;
6205 ctxt->disableSAX = 1;
6206 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006207 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006208
6209 /*
6210 * [ WFC: Element Type Match ]
6211 * The Name in an element's end-tag must match the element type in the
6212 * start-tag.
6213 *
6214 */
6215 if ((name == NULL) || (ctxt->name == NULL) ||
6216 (!xmlStrEqual(name, ctxt->name))) {
6217 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6219 if ((name != NULL) && (ctxt->name != NULL)) {
6220 ctxt->sax->error(ctxt->userData,
6221 "Opening and ending tag mismatch: %s and %s\n",
6222 ctxt->name, name);
6223 } else if (ctxt->name != NULL) {
6224 ctxt->sax->error(ctxt->userData,
6225 "Ending tag eror for: %s\n", ctxt->name);
6226 } else {
6227 ctxt->sax->error(ctxt->userData,
6228 "Ending tag error: internal error ???\n");
6229 }
6230
6231 }
6232 ctxt->wellFormed = 0;
6233 ctxt->disableSAX = 1;
6234 }
6235
6236 /*
6237 * SAX: End of Tag
6238 */
6239 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6240 (!ctxt->disableSAX))
6241 ctxt->sax->endElement(ctxt->userData, name);
6242
6243 if (name != NULL)
6244 xmlFree(name);
6245 oldname = namePop(ctxt);
6246 spacePop(ctxt);
6247 if (oldname != NULL) {
6248#ifdef DEBUG_STACK
6249 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6250#endif
6251 xmlFree(oldname);
6252 }
6253 return;
6254}
6255
6256/**
6257 * xmlParseCDSect:
6258 * @ctxt: an XML parser context
6259 *
6260 * Parse escaped pure raw content.
6261 *
6262 * [18] CDSect ::= CDStart CData CDEnd
6263 *
6264 * [19] CDStart ::= '<![CDATA['
6265 *
6266 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6267 *
6268 * [21] CDEnd ::= ']]>'
6269 */
6270void
6271xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6272 xmlChar *buf = NULL;
6273 int len = 0;
6274 int size = XML_PARSER_BUFFER_SIZE;
6275 int r, rl;
6276 int s, sl;
6277 int cur, l;
6278 int count = 0;
6279
6280 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6281 (NXT(2) == '[') && (NXT(3) == 'C') &&
6282 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6283 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6284 (NXT(8) == '[')) {
6285 SKIP(9);
6286 } else
6287 return;
6288
6289 ctxt->instate = XML_PARSER_CDATA_SECTION;
6290 r = CUR_CHAR(rl);
6291 if (!IS_CHAR(r)) {
6292 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6294 ctxt->sax->error(ctxt->userData,
6295 "CData section not finished\n");
6296 ctxt->wellFormed = 0;
6297 ctxt->disableSAX = 1;
6298 ctxt->instate = XML_PARSER_CONTENT;
6299 return;
6300 }
6301 NEXTL(rl);
6302 s = CUR_CHAR(sl);
6303 if (!IS_CHAR(s)) {
6304 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6306 ctxt->sax->error(ctxt->userData,
6307 "CData section not finished\n");
6308 ctxt->wellFormed = 0;
6309 ctxt->disableSAX = 1;
6310 ctxt->instate = XML_PARSER_CONTENT;
6311 return;
6312 }
6313 NEXTL(sl);
6314 cur = CUR_CHAR(l);
6315 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6316 if (buf == NULL) {
6317 xmlGenericError(xmlGenericErrorContext,
6318 "malloc of %d byte failed\n", size);
6319 return;
6320 }
6321 while (IS_CHAR(cur) &&
6322 ((r != ']') || (s != ']') || (cur != '>'))) {
6323 if (len + 5 >= size) {
6324 size *= 2;
6325 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6326 if (buf == NULL) {
6327 xmlGenericError(xmlGenericErrorContext,
6328 "realloc of %d byte failed\n", size);
6329 return;
6330 }
6331 }
6332 COPY_BUF(rl,buf,len,r);
6333 r = s;
6334 rl = sl;
6335 s = cur;
6336 sl = l;
6337 count++;
6338 if (count > 50) {
6339 GROW;
6340 count = 0;
6341 }
6342 NEXTL(l);
6343 cur = CUR_CHAR(l);
6344 }
6345 buf[len] = 0;
6346 ctxt->instate = XML_PARSER_CONTENT;
6347 if (cur != '>') {
6348 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6350 ctxt->sax->error(ctxt->userData,
6351 "CData section not finished\n%.50s\n", buf);
6352 ctxt->wellFormed = 0;
6353 ctxt->disableSAX = 1;
6354 xmlFree(buf);
6355 return;
6356 }
6357 NEXTL(l);
6358
6359 /*
6360 * Ok the buffer is to be consumed as cdata.
6361 */
6362 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6363 if (ctxt->sax->cdataBlock != NULL)
6364 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6365 }
6366 xmlFree(buf);
6367}
6368
6369/**
6370 * xmlParseContent:
6371 * @ctxt: an XML parser context
6372 *
6373 * Parse a content:
6374 *
6375 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6376 */
6377
6378void
6379xmlParseContent(xmlParserCtxtPtr ctxt) {
6380 GROW;
6381 while (((RAW != 0) || (ctxt->token != 0)) &&
6382 ((RAW != '<') || (NXT(1) != '/'))) {
6383 const xmlChar *test = CUR_PTR;
6384 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006385 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006386 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006387
6388 /*
6389 * Handle possible processed charrefs.
6390 */
6391 if (ctxt->token != 0) {
6392 xmlParseCharData(ctxt, 0);
6393 }
6394 /*
6395 * First case : a Processing Instruction.
6396 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006397 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006398 xmlParsePI(ctxt);
6399 }
6400
6401 /*
6402 * Second case : a CDSection
6403 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006404 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006405 (NXT(2) == '[') && (NXT(3) == 'C') &&
6406 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6407 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6408 (NXT(8) == '[')) {
6409 xmlParseCDSect(ctxt);
6410 }
6411
6412 /*
6413 * Third case : a comment
6414 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006415 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006416 (NXT(2) == '-') && (NXT(3) == '-')) {
6417 xmlParseComment(ctxt);
6418 ctxt->instate = XML_PARSER_CONTENT;
6419 }
6420
6421 /*
6422 * Fourth case : a sub-element.
6423 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006424 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006425 xmlParseElement(ctxt);
6426 }
6427
6428 /*
6429 * Fifth case : a reference. If if has not been resolved,
6430 * parsing returns it's Name, create the node
6431 */
6432
Daniel Veillard21a0f912001-02-25 19:54:14 +00006433 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006434 xmlParseReference(ctxt);
6435 }
6436
6437 /*
6438 * Last case, text. Note that References are handled directly.
6439 */
6440 else {
6441 xmlParseCharData(ctxt, 0);
6442 }
6443
6444 GROW;
6445 /*
6446 * Pop-up of finished entities.
6447 */
6448 while ((RAW == 0) && (ctxt->inputNr > 1))
6449 xmlPopInput(ctxt);
6450 SHRINK;
6451
6452 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6453 (tok == ctxt->token)) {
6454 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6456 ctxt->sax->error(ctxt->userData,
6457 "detected an error in element content\n");
6458 ctxt->wellFormed = 0;
6459 ctxt->disableSAX = 1;
6460 ctxt->instate = XML_PARSER_EOF;
6461 break;
6462 }
6463 }
6464}
6465
6466/**
6467 * xmlParseElement:
6468 * @ctxt: an XML parser context
6469 *
6470 * parse an XML element, this is highly recursive
6471 *
6472 * [39] element ::= EmptyElemTag | STag content ETag
6473 *
6474 * [ WFC: Element Type Match ]
6475 * The Name in an element's end-tag must match the element type in the
6476 * start-tag.
6477 *
6478 * [ VC: Element Valid ]
6479 * An element is valid if there is a declaration matching elementdecl
6480 * where the Name matches the element type and one of the following holds:
6481 * - The declaration matches EMPTY and the element has no content.
6482 * - The declaration matches children and the sequence of child elements
6483 * belongs to the language generated by the regular expression in the
6484 * content model, with optional white space (characters matching the
6485 * nonterminal S) between each pair of child elements.
6486 * - The declaration matches Mixed and the content consists of character
6487 * data and child elements whose types match names in the content model.
6488 * - The declaration matches ANY, and the types of any child elements have
6489 * been declared.
6490 */
6491
6492void
6493xmlParseElement(xmlParserCtxtPtr ctxt) {
6494 const xmlChar *openTag = CUR_PTR;
6495 xmlChar *name;
6496 xmlChar *oldname;
6497 xmlParserNodeInfo node_info;
6498 xmlNodePtr ret;
6499
6500 /* Capture start position */
6501 if (ctxt->record_info) {
6502 node_info.begin_pos = ctxt->input->consumed +
6503 (CUR_PTR - ctxt->input->base);
6504 node_info.begin_line = ctxt->input->line;
6505 }
6506
6507 if (ctxt->spaceNr == 0)
6508 spacePush(ctxt, -1);
6509 else
6510 spacePush(ctxt, *ctxt->space);
6511
6512 name = xmlParseStartTag(ctxt);
6513 if (name == NULL) {
6514 spacePop(ctxt);
6515 return;
6516 }
6517 namePush(ctxt, name);
6518 ret = ctxt->node;
6519
6520 /*
6521 * [ VC: Root Element Type ]
6522 * The Name in the document type declaration must match the element
6523 * type of the root element.
6524 */
6525 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6526 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6527 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6528
6529 /*
6530 * Check for an Empty Element.
6531 */
6532 if ((RAW == '/') && (NXT(1) == '>')) {
6533 SKIP(2);
6534 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6535 (!ctxt->disableSAX))
6536 ctxt->sax->endElement(ctxt->userData, name);
6537 oldname = namePop(ctxt);
6538 spacePop(ctxt);
6539 if (oldname != NULL) {
6540#ifdef DEBUG_STACK
6541 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6542#endif
6543 xmlFree(oldname);
6544 }
6545 if ( ret != NULL && ctxt->record_info ) {
6546 node_info.end_pos = ctxt->input->consumed +
6547 (CUR_PTR - ctxt->input->base);
6548 node_info.end_line = ctxt->input->line;
6549 node_info.node = ret;
6550 xmlParserAddNodeInfo(ctxt, &node_info);
6551 }
6552 return;
6553 }
6554 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006555 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006556 } else {
6557 ctxt->errNo = XML_ERR_GT_REQUIRED;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "Couldn't find end of Start Tag\n%.30s\n",
6561 openTag);
6562 ctxt->wellFormed = 0;
6563 ctxt->disableSAX = 1;
6564
6565 /*
6566 * end of parsing of this node.
6567 */
6568 nodePop(ctxt);
6569 oldname = namePop(ctxt);
6570 spacePop(ctxt);
6571 if (oldname != NULL) {
6572#ifdef DEBUG_STACK
6573 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6574#endif
6575 xmlFree(oldname);
6576 }
6577
6578 /*
6579 * Capture end position and add node
6580 */
6581 if ( ret != NULL && ctxt->record_info ) {
6582 node_info.end_pos = ctxt->input->consumed +
6583 (CUR_PTR - ctxt->input->base);
6584 node_info.end_line = ctxt->input->line;
6585 node_info.node = ret;
6586 xmlParserAddNodeInfo(ctxt, &node_info);
6587 }
6588 return;
6589 }
6590
6591 /*
6592 * Parse the content of the element:
6593 */
6594 xmlParseContent(ctxt);
6595 if (!IS_CHAR(RAW)) {
6596 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6598 ctxt->sax->error(ctxt->userData,
6599 "Premature end of data in tag %.30s\n", openTag);
6600 ctxt->wellFormed = 0;
6601 ctxt->disableSAX = 1;
6602
6603 /*
6604 * end of parsing of this node.
6605 */
6606 nodePop(ctxt);
6607 oldname = namePop(ctxt);
6608 spacePop(ctxt);
6609 if (oldname != NULL) {
6610#ifdef DEBUG_STACK
6611 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6612#endif
6613 xmlFree(oldname);
6614 }
6615 return;
6616 }
6617
6618 /*
6619 * parse the end of tag: '</' should be here.
6620 */
6621 xmlParseEndTag(ctxt);
6622
6623 /*
6624 * Capture end position and add node
6625 */
6626 if ( ret != NULL && ctxt->record_info ) {
6627 node_info.end_pos = ctxt->input->consumed +
6628 (CUR_PTR - ctxt->input->base);
6629 node_info.end_line = ctxt->input->line;
6630 node_info.node = ret;
6631 xmlParserAddNodeInfo(ctxt, &node_info);
6632 }
6633}
6634
6635/**
6636 * xmlParseVersionNum:
6637 * @ctxt: an XML parser context
6638 *
6639 * parse the XML version value.
6640 *
6641 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6642 *
6643 * Returns the string giving the XML version number, or NULL
6644 */
6645xmlChar *
6646xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6647 xmlChar *buf = NULL;
6648 int len = 0;
6649 int size = 10;
6650 xmlChar cur;
6651
6652 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6653 if (buf == NULL) {
6654 xmlGenericError(xmlGenericErrorContext,
6655 "malloc of %d byte failed\n", size);
6656 return(NULL);
6657 }
6658 cur = CUR;
6659 while (((cur >= 'a') && (cur <= 'z')) ||
6660 ((cur >= 'A') && (cur <= 'Z')) ||
6661 ((cur >= '0') && (cur <= '9')) ||
6662 (cur == '_') || (cur == '.') ||
6663 (cur == ':') || (cur == '-')) {
6664 if (len + 1 >= size) {
6665 size *= 2;
6666 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6667 if (buf == NULL) {
6668 xmlGenericError(xmlGenericErrorContext,
6669 "realloc of %d byte failed\n", size);
6670 return(NULL);
6671 }
6672 }
6673 buf[len++] = cur;
6674 NEXT;
6675 cur=CUR;
6676 }
6677 buf[len] = 0;
6678 return(buf);
6679}
6680
6681/**
6682 * xmlParseVersionInfo:
6683 * @ctxt: an XML parser context
6684 *
6685 * parse the XML version.
6686 *
6687 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6688 *
6689 * [25] Eq ::= S? '=' S?
6690 *
6691 * Returns the version string, e.g. "1.0"
6692 */
6693
6694xmlChar *
6695xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6696 xmlChar *version = NULL;
6697 const xmlChar *q;
6698
6699 if ((RAW == 'v') && (NXT(1) == 'e') &&
6700 (NXT(2) == 'r') && (NXT(3) == 's') &&
6701 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6702 (NXT(6) == 'n')) {
6703 SKIP(7);
6704 SKIP_BLANKS;
6705 if (RAW != '=') {
6706 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "xmlParseVersionInfo : expected '='\n");
6710 ctxt->wellFormed = 0;
6711 ctxt->disableSAX = 1;
6712 return(NULL);
6713 }
6714 NEXT;
6715 SKIP_BLANKS;
6716 if (RAW == '"') {
6717 NEXT;
6718 q = CUR_PTR;
6719 version = xmlParseVersionNum(ctxt);
6720 if (RAW != '"') {
6721 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6723 ctxt->sax->error(ctxt->userData,
6724 "String not closed\n%.50s\n", q);
6725 ctxt->wellFormed = 0;
6726 ctxt->disableSAX = 1;
6727 } else
6728 NEXT;
6729 } else if (RAW == '\''){
6730 NEXT;
6731 q = CUR_PTR;
6732 version = xmlParseVersionNum(ctxt);
6733 if (RAW != '\'') {
6734 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6736 ctxt->sax->error(ctxt->userData,
6737 "String not closed\n%.50s\n", q);
6738 ctxt->wellFormed = 0;
6739 ctxt->disableSAX = 1;
6740 } else
6741 NEXT;
6742 } else {
6743 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6745 ctxt->sax->error(ctxt->userData,
6746 "xmlParseVersionInfo : expected ' or \"\n");
6747 ctxt->wellFormed = 0;
6748 ctxt->disableSAX = 1;
6749 }
6750 }
6751 return(version);
6752}
6753
6754/**
6755 * xmlParseEncName:
6756 * @ctxt: an XML parser context
6757 *
6758 * parse the XML encoding name
6759 *
6760 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6761 *
6762 * Returns the encoding name value or NULL
6763 */
6764xmlChar *
6765xmlParseEncName(xmlParserCtxtPtr ctxt) {
6766 xmlChar *buf = NULL;
6767 int len = 0;
6768 int size = 10;
6769 xmlChar cur;
6770
6771 cur = CUR;
6772 if (((cur >= 'a') && (cur <= 'z')) ||
6773 ((cur >= 'A') && (cur <= 'Z'))) {
6774 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6775 if (buf == NULL) {
6776 xmlGenericError(xmlGenericErrorContext,
6777 "malloc of %d byte failed\n", size);
6778 return(NULL);
6779 }
6780
6781 buf[len++] = cur;
6782 NEXT;
6783 cur = CUR;
6784 while (((cur >= 'a') && (cur <= 'z')) ||
6785 ((cur >= 'A') && (cur <= 'Z')) ||
6786 ((cur >= '0') && (cur <= '9')) ||
6787 (cur == '.') || (cur == '_') ||
6788 (cur == '-')) {
6789 if (len + 1 >= size) {
6790 size *= 2;
6791 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6792 if (buf == NULL) {
6793 xmlGenericError(xmlGenericErrorContext,
6794 "realloc of %d byte failed\n", size);
6795 return(NULL);
6796 }
6797 }
6798 buf[len++] = cur;
6799 NEXT;
6800 cur = CUR;
6801 if (cur == 0) {
6802 SHRINK;
6803 GROW;
6804 cur = CUR;
6805 }
6806 }
6807 buf[len] = 0;
6808 } else {
6809 ctxt->errNo = XML_ERR_ENCODING_NAME;
6810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6811 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6812 ctxt->wellFormed = 0;
6813 ctxt->disableSAX = 1;
6814 }
6815 return(buf);
6816}
6817
6818/**
6819 * xmlParseEncodingDecl:
6820 * @ctxt: an XML parser context
6821 *
6822 * parse the XML encoding declaration
6823 *
6824 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6825 *
6826 * this setups the conversion filters.
6827 *
6828 * Returns the encoding value or NULL
6829 */
6830
6831xmlChar *
6832xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6833 xmlChar *encoding = NULL;
6834 const xmlChar *q;
6835
6836 SKIP_BLANKS;
6837 if ((RAW == 'e') && (NXT(1) == 'n') &&
6838 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6839 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6840 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6841 SKIP(8);
6842 SKIP_BLANKS;
6843 if (RAW != '=') {
6844 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6846 ctxt->sax->error(ctxt->userData,
6847 "xmlParseEncodingDecl : expected '='\n");
6848 ctxt->wellFormed = 0;
6849 ctxt->disableSAX = 1;
6850 return(NULL);
6851 }
6852 NEXT;
6853 SKIP_BLANKS;
6854 if (RAW == '"') {
6855 NEXT;
6856 q = CUR_PTR;
6857 encoding = xmlParseEncName(ctxt);
6858 if (RAW != '"') {
6859 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6861 ctxt->sax->error(ctxt->userData,
6862 "String not closed\n%.50s\n", q);
6863 ctxt->wellFormed = 0;
6864 ctxt->disableSAX = 1;
6865 } else
6866 NEXT;
6867 } else if (RAW == '\''){
6868 NEXT;
6869 q = CUR_PTR;
6870 encoding = xmlParseEncName(ctxt);
6871 if (RAW != '\'') {
6872 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6874 ctxt->sax->error(ctxt->userData,
6875 "String not closed\n%.50s\n", q);
6876 ctxt->wellFormed = 0;
6877 ctxt->disableSAX = 1;
6878 } else
6879 NEXT;
6880 } else if (RAW == '"'){
6881 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6883 ctxt->sax->error(ctxt->userData,
6884 "xmlParseEncodingDecl : expected ' or \"\n");
6885 ctxt->wellFormed = 0;
6886 ctxt->disableSAX = 1;
6887 }
6888 if (encoding != NULL) {
6889 xmlCharEncoding enc;
6890 xmlCharEncodingHandlerPtr handler;
6891
6892 if (ctxt->input->encoding != NULL)
6893 xmlFree((xmlChar *) ctxt->input->encoding);
6894 ctxt->input->encoding = encoding;
6895
6896 enc = xmlParseCharEncoding((const char *) encoding);
6897 /*
6898 * registered set of known encodings
6899 */
6900 if (enc != XML_CHAR_ENCODING_ERROR) {
6901 xmlSwitchEncoding(ctxt, enc);
6902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6903 xmlFree(encoding);
6904 return(NULL);
6905 }
6906 } else {
6907 /*
6908 * fallback for unknown encodings
6909 */
6910 handler = xmlFindCharEncodingHandler((const char *) encoding);
6911 if (handler != NULL) {
6912 xmlSwitchToEncoding(ctxt, handler);
6913 } else {
6914 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6916 ctxt->sax->error(ctxt->userData,
6917 "Unsupported encoding %s\n", encoding);
6918 return(NULL);
6919 }
6920 }
6921 }
6922 }
6923 return(encoding);
6924}
6925
6926/**
6927 * xmlParseSDDecl:
6928 * @ctxt: an XML parser context
6929 *
6930 * parse the XML standalone declaration
6931 *
6932 * [32] SDDecl ::= S 'standalone' Eq
6933 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6934 *
6935 * [ VC: Standalone Document Declaration ]
6936 * TODO The standalone document declaration must have the value "no"
6937 * if any external markup declarations contain declarations of:
6938 * - attributes with default values, if elements to which these
6939 * attributes apply appear in the document without specifications
6940 * of values for these attributes, or
6941 * - entities (other than amp, lt, gt, apos, quot), if references
6942 * to those entities appear in the document, or
6943 * - attributes with values subject to normalization, where the
6944 * attribute appears in the document with a value which will change
6945 * as a result of normalization, or
6946 * - element types with element content, if white space occurs directly
6947 * within any instance of those types.
6948 *
6949 * Returns 1 if standalone, 0 otherwise
6950 */
6951
6952int
6953xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6954 int standalone = -1;
6955
6956 SKIP_BLANKS;
6957 if ((RAW == 's') && (NXT(1) == 't') &&
6958 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6959 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6960 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6961 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6962 SKIP(10);
6963 SKIP_BLANKS;
6964 if (RAW != '=') {
6965 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6967 ctxt->sax->error(ctxt->userData,
6968 "XML standalone declaration : expected '='\n");
6969 ctxt->wellFormed = 0;
6970 ctxt->disableSAX = 1;
6971 return(standalone);
6972 }
6973 NEXT;
6974 SKIP_BLANKS;
6975 if (RAW == '\''){
6976 NEXT;
6977 if ((RAW == 'n') && (NXT(1) == 'o')) {
6978 standalone = 0;
6979 SKIP(2);
6980 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6981 (NXT(2) == 's')) {
6982 standalone = 1;
6983 SKIP(3);
6984 } else {
6985 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6987 ctxt->sax->error(ctxt->userData,
6988 "standalone accepts only 'yes' or 'no'\n");
6989 ctxt->wellFormed = 0;
6990 ctxt->disableSAX = 1;
6991 }
6992 if (RAW != '\'') {
6993 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6995 ctxt->sax->error(ctxt->userData, "String not closed\n");
6996 ctxt->wellFormed = 0;
6997 ctxt->disableSAX = 1;
6998 } else
6999 NEXT;
7000 } else if (RAW == '"'){
7001 NEXT;
7002 if ((RAW == 'n') && (NXT(1) == 'o')) {
7003 standalone = 0;
7004 SKIP(2);
7005 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7006 (NXT(2) == 's')) {
7007 standalone = 1;
7008 SKIP(3);
7009 } else {
7010 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7012 ctxt->sax->error(ctxt->userData,
7013 "standalone accepts only 'yes' or 'no'\n");
7014 ctxt->wellFormed = 0;
7015 ctxt->disableSAX = 1;
7016 }
7017 if (RAW != '"') {
7018 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7020 ctxt->sax->error(ctxt->userData, "String not closed\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 } else
7024 NEXT;
7025 } else {
7026 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7028 ctxt->sax->error(ctxt->userData,
7029 "Standalone value not found\n");
7030 ctxt->wellFormed = 0;
7031 ctxt->disableSAX = 1;
7032 }
7033 }
7034 return(standalone);
7035}
7036
7037/**
7038 * xmlParseXMLDecl:
7039 * @ctxt: an XML parser context
7040 *
7041 * parse an XML declaration header
7042 *
7043 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7044 */
7045
7046void
7047xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7048 xmlChar *version;
7049
7050 /*
7051 * We know that '<?xml' is here.
7052 */
7053 SKIP(5);
7054
7055 if (!IS_BLANK(RAW)) {
7056 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7058 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061 }
7062 SKIP_BLANKS;
7063
7064 /*
7065 * We should have the VersionInfo here.
7066 */
7067 version = xmlParseVersionInfo(ctxt);
7068 if (version == NULL)
7069 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7070 ctxt->version = xmlStrdup(version);
7071 xmlFree(version);
7072
7073 /*
7074 * We may have the encoding declaration
7075 */
7076 if (!IS_BLANK(RAW)) {
7077 if ((RAW == '?') && (NXT(1) == '>')) {
7078 SKIP(2);
7079 return;
7080 }
7081 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7083 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7084 ctxt->wellFormed = 0;
7085 ctxt->disableSAX = 1;
7086 }
7087 xmlParseEncodingDecl(ctxt);
7088 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7089 /*
7090 * The XML REC instructs us to stop parsing right here
7091 */
7092 return;
7093 }
7094
7095 /*
7096 * We may have the standalone status.
7097 */
7098 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7099 if ((RAW == '?') && (NXT(1) == '>')) {
7100 SKIP(2);
7101 return;
7102 }
7103 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7105 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 }
7109 SKIP_BLANKS;
7110 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7111
7112 SKIP_BLANKS;
7113 if ((RAW == '?') && (NXT(1) == '>')) {
7114 SKIP(2);
7115 } else if (RAW == '>') {
7116 /* Deprecated old WD ... */
7117 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7119 ctxt->sax->error(ctxt->userData,
7120 "XML declaration must end-up with '?>'\n");
7121 ctxt->wellFormed = 0;
7122 ctxt->disableSAX = 1;
7123 NEXT;
7124 } else {
7125 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127 ctxt->sax->error(ctxt->userData,
7128 "parsing XML declaration: '?>' expected\n");
7129 ctxt->wellFormed = 0;
7130 ctxt->disableSAX = 1;
7131 MOVETO_ENDTAG(CUR_PTR);
7132 NEXT;
7133 }
7134}
7135
7136/**
7137 * xmlParseMisc:
7138 * @ctxt: an XML parser context
7139 *
7140 * parse an XML Misc* optionnal field.
7141 *
7142 * [27] Misc ::= Comment | PI | S
7143 */
7144
7145void
7146xmlParseMisc(xmlParserCtxtPtr ctxt) {
7147 while (((RAW == '<') && (NXT(1) == '?')) ||
7148 ((RAW == '<') && (NXT(1) == '!') &&
7149 (NXT(2) == '-') && (NXT(3) == '-')) ||
7150 IS_BLANK(CUR)) {
7151 if ((RAW == '<') && (NXT(1) == '?')) {
7152 xmlParsePI(ctxt);
7153 } else if (IS_BLANK(CUR)) {
7154 NEXT;
7155 } else
7156 xmlParseComment(ctxt);
7157 }
7158}
7159
7160/**
7161 * xmlParseDocument:
7162 * @ctxt: an XML parser context
7163 *
7164 * parse an XML document (and build a tree if using the standard SAX
7165 * interface).
7166 *
7167 * [1] document ::= prolog element Misc*
7168 *
7169 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7170 *
7171 * Returns 0, -1 in case of error. the parser context is augmented
7172 * as a result of the parsing.
7173 */
7174
7175int
7176xmlParseDocument(xmlParserCtxtPtr ctxt) {
7177 xmlChar start[4];
7178 xmlCharEncoding enc;
7179
7180 xmlInitParser();
7181
7182 GROW;
7183
7184 /*
7185 * SAX: beginning of the document processing.
7186 */
7187 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7188 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7189
7190 /*
7191 * Get the 4 first bytes and decode the charset
7192 * if enc != XML_CHAR_ENCODING_NONE
7193 * plug some encoding conversion routines.
7194 */
7195 start[0] = RAW;
7196 start[1] = NXT(1);
7197 start[2] = NXT(2);
7198 start[3] = NXT(3);
7199 enc = xmlDetectCharEncoding(start, 4);
7200 if (enc != XML_CHAR_ENCODING_NONE) {
7201 xmlSwitchEncoding(ctxt, enc);
7202 }
7203
7204
7205 if (CUR == 0) {
7206 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7208 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7209 ctxt->wellFormed = 0;
7210 ctxt->disableSAX = 1;
7211 }
7212
7213 /*
7214 * Check for the XMLDecl in the Prolog.
7215 */
7216 GROW;
7217 if ((RAW == '<') && (NXT(1) == '?') &&
7218 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7219 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7220
7221 /*
7222 * Note that we will switch encoding on the fly.
7223 */
7224 xmlParseXMLDecl(ctxt);
7225 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7226 /*
7227 * The XML REC instructs us to stop parsing right here
7228 */
7229 return(-1);
7230 }
7231 ctxt->standalone = ctxt->input->standalone;
7232 SKIP_BLANKS;
7233 } else {
7234 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7235 }
7236 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7237 ctxt->sax->startDocument(ctxt->userData);
7238
7239 /*
7240 * The Misc part of the Prolog
7241 */
7242 GROW;
7243 xmlParseMisc(ctxt);
7244
7245 /*
7246 * Then possibly doc type declaration(s) and more Misc
7247 * (doctypedecl Misc*)?
7248 */
7249 GROW;
7250 if ((RAW == '<') && (NXT(1) == '!') &&
7251 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7252 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7253 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7254 (NXT(8) == 'E')) {
7255
7256 ctxt->inSubset = 1;
7257 xmlParseDocTypeDecl(ctxt);
7258 if (RAW == '[') {
7259 ctxt->instate = XML_PARSER_DTD;
7260 xmlParseInternalSubset(ctxt);
7261 }
7262
7263 /*
7264 * Create and update the external subset.
7265 */
7266 ctxt->inSubset = 2;
7267 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7268 (!ctxt->disableSAX))
7269 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7270 ctxt->extSubSystem, ctxt->extSubURI);
7271 ctxt->inSubset = 0;
7272
7273
7274 ctxt->instate = XML_PARSER_PROLOG;
7275 xmlParseMisc(ctxt);
7276 }
7277
7278 /*
7279 * Time to start parsing the tree itself
7280 */
7281 GROW;
7282 if (RAW != '<') {
7283 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7285 ctxt->sax->error(ctxt->userData,
7286 "Start tag expected, '<' not found\n");
7287 ctxt->wellFormed = 0;
7288 ctxt->disableSAX = 1;
7289 ctxt->instate = XML_PARSER_EOF;
7290 } else {
7291 ctxt->instate = XML_PARSER_CONTENT;
7292 xmlParseElement(ctxt);
7293 ctxt->instate = XML_PARSER_EPILOG;
7294
7295
7296 /*
7297 * The Misc part at the end
7298 */
7299 xmlParseMisc(ctxt);
7300
7301 if (RAW != 0) {
7302 ctxt->errNo = XML_ERR_DOCUMENT_END;
7303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7304 ctxt->sax->error(ctxt->userData,
7305 "Extra content at the end of the document\n");
7306 ctxt->wellFormed = 0;
7307 ctxt->disableSAX = 1;
7308 }
7309 ctxt->instate = XML_PARSER_EOF;
7310 }
7311
7312 /*
7313 * SAX: end of the document processing.
7314 */
7315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7316 (!ctxt->disableSAX))
7317 ctxt->sax->endDocument(ctxt->userData);
7318
7319 if (! ctxt->wellFormed) return(-1);
7320 return(0);
7321}
7322
7323/**
7324 * xmlParseExtParsedEnt:
7325 * @ctxt: an XML parser context
7326 *
7327 * parse a genreral parsed entity
7328 * An external general parsed entity is well-formed if it matches the
7329 * production labeled extParsedEnt.
7330 *
7331 * [78] extParsedEnt ::= TextDecl? content
7332 *
7333 * Returns 0, -1 in case of error. the parser context is augmented
7334 * as a result of the parsing.
7335 */
7336
7337int
7338xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7339 xmlChar start[4];
7340 xmlCharEncoding enc;
7341
7342 xmlDefaultSAXHandlerInit();
7343
7344 GROW;
7345
7346 /*
7347 * SAX: beginning of the document processing.
7348 */
7349 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7350 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7351
7352 /*
7353 * Get the 4 first bytes and decode the charset
7354 * if enc != XML_CHAR_ENCODING_NONE
7355 * plug some encoding conversion routines.
7356 */
7357 start[0] = RAW;
7358 start[1] = NXT(1);
7359 start[2] = NXT(2);
7360 start[3] = NXT(3);
7361 enc = xmlDetectCharEncoding(start, 4);
7362 if (enc != XML_CHAR_ENCODING_NONE) {
7363 xmlSwitchEncoding(ctxt, enc);
7364 }
7365
7366
7367 if (CUR == 0) {
7368 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7370 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7371 ctxt->wellFormed = 0;
7372 ctxt->disableSAX = 1;
7373 }
7374
7375 /*
7376 * Check for the XMLDecl in the Prolog.
7377 */
7378 GROW;
7379 if ((RAW == '<') && (NXT(1) == '?') &&
7380 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7381 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7382
7383 /*
7384 * Note that we will switch encoding on the fly.
7385 */
7386 xmlParseXMLDecl(ctxt);
7387 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7388 /*
7389 * The XML REC instructs us to stop parsing right here
7390 */
7391 return(-1);
7392 }
7393 SKIP_BLANKS;
7394 } else {
7395 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7396 }
7397 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7398 ctxt->sax->startDocument(ctxt->userData);
7399
7400 /*
7401 * Doing validity checking on chunk doesn't make sense
7402 */
7403 ctxt->instate = XML_PARSER_CONTENT;
7404 ctxt->validate = 0;
7405 ctxt->loadsubset = 0;
7406 ctxt->depth = 0;
7407
7408 xmlParseContent(ctxt);
7409
7410 if ((RAW == '<') && (NXT(1) == '/')) {
7411 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7413 ctxt->sax->error(ctxt->userData,
7414 "chunk is not well balanced\n");
7415 ctxt->wellFormed = 0;
7416 ctxt->disableSAX = 1;
7417 } else if (RAW != 0) {
7418 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7420 ctxt->sax->error(ctxt->userData,
7421 "extra content at the end of well balanced chunk\n");
7422 ctxt->wellFormed = 0;
7423 ctxt->disableSAX = 1;
7424 }
7425
7426 /*
7427 * SAX: end of the document processing.
7428 */
7429 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7430 (!ctxt->disableSAX))
7431 ctxt->sax->endDocument(ctxt->userData);
7432
7433 if (! ctxt->wellFormed) return(-1);
7434 return(0);
7435}
7436
7437/************************************************************************
7438 * *
7439 * Progressive parsing interfaces *
7440 * *
7441 ************************************************************************/
7442
7443/**
7444 * xmlParseLookupSequence:
7445 * @ctxt: an XML parser context
7446 * @first: the first char to lookup
7447 * @next: the next char to lookup or zero
7448 * @third: the next char to lookup or zero
7449 *
7450 * Try to find if a sequence (first, next, third) or just (first next) or
7451 * (first) is available in the input stream.
7452 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7453 * to avoid rescanning sequences of bytes, it DOES change the state of the
7454 * parser, do not use liberally.
7455 *
7456 * Returns the index to the current parsing point if the full sequence
7457 * is available, -1 otherwise.
7458 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007459static int
Owen Taylor3473f882001-02-23 17:55:21 +00007460xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7461 xmlChar next, xmlChar third) {
7462 int base, len;
7463 xmlParserInputPtr in;
7464 const xmlChar *buf;
7465
7466 in = ctxt->input;
7467 if (in == NULL) return(-1);
7468 base = in->cur - in->base;
7469 if (base < 0) return(-1);
7470 if (ctxt->checkIndex > base)
7471 base = ctxt->checkIndex;
7472 if (in->buf == NULL) {
7473 buf = in->base;
7474 len = in->length;
7475 } else {
7476 buf = in->buf->buffer->content;
7477 len = in->buf->buffer->use;
7478 }
7479 /* take into account the sequence length */
7480 if (third) len -= 2;
7481 else if (next) len --;
7482 for (;base < len;base++) {
7483 if (buf[base] == first) {
7484 if (third != 0) {
7485 if ((buf[base + 1] != next) ||
7486 (buf[base + 2] != third)) continue;
7487 } else if (next != 0) {
7488 if (buf[base + 1] != next) continue;
7489 }
7490 ctxt->checkIndex = 0;
7491#ifdef DEBUG_PUSH
7492 if (next == 0)
7493 xmlGenericError(xmlGenericErrorContext,
7494 "PP: lookup '%c' found at %d\n",
7495 first, base);
7496 else if (third == 0)
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: lookup '%c%c' found at %d\n",
7499 first, next, base);
7500 else
7501 xmlGenericError(xmlGenericErrorContext,
7502 "PP: lookup '%c%c%c' found at %d\n",
7503 first, next, third, base);
7504#endif
7505 return(base - (in->cur - in->base));
7506 }
7507 }
7508 ctxt->checkIndex = base;
7509#ifdef DEBUG_PUSH
7510 if (next == 0)
7511 xmlGenericError(xmlGenericErrorContext,
7512 "PP: lookup '%c' failed\n", first);
7513 else if (third == 0)
7514 xmlGenericError(xmlGenericErrorContext,
7515 "PP: lookup '%c%c' failed\n", first, next);
7516 else
7517 xmlGenericError(xmlGenericErrorContext,
7518 "PP: lookup '%c%c%c' failed\n", first, next, third);
7519#endif
7520 return(-1);
7521}
7522
7523/**
7524 * xmlParseTryOrFinish:
7525 * @ctxt: an XML parser context
7526 * @terminate: last chunk indicator
7527 *
7528 * Try to progress on parsing
7529 *
7530 * Returns zero if no parsing was possible
7531 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007532static int
Owen Taylor3473f882001-02-23 17:55:21 +00007533xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7534 int ret = 0;
7535 int avail;
7536 xmlChar cur, next;
7537
7538#ifdef DEBUG_PUSH
7539 switch (ctxt->instate) {
7540 case XML_PARSER_EOF:
7541 xmlGenericError(xmlGenericErrorContext,
7542 "PP: try EOF\n"); break;
7543 case XML_PARSER_START:
7544 xmlGenericError(xmlGenericErrorContext,
7545 "PP: try START\n"); break;
7546 case XML_PARSER_MISC:
7547 xmlGenericError(xmlGenericErrorContext,
7548 "PP: try MISC\n");break;
7549 case XML_PARSER_COMMENT:
7550 xmlGenericError(xmlGenericErrorContext,
7551 "PP: try COMMENT\n");break;
7552 case XML_PARSER_PROLOG:
7553 xmlGenericError(xmlGenericErrorContext,
7554 "PP: try PROLOG\n");break;
7555 case XML_PARSER_START_TAG:
7556 xmlGenericError(xmlGenericErrorContext,
7557 "PP: try START_TAG\n");break;
7558 case XML_PARSER_CONTENT:
7559 xmlGenericError(xmlGenericErrorContext,
7560 "PP: try CONTENT\n");break;
7561 case XML_PARSER_CDATA_SECTION:
7562 xmlGenericError(xmlGenericErrorContext,
7563 "PP: try CDATA_SECTION\n");break;
7564 case XML_PARSER_END_TAG:
7565 xmlGenericError(xmlGenericErrorContext,
7566 "PP: try END_TAG\n");break;
7567 case XML_PARSER_ENTITY_DECL:
7568 xmlGenericError(xmlGenericErrorContext,
7569 "PP: try ENTITY_DECL\n");break;
7570 case XML_PARSER_ENTITY_VALUE:
7571 xmlGenericError(xmlGenericErrorContext,
7572 "PP: try ENTITY_VALUE\n");break;
7573 case XML_PARSER_ATTRIBUTE_VALUE:
7574 xmlGenericError(xmlGenericErrorContext,
7575 "PP: try ATTRIBUTE_VALUE\n");break;
7576 case XML_PARSER_DTD:
7577 xmlGenericError(xmlGenericErrorContext,
7578 "PP: try DTD\n");break;
7579 case XML_PARSER_EPILOG:
7580 xmlGenericError(xmlGenericErrorContext,
7581 "PP: try EPILOG\n");break;
7582 case XML_PARSER_PI:
7583 xmlGenericError(xmlGenericErrorContext,
7584 "PP: try PI\n");break;
7585 case XML_PARSER_IGNORE:
7586 xmlGenericError(xmlGenericErrorContext,
7587 "PP: try IGNORE\n");break;
7588 }
7589#endif
7590
7591 while (1) {
7592 /*
7593 * Pop-up of finished entities.
7594 */
7595 while ((RAW == 0) && (ctxt->inputNr > 1))
7596 xmlPopInput(ctxt);
7597
7598 if (ctxt->input ==NULL) break;
7599 if (ctxt->input->buf == NULL)
7600 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7601 else
7602 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7603 if (avail < 1)
7604 goto done;
7605 switch (ctxt->instate) {
7606 case XML_PARSER_EOF:
7607 /*
7608 * Document parsing is done !
7609 */
7610 goto done;
7611 case XML_PARSER_START:
7612 /*
7613 * Very first chars read from the document flow.
7614 */
Owen Taylor3473f882001-02-23 17:55:21 +00007615 if (avail < 2)
7616 goto done;
7617
7618 cur = ctxt->input->cur[0];
7619 next = ctxt->input->cur[1];
7620 if (cur == 0) {
7621 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7622 ctxt->sax->setDocumentLocator(ctxt->userData,
7623 &xmlDefaultSAXLocator);
7624 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7626 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7627 ctxt->wellFormed = 0;
7628 ctxt->disableSAX = 1;
7629 ctxt->instate = XML_PARSER_EOF;
7630#ifdef DEBUG_PUSH
7631 xmlGenericError(xmlGenericErrorContext,
7632 "PP: entering EOF\n");
7633#endif
7634 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7635 ctxt->sax->endDocument(ctxt->userData);
7636 goto done;
7637 }
7638 if ((cur == '<') && (next == '?')) {
7639 /* PI or XML decl */
7640 if (avail < 5) return(ret);
7641 if ((!terminate) &&
7642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7643 return(ret);
7644 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7645 ctxt->sax->setDocumentLocator(ctxt->userData,
7646 &xmlDefaultSAXLocator);
7647 if ((ctxt->input->cur[2] == 'x') &&
7648 (ctxt->input->cur[3] == 'm') &&
7649 (ctxt->input->cur[4] == 'l') &&
7650 (IS_BLANK(ctxt->input->cur[5]))) {
7651 ret += 5;
7652#ifdef DEBUG_PUSH
7653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: Parsing XML Decl\n");
7655#endif
7656 xmlParseXMLDecl(ctxt);
7657 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7658 /*
7659 * The XML REC instructs us to stop parsing right
7660 * here
7661 */
7662 ctxt->instate = XML_PARSER_EOF;
7663 return(0);
7664 }
7665 ctxt->standalone = ctxt->input->standalone;
7666 if ((ctxt->encoding == NULL) &&
7667 (ctxt->input->encoding != NULL))
7668 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7669 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7670 (!ctxt->disableSAX))
7671 ctxt->sax->startDocument(ctxt->userData);
7672 ctxt->instate = XML_PARSER_MISC;
7673#ifdef DEBUG_PUSH
7674 xmlGenericError(xmlGenericErrorContext,
7675 "PP: entering MISC\n");
7676#endif
7677 } else {
7678 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7679 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7680 (!ctxt->disableSAX))
7681 ctxt->sax->startDocument(ctxt->userData);
7682 ctxt->instate = XML_PARSER_MISC;
7683#ifdef DEBUG_PUSH
7684 xmlGenericError(xmlGenericErrorContext,
7685 "PP: entering MISC\n");
7686#endif
7687 }
7688 } else {
7689 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7690 ctxt->sax->setDocumentLocator(ctxt->userData,
7691 &xmlDefaultSAXLocator);
7692 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7693 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7694 (!ctxt->disableSAX))
7695 ctxt->sax->startDocument(ctxt->userData);
7696 ctxt->instate = XML_PARSER_MISC;
7697#ifdef DEBUG_PUSH
7698 xmlGenericError(xmlGenericErrorContext,
7699 "PP: entering MISC\n");
7700#endif
7701 }
7702 break;
7703 case XML_PARSER_MISC:
7704 SKIP_BLANKS;
7705 if (ctxt->input->buf == NULL)
7706 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7707 else
7708 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7709 if (avail < 2)
7710 goto done;
7711 cur = ctxt->input->cur[0];
7712 next = ctxt->input->cur[1];
7713 if ((cur == '<') && (next == '?')) {
7714 if ((!terminate) &&
7715 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7716 goto done;
7717#ifdef DEBUG_PUSH
7718 xmlGenericError(xmlGenericErrorContext,
7719 "PP: Parsing PI\n");
7720#endif
7721 xmlParsePI(ctxt);
7722 } else if ((cur == '<') && (next == '!') &&
7723 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7724 if ((!terminate) &&
7725 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7726 goto done;
7727#ifdef DEBUG_PUSH
7728 xmlGenericError(xmlGenericErrorContext,
7729 "PP: Parsing Comment\n");
7730#endif
7731 xmlParseComment(ctxt);
7732 ctxt->instate = XML_PARSER_MISC;
7733 } else if ((cur == '<') && (next == '!') &&
7734 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7735 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7736 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7737 (ctxt->input->cur[8] == 'E')) {
7738 if ((!terminate) &&
7739 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7740 goto done;
7741#ifdef DEBUG_PUSH
7742 xmlGenericError(xmlGenericErrorContext,
7743 "PP: Parsing internal subset\n");
7744#endif
7745 ctxt->inSubset = 1;
7746 xmlParseDocTypeDecl(ctxt);
7747 if (RAW == '[') {
7748 ctxt->instate = XML_PARSER_DTD;
7749#ifdef DEBUG_PUSH
7750 xmlGenericError(xmlGenericErrorContext,
7751 "PP: entering DTD\n");
7752#endif
7753 } else {
7754 /*
7755 * Create and update the external subset.
7756 */
7757 ctxt->inSubset = 2;
7758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7759 (ctxt->sax->externalSubset != NULL))
7760 ctxt->sax->externalSubset(ctxt->userData,
7761 ctxt->intSubName, ctxt->extSubSystem,
7762 ctxt->extSubURI);
7763 ctxt->inSubset = 0;
7764 ctxt->instate = XML_PARSER_PROLOG;
7765#ifdef DEBUG_PUSH
7766 xmlGenericError(xmlGenericErrorContext,
7767 "PP: entering PROLOG\n");
7768#endif
7769 }
7770 } else if ((cur == '<') && (next == '!') &&
7771 (avail < 9)) {
7772 goto done;
7773 } else {
7774 ctxt->instate = XML_PARSER_START_TAG;
7775#ifdef DEBUG_PUSH
7776 xmlGenericError(xmlGenericErrorContext,
7777 "PP: entering START_TAG\n");
7778#endif
7779 }
7780 break;
7781 case XML_PARSER_IGNORE:
7782 xmlGenericError(xmlGenericErrorContext,
7783 "PP: internal error, state == IGNORE");
7784 ctxt->instate = XML_PARSER_DTD;
7785#ifdef DEBUG_PUSH
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: entering DTD\n");
7788#endif
7789 break;
7790 case XML_PARSER_PROLOG:
7791 SKIP_BLANKS;
7792 if (ctxt->input->buf == NULL)
7793 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7794 else
7795 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7796 if (avail < 2)
7797 goto done;
7798 cur = ctxt->input->cur[0];
7799 next = ctxt->input->cur[1];
7800 if ((cur == '<') && (next == '?')) {
7801 if ((!terminate) &&
7802 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7803 goto done;
7804#ifdef DEBUG_PUSH
7805 xmlGenericError(xmlGenericErrorContext,
7806 "PP: Parsing PI\n");
7807#endif
7808 xmlParsePI(ctxt);
7809 } else if ((cur == '<') && (next == '!') &&
7810 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7811 if ((!terminate) &&
7812 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7813 goto done;
7814#ifdef DEBUG_PUSH
7815 xmlGenericError(xmlGenericErrorContext,
7816 "PP: Parsing Comment\n");
7817#endif
7818 xmlParseComment(ctxt);
7819 ctxt->instate = XML_PARSER_PROLOG;
7820 } else if ((cur == '<') && (next == '!') &&
7821 (avail < 4)) {
7822 goto done;
7823 } else {
7824 ctxt->instate = XML_PARSER_START_TAG;
7825#ifdef DEBUG_PUSH
7826 xmlGenericError(xmlGenericErrorContext,
7827 "PP: entering START_TAG\n");
7828#endif
7829 }
7830 break;
7831 case XML_PARSER_EPILOG:
7832 SKIP_BLANKS;
7833 if (ctxt->input->buf == NULL)
7834 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7835 else
7836 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7837 if (avail < 2)
7838 goto done;
7839 cur = ctxt->input->cur[0];
7840 next = ctxt->input->cur[1];
7841 if ((cur == '<') && (next == '?')) {
7842 if ((!terminate) &&
7843 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7844 goto done;
7845#ifdef DEBUG_PUSH
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: Parsing PI\n");
7848#endif
7849 xmlParsePI(ctxt);
7850 ctxt->instate = XML_PARSER_EPILOG;
7851 } else if ((cur == '<') && (next == '!') &&
7852 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7853 if ((!terminate) &&
7854 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7855 goto done;
7856#ifdef DEBUG_PUSH
7857 xmlGenericError(xmlGenericErrorContext,
7858 "PP: Parsing Comment\n");
7859#endif
7860 xmlParseComment(ctxt);
7861 ctxt->instate = XML_PARSER_EPILOG;
7862 } else if ((cur == '<') && (next == '!') &&
7863 (avail < 4)) {
7864 goto done;
7865 } else {
7866 ctxt->errNo = XML_ERR_DOCUMENT_END;
7867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7868 ctxt->sax->error(ctxt->userData,
7869 "Extra content at the end of the document\n");
7870 ctxt->wellFormed = 0;
7871 ctxt->disableSAX = 1;
7872 ctxt->instate = XML_PARSER_EOF;
7873#ifdef DEBUG_PUSH
7874 xmlGenericError(xmlGenericErrorContext,
7875 "PP: entering EOF\n");
7876#endif
7877 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7878 (!ctxt->disableSAX))
7879 ctxt->sax->endDocument(ctxt->userData);
7880 goto done;
7881 }
7882 break;
7883 case XML_PARSER_START_TAG: {
7884 xmlChar *name, *oldname;
7885
7886 if ((avail < 2) && (ctxt->inputNr == 1))
7887 goto done;
7888 cur = ctxt->input->cur[0];
7889 if (cur != '<') {
7890 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7892 ctxt->sax->error(ctxt->userData,
7893 "Start tag expect, '<' not found\n");
7894 ctxt->wellFormed = 0;
7895 ctxt->disableSAX = 1;
7896 ctxt->instate = XML_PARSER_EOF;
7897#ifdef DEBUG_PUSH
7898 xmlGenericError(xmlGenericErrorContext,
7899 "PP: entering EOF\n");
7900#endif
7901 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7902 (!ctxt->disableSAX))
7903 ctxt->sax->endDocument(ctxt->userData);
7904 goto done;
7905 }
7906 if ((!terminate) &&
7907 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7908 goto done;
7909 if (ctxt->spaceNr == 0)
7910 spacePush(ctxt, -1);
7911 else
7912 spacePush(ctxt, *ctxt->space);
7913 name = xmlParseStartTag(ctxt);
7914 if (name == NULL) {
7915 spacePop(ctxt);
7916 ctxt->instate = XML_PARSER_EOF;
7917#ifdef DEBUG_PUSH
7918 xmlGenericError(xmlGenericErrorContext,
7919 "PP: entering EOF\n");
7920#endif
7921 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7922 (!ctxt->disableSAX))
7923 ctxt->sax->endDocument(ctxt->userData);
7924 goto done;
7925 }
7926 namePush(ctxt, xmlStrdup(name));
7927
7928 /*
7929 * [ VC: Root Element Type ]
7930 * The Name in the document type declaration must match
7931 * the element type of the root element.
7932 */
7933 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7934 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7935 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7936
7937 /*
7938 * Check for an Empty Element.
7939 */
7940 if ((RAW == '/') && (NXT(1) == '>')) {
7941 SKIP(2);
7942 if ((ctxt->sax != NULL) &&
7943 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7944 ctxt->sax->endElement(ctxt->userData, name);
7945 xmlFree(name);
7946 oldname = namePop(ctxt);
7947 spacePop(ctxt);
7948 if (oldname != NULL) {
7949#ifdef DEBUG_STACK
7950 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7951#endif
7952 xmlFree(oldname);
7953 }
7954 if (ctxt->name == NULL) {
7955 ctxt->instate = XML_PARSER_EPILOG;
7956#ifdef DEBUG_PUSH
7957 xmlGenericError(xmlGenericErrorContext,
7958 "PP: entering EPILOG\n");
7959#endif
7960 } else {
7961 ctxt->instate = XML_PARSER_CONTENT;
7962#ifdef DEBUG_PUSH
7963 xmlGenericError(xmlGenericErrorContext,
7964 "PP: entering CONTENT\n");
7965#endif
7966 }
7967 break;
7968 }
7969 if (RAW == '>') {
7970 NEXT;
7971 } else {
7972 ctxt->errNo = XML_ERR_GT_REQUIRED;
7973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7974 ctxt->sax->error(ctxt->userData,
7975 "Couldn't find end of Start Tag %s\n",
7976 name);
7977 ctxt->wellFormed = 0;
7978 ctxt->disableSAX = 1;
7979
7980 /*
7981 * end of parsing of this node.
7982 */
7983 nodePop(ctxt);
7984 oldname = namePop(ctxt);
7985 spacePop(ctxt);
7986 if (oldname != NULL) {
7987#ifdef DEBUG_STACK
7988 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7989#endif
7990 xmlFree(oldname);
7991 }
7992 }
7993 xmlFree(name);
7994 ctxt->instate = XML_PARSER_CONTENT;
7995#ifdef DEBUG_PUSH
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: entering CONTENT\n");
7998#endif
7999 break;
8000 }
8001 case XML_PARSER_CONTENT: {
8002 const xmlChar *test;
8003 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008004 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008005
8006 /*
8007 * Handle preparsed entities and charRef
8008 */
8009 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008010 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008011
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008012 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008013 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8014 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008015 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008016 ctxt->token = 0;
8017 }
8018 if ((avail < 2) && (ctxt->inputNr == 1))
8019 goto done;
8020 cur = ctxt->input->cur[0];
8021 next = ctxt->input->cur[1];
8022
8023 test = CUR_PTR;
8024 cons = ctxt->input->consumed;
8025 tok = ctxt->token;
8026 if ((cur == '<') && (next == '?')) {
8027 if ((!terminate) &&
8028 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8029 goto done;
8030#ifdef DEBUG_PUSH
8031 xmlGenericError(xmlGenericErrorContext,
8032 "PP: Parsing PI\n");
8033#endif
8034 xmlParsePI(ctxt);
8035 } else if ((cur == '<') && (next == '!') &&
8036 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8037 if ((!terminate) &&
8038 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8039 goto done;
8040#ifdef DEBUG_PUSH
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: Parsing Comment\n");
8043#endif
8044 xmlParseComment(ctxt);
8045 ctxt->instate = XML_PARSER_CONTENT;
8046 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8047 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8048 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8049 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8050 (ctxt->input->cur[8] == '[')) {
8051 SKIP(9);
8052 ctxt->instate = XML_PARSER_CDATA_SECTION;
8053#ifdef DEBUG_PUSH
8054 xmlGenericError(xmlGenericErrorContext,
8055 "PP: entering CDATA_SECTION\n");
8056#endif
8057 break;
8058 } else if ((cur == '<') && (next == '!') &&
8059 (avail < 9)) {
8060 goto done;
8061 } else if ((cur == '<') && (next == '/')) {
8062 ctxt->instate = XML_PARSER_END_TAG;
8063#ifdef DEBUG_PUSH
8064 xmlGenericError(xmlGenericErrorContext,
8065 "PP: entering END_TAG\n");
8066#endif
8067 break;
8068 } else if (cur == '<') {
8069 ctxt->instate = XML_PARSER_START_TAG;
8070#ifdef DEBUG_PUSH
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: entering START_TAG\n");
8073#endif
8074 break;
8075 } else if (cur == '&') {
8076 if ((!terminate) &&
8077 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8078 goto done;
8079#ifdef DEBUG_PUSH
8080 xmlGenericError(xmlGenericErrorContext,
8081 "PP: Parsing Reference\n");
8082#endif
8083 xmlParseReference(ctxt);
8084 } else {
8085 /* TODO Avoid the extra copy, handle directly !!! */
8086 /*
8087 * Goal of the following test is:
8088 * - minimize calls to the SAX 'character' callback
8089 * when they are mergeable
8090 * - handle an problem for isBlank when we only parse
8091 * a sequence of blank chars and the next one is
8092 * not available to check against '<' presence.
8093 * - tries to homogenize the differences in SAX
8094 * callbacks beween the push and pull versions
8095 * of the parser.
8096 */
8097 if ((ctxt->inputNr == 1) &&
8098 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8099 if ((!terminate) &&
8100 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8101 goto done;
8102 }
8103 ctxt->checkIndex = 0;
8104#ifdef DEBUG_PUSH
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: Parsing char data\n");
8107#endif
8108 xmlParseCharData(ctxt, 0);
8109 }
8110 /*
8111 * Pop-up of finished entities.
8112 */
8113 while ((RAW == 0) && (ctxt->inputNr > 1))
8114 xmlPopInput(ctxt);
8115 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8116 (tok == ctxt->token)) {
8117 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8119 ctxt->sax->error(ctxt->userData,
8120 "detected an error in element content\n");
8121 ctxt->wellFormed = 0;
8122 ctxt->disableSAX = 1;
8123 ctxt->instate = XML_PARSER_EOF;
8124 break;
8125 }
8126 break;
8127 }
8128 case XML_PARSER_CDATA_SECTION: {
8129 /*
8130 * The Push mode need to have the SAX callback for
8131 * cdataBlock merge back contiguous callbacks.
8132 */
8133 int base;
8134
8135 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8136 if (base < 0) {
8137 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8138 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8139 if (ctxt->sax->cdataBlock != NULL)
8140 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8141 XML_PARSER_BIG_BUFFER_SIZE);
8142 }
8143 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8144 ctxt->checkIndex = 0;
8145 }
8146 goto done;
8147 } else {
8148 if ((ctxt->sax != NULL) && (base > 0) &&
8149 (!ctxt->disableSAX)) {
8150 if (ctxt->sax->cdataBlock != NULL)
8151 ctxt->sax->cdataBlock(ctxt->userData,
8152 ctxt->input->cur, base);
8153 }
8154 SKIP(base + 3);
8155 ctxt->checkIndex = 0;
8156 ctxt->instate = XML_PARSER_CONTENT;
8157#ifdef DEBUG_PUSH
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: entering CONTENT\n");
8160#endif
8161 }
8162 break;
8163 }
8164 case XML_PARSER_END_TAG:
8165 if (avail < 2)
8166 goto done;
8167 if ((!terminate) &&
8168 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8169 goto done;
8170 xmlParseEndTag(ctxt);
8171 if (ctxt->name == NULL) {
8172 ctxt->instate = XML_PARSER_EPILOG;
8173#ifdef DEBUG_PUSH
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: entering EPILOG\n");
8176#endif
8177 } else {
8178 ctxt->instate = XML_PARSER_CONTENT;
8179#ifdef DEBUG_PUSH
8180 xmlGenericError(xmlGenericErrorContext,
8181 "PP: entering CONTENT\n");
8182#endif
8183 }
8184 break;
8185 case XML_PARSER_DTD: {
8186 /*
8187 * Sorry but progressive parsing of the internal subset
8188 * is not expected to be supported. We first check that
8189 * the full content of the internal subset is available and
8190 * the parsing is launched only at that point.
8191 * Internal subset ends up with "']' S? '>'" in an unescaped
8192 * section and not in a ']]>' sequence which are conditional
8193 * sections (whoever argued to keep that crap in XML deserve
8194 * a place in hell !).
8195 */
8196 int base, i;
8197 xmlChar *buf;
8198 xmlChar quote = 0;
8199
8200 base = ctxt->input->cur - ctxt->input->base;
8201 if (base < 0) return(0);
8202 if (ctxt->checkIndex > base)
8203 base = ctxt->checkIndex;
8204 buf = ctxt->input->buf->buffer->content;
8205 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8206 base++) {
8207 if (quote != 0) {
8208 if (buf[base] == quote)
8209 quote = 0;
8210 continue;
8211 }
8212 if (buf[base] == '"') {
8213 quote = '"';
8214 continue;
8215 }
8216 if (buf[base] == '\'') {
8217 quote = '\'';
8218 continue;
8219 }
8220 if (buf[base] == ']') {
8221 if ((unsigned int) base +1 >=
8222 ctxt->input->buf->buffer->use)
8223 break;
8224 if (buf[base + 1] == ']') {
8225 /* conditional crap, skip both ']' ! */
8226 base++;
8227 continue;
8228 }
8229 for (i = 0;
8230 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8231 i++) {
8232 if (buf[base + i] == '>')
8233 goto found_end_int_subset;
8234 }
8235 break;
8236 }
8237 }
8238 /*
8239 * We didn't found the end of the Internal subset
8240 */
8241 if (quote == 0)
8242 ctxt->checkIndex = base;
8243#ifdef DEBUG_PUSH
8244 if (next == 0)
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: lookup of int subset end filed\n");
8247#endif
8248 goto done;
8249
8250found_end_int_subset:
8251 xmlParseInternalSubset(ctxt);
8252 ctxt->inSubset = 2;
8253 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8254 (ctxt->sax->externalSubset != NULL))
8255 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8256 ctxt->extSubSystem, ctxt->extSubURI);
8257 ctxt->inSubset = 0;
8258 ctxt->instate = XML_PARSER_PROLOG;
8259 ctxt->checkIndex = 0;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: entering PROLOG\n");
8263#endif
8264 break;
8265 }
8266 case XML_PARSER_COMMENT:
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: internal error, state == COMMENT\n");
8269 ctxt->instate = XML_PARSER_CONTENT;
8270#ifdef DEBUG_PUSH
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: entering CONTENT\n");
8273#endif
8274 break;
8275 case XML_PARSER_PI:
8276 xmlGenericError(xmlGenericErrorContext,
8277 "PP: internal error, state == PI\n");
8278 ctxt->instate = XML_PARSER_CONTENT;
8279#ifdef DEBUG_PUSH
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: entering CONTENT\n");
8282#endif
8283 break;
8284 case XML_PARSER_ENTITY_DECL:
8285 xmlGenericError(xmlGenericErrorContext,
8286 "PP: internal error, state == ENTITY_DECL\n");
8287 ctxt->instate = XML_PARSER_DTD;
8288#ifdef DEBUG_PUSH
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: entering DTD\n");
8291#endif
8292 break;
8293 case XML_PARSER_ENTITY_VALUE:
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: internal error, state == ENTITY_VALUE\n");
8296 ctxt->instate = XML_PARSER_CONTENT;
8297#ifdef DEBUG_PUSH
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: entering DTD\n");
8300#endif
8301 break;
8302 case XML_PARSER_ATTRIBUTE_VALUE:
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8305 ctxt->instate = XML_PARSER_START_TAG;
8306#ifdef DEBUG_PUSH
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: entering START_TAG\n");
8309#endif
8310 break;
8311 case XML_PARSER_SYSTEM_LITERAL:
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: internal error, state == SYSTEM_LITERAL\n");
8314 ctxt->instate = XML_PARSER_START_TAG;
8315#ifdef DEBUG_PUSH
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: entering START_TAG\n");
8318#endif
8319 break;
8320 }
8321 }
8322done:
8323#ifdef DEBUG_PUSH
8324 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8325#endif
8326 return(ret);
8327}
8328
8329/**
Owen Taylor3473f882001-02-23 17:55:21 +00008330 * xmlParseChunk:
8331 * @ctxt: an XML parser context
8332 * @chunk: an char array
8333 * @size: the size in byte of the chunk
8334 * @terminate: last chunk indicator
8335 *
8336 * Parse a Chunk of memory
8337 *
8338 * Returns zero if no error, the xmlParserErrors otherwise.
8339 */
8340int
8341xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8342 int terminate) {
8343 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8344 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8345 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8346 int cur = ctxt->input->cur - ctxt->input->base;
8347
8348 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8349 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8350 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008351 ctxt->input->end =
8352 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008353#ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8355#endif
8356
8357 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8358 xmlParseTryOrFinish(ctxt, terminate);
8359 } else if (ctxt->instate != XML_PARSER_EOF) {
8360 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8361 xmlParserInputBufferPtr in = ctxt->input->buf;
8362 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8363 (in->raw != NULL)) {
8364 int nbchars;
8365
8366 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8367 if (nbchars < 0) {
8368 xmlGenericError(xmlGenericErrorContext,
8369 "xmlParseChunk: encoder error\n");
8370 return(XML_ERR_INVALID_ENCODING);
8371 }
8372 }
8373 }
8374 }
8375 xmlParseTryOrFinish(ctxt, terminate);
8376 if (terminate) {
8377 /*
8378 * Check for termination
8379 */
8380 if ((ctxt->instate != XML_PARSER_EOF) &&
8381 (ctxt->instate != XML_PARSER_EPILOG)) {
8382 ctxt->errNo = XML_ERR_DOCUMENT_END;
8383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8384 ctxt->sax->error(ctxt->userData,
8385 "Extra content at the end of the document\n");
8386 ctxt->wellFormed = 0;
8387 ctxt->disableSAX = 1;
8388 }
8389 if (ctxt->instate != XML_PARSER_EOF) {
8390 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8391 (!ctxt->disableSAX))
8392 ctxt->sax->endDocument(ctxt->userData);
8393 }
8394 ctxt->instate = XML_PARSER_EOF;
8395 }
8396 return((xmlParserErrors) ctxt->errNo);
8397}
8398
8399/************************************************************************
8400 * *
8401 * I/O front end functions to the parser *
8402 * *
8403 ************************************************************************/
8404
8405/**
8406 * xmlStopParser:
8407 * @ctxt: an XML parser context
8408 *
8409 * Blocks further parser processing
8410 */
8411void
8412xmlStopParser(xmlParserCtxtPtr ctxt) {
8413 ctxt->instate = XML_PARSER_EOF;
8414 if (ctxt->input != NULL)
8415 ctxt->input->cur = BAD_CAST"";
8416}
8417
8418/**
8419 * xmlCreatePushParserCtxt:
8420 * @sax: a SAX handler
8421 * @user_data: The user data returned on SAX callbacks
8422 * @chunk: a pointer to an array of chars
8423 * @size: number of chars in the array
8424 * @filename: an optional file name or URI
8425 *
8426 * Create a parser context for using the XML parser in push mode
8427 * To allow content encoding detection, @size should be >= 4
8428 * The value of @filename is used for fetching external entities
8429 * and error/warning reports.
8430 *
8431 * Returns the new parser context or NULL
8432 */
8433xmlParserCtxtPtr
8434xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8435 const char *chunk, int size, const char *filename) {
8436 xmlParserCtxtPtr ctxt;
8437 xmlParserInputPtr inputStream;
8438 xmlParserInputBufferPtr buf;
8439 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8440
8441 /*
8442 * plug some encoding conversion routines
8443 */
8444 if ((chunk != NULL) && (size >= 4))
8445 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8446
8447 buf = xmlAllocParserInputBuffer(enc);
8448 if (buf == NULL) return(NULL);
8449
8450 ctxt = xmlNewParserCtxt();
8451 if (ctxt == NULL) {
8452 xmlFree(buf);
8453 return(NULL);
8454 }
8455 if (sax != NULL) {
8456 if (ctxt->sax != &xmlDefaultSAXHandler)
8457 xmlFree(ctxt->sax);
8458 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8459 if (ctxt->sax == NULL) {
8460 xmlFree(buf);
8461 xmlFree(ctxt);
8462 return(NULL);
8463 }
8464 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8465 if (user_data != NULL)
8466 ctxt->userData = user_data;
8467 }
8468 if (filename == NULL) {
8469 ctxt->directory = NULL;
8470 } else {
8471 ctxt->directory = xmlParserGetDirectory(filename);
8472 }
8473
8474 inputStream = xmlNewInputStream(ctxt);
8475 if (inputStream == NULL) {
8476 xmlFreeParserCtxt(ctxt);
8477 return(NULL);
8478 }
8479
8480 if (filename == NULL)
8481 inputStream->filename = NULL;
8482 else
8483 inputStream->filename = xmlMemStrdup(filename);
8484 inputStream->buf = buf;
8485 inputStream->base = inputStream->buf->buffer->content;
8486 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008487 inputStream->end =
8488 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008489 if (enc != XML_CHAR_ENCODING_NONE) {
8490 xmlSwitchEncoding(ctxt, enc);
8491 }
8492
8493 inputPush(ctxt, inputStream);
8494
8495 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8496 (ctxt->input->buf != NULL)) {
8497 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8498#ifdef DEBUG_PUSH
8499 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8500#endif
8501 }
8502
8503 return(ctxt);
8504}
8505
8506/**
8507 * xmlCreateIOParserCtxt:
8508 * @sax: a SAX handler
8509 * @user_data: The user data returned on SAX callbacks
8510 * @ioread: an I/O read function
8511 * @ioclose: an I/O close function
8512 * @ioctx: an I/O handler
8513 * @enc: the charset encoding if known
8514 *
8515 * Create a parser context for using the XML parser with an existing
8516 * I/O stream
8517 *
8518 * Returns the new parser context or NULL
8519 */
8520xmlParserCtxtPtr
8521xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8522 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8523 void *ioctx, xmlCharEncoding enc) {
8524 xmlParserCtxtPtr ctxt;
8525 xmlParserInputPtr inputStream;
8526 xmlParserInputBufferPtr buf;
8527
8528 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8529 if (buf == NULL) return(NULL);
8530
8531 ctxt = xmlNewParserCtxt();
8532 if (ctxt == NULL) {
8533 xmlFree(buf);
8534 return(NULL);
8535 }
8536 if (sax != NULL) {
8537 if (ctxt->sax != &xmlDefaultSAXHandler)
8538 xmlFree(ctxt->sax);
8539 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8540 if (ctxt->sax == NULL) {
8541 xmlFree(buf);
8542 xmlFree(ctxt);
8543 return(NULL);
8544 }
8545 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8546 if (user_data != NULL)
8547 ctxt->userData = user_data;
8548 }
8549
8550 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8551 if (inputStream == NULL) {
8552 xmlFreeParserCtxt(ctxt);
8553 return(NULL);
8554 }
8555 inputPush(ctxt, inputStream);
8556
8557 return(ctxt);
8558}
8559
8560/************************************************************************
8561 * *
8562 * Front ends when parsing a Dtd *
8563 * *
8564 ************************************************************************/
8565
8566/**
8567 * xmlIOParseDTD:
8568 * @sax: the SAX handler block or NULL
8569 * @input: an Input Buffer
8570 * @enc: the charset encoding if known
8571 *
8572 * Load and parse a DTD
8573 *
8574 * Returns the resulting xmlDtdPtr or NULL in case of error.
8575 * @input will be freed at parsing end.
8576 */
8577
8578xmlDtdPtr
8579xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8580 xmlCharEncoding enc) {
8581 xmlDtdPtr ret = NULL;
8582 xmlParserCtxtPtr ctxt;
8583 xmlParserInputPtr pinput = NULL;
8584
8585 if (input == NULL)
8586 return(NULL);
8587
8588 ctxt = xmlNewParserCtxt();
8589 if (ctxt == NULL) {
8590 return(NULL);
8591 }
8592
8593 /*
8594 * Set-up the SAX context
8595 */
8596 if (sax != NULL) {
8597 if (ctxt->sax != NULL)
8598 xmlFree(ctxt->sax);
8599 ctxt->sax = sax;
8600 ctxt->userData = NULL;
8601 }
8602
8603 /*
8604 * generate a parser input from the I/O handler
8605 */
8606
8607 pinput = xmlNewIOInputStream(ctxt, input, enc);
8608 if (pinput == NULL) {
8609 if (sax != NULL) ctxt->sax = NULL;
8610 xmlFreeParserCtxt(ctxt);
8611 return(NULL);
8612 }
8613
8614 /*
8615 * plug some encoding conversion routines here.
8616 */
8617 xmlPushInput(ctxt, pinput);
8618
8619 pinput->filename = NULL;
8620 pinput->line = 1;
8621 pinput->col = 1;
8622 pinput->base = ctxt->input->cur;
8623 pinput->cur = ctxt->input->cur;
8624 pinput->free = NULL;
8625
8626 /*
8627 * let's parse that entity knowing it's an external subset.
8628 */
8629 ctxt->inSubset = 2;
8630 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8631 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8632 BAD_CAST "none", BAD_CAST "none");
8633 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8634
8635 if (ctxt->myDoc != NULL) {
8636 if (ctxt->wellFormed) {
8637 ret = ctxt->myDoc->extSubset;
8638 ctxt->myDoc->extSubset = NULL;
8639 } else {
8640 ret = NULL;
8641 }
8642 xmlFreeDoc(ctxt->myDoc);
8643 ctxt->myDoc = NULL;
8644 }
8645 if (sax != NULL) ctxt->sax = NULL;
8646 xmlFreeParserCtxt(ctxt);
8647
8648 return(ret);
8649}
8650
8651/**
8652 * xmlSAXParseDTD:
8653 * @sax: the SAX handler block
8654 * @ExternalID: a NAME* containing the External ID of the DTD
8655 * @SystemID: a NAME* containing the URL to the DTD
8656 *
8657 * Load and parse an external subset.
8658 *
8659 * Returns the resulting xmlDtdPtr or NULL in case of error.
8660 */
8661
8662xmlDtdPtr
8663xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8664 const xmlChar *SystemID) {
8665 xmlDtdPtr ret = NULL;
8666 xmlParserCtxtPtr ctxt;
8667 xmlParserInputPtr input = NULL;
8668 xmlCharEncoding enc;
8669
8670 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8671
8672 ctxt = xmlNewParserCtxt();
8673 if (ctxt == NULL) {
8674 return(NULL);
8675 }
8676
8677 /*
8678 * Set-up the SAX context
8679 */
8680 if (sax != NULL) {
8681 if (ctxt->sax != NULL)
8682 xmlFree(ctxt->sax);
8683 ctxt->sax = sax;
8684 ctxt->userData = NULL;
8685 }
8686
8687 /*
8688 * Ask the Entity resolver to load the damn thing
8689 */
8690
8691 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8692 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8693 if (input == NULL) {
8694 if (sax != NULL) ctxt->sax = NULL;
8695 xmlFreeParserCtxt(ctxt);
8696 return(NULL);
8697 }
8698
8699 /*
8700 * plug some encoding conversion routines here.
8701 */
8702 xmlPushInput(ctxt, input);
8703 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8704 xmlSwitchEncoding(ctxt, enc);
8705
8706 if (input->filename == NULL)
8707 input->filename = (char *) xmlStrdup(SystemID);
8708 input->line = 1;
8709 input->col = 1;
8710 input->base = ctxt->input->cur;
8711 input->cur = ctxt->input->cur;
8712 input->free = NULL;
8713
8714 /*
8715 * let's parse that entity knowing it's an external subset.
8716 */
8717 ctxt->inSubset = 2;
8718 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8719 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8720 ExternalID, SystemID);
8721 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8722
8723 if (ctxt->myDoc != NULL) {
8724 if (ctxt->wellFormed) {
8725 ret = ctxt->myDoc->extSubset;
8726 ctxt->myDoc->extSubset = NULL;
8727 } else {
8728 ret = NULL;
8729 }
8730 xmlFreeDoc(ctxt->myDoc);
8731 ctxt->myDoc = NULL;
8732 }
8733 if (sax != NULL) ctxt->sax = NULL;
8734 xmlFreeParserCtxt(ctxt);
8735
8736 return(ret);
8737}
8738
8739/**
8740 * xmlParseDTD:
8741 * @ExternalID: a NAME* containing the External ID of the DTD
8742 * @SystemID: a NAME* containing the URL to the DTD
8743 *
8744 * Load and parse an external subset.
8745 *
8746 * Returns the resulting xmlDtdPtr or NULL in case of error.
8747 */
8748
8749xmlDtdPtr
8750xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8751 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8752}
8753
8754/************************************************************************
8755 * *
8756 * Front ends when parsing an Entity *
8757 * *
8758 ************************************************************************/
8759
8760/**
Owen Taylor3473f882001-02-23 17:55:21 +00008761 * xmlParseCtxtExternalEntity:
8762 * @ctx: the existing parsing context
8763 * @URL: the URL for the entity to load
8764 * @ID: the System ID for the entity to load
8765 * @list: the return value for the set of parsed nodes
8766 *
8767 * Parse an external general entity within an existing parsing context
8768 * An external general parsed entity is well-formed if it matches the
8769 * production labeled extParsedEnt.
8770 *
8771 * [78] extParsedEnt ::= TextDecl? content
8772 *
8773 * Returns 0 if the entity is well formed, -1 in case of args problem and
8774 * the parser error code otherwise
8775 */
8776
8777int
8778xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8779 const xmlChar *ID, xmlNodePtr *list) {
8780 xmlParserCtxtPtr ctxt;
8781 xmlDocPtr newDoc;
8782 xmlSAXHandlerPtr oldsax = NULL;
8783 int ret = 0;
8784
8785 if (ctx->depth > 40) {
8786 return(XML_ERR_ENTITY_LOOP);
8787 }
8788
8789 if (list != NULL)
8790 *list = NULL;
8791 if ((URL == NULL) && (ID == NULL))
8792 return(-1);
8793 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8794 return(-1);
8795
8796
8797 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8798 if (ctxt == NULL) return(-1);
8799 ctxt->userData = ctxt;
8800 oldsax = ctxt->sax;
8801 ctxt->sax = ctx->sax;
8802 newDoc = xmlNewDoc(BAD_CAST "1.0");
8803 if (newDoc == NULL) {
8804 xmlFreeParserCtxt(ctxt);
8805 return(-1);
8806 }
8807 if (ctx->myDoc != NULL) {
8808 newDoc->intSubset = ctx->myDoc->intSubset;
8809 newDoc->extSubset = ctx->myDoc->extSubset;
8810 }
8811 if (ctx->myDoc->URL != NULL) {
8812 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8813 }
8814 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8815 if (newDoc->children == NULL) {
8816 ctxt->sax = oldsax;
8817 xmlFreeParserCtxt(ctxt);
8818 newDoc->intSubset = NULL;
8819 newDoc->extSubset = NULL;
8820 xmlFreeDoc(newDoc);
8821 return(-1);
8822 }
8823 nodePush(ctxt, newDoc->children);
8824 if (ctx->myDoc == NULL) {
8825 ctxt->myDoc = newDoc;
8826 } else {
8827 ctxt->myDoc = ctx->myDoc;
8828 newDoc->children->doc = ctx->myDoc;
8829 }
8830
8831 /*
8832 * Parse a possible text declaration first
8833 */
8834 GROW;
8835 if ((RAW == '<') && (NXT(1) == '?') &&
8836 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8837 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8838 xmlParseTextDecl(ctxt);
8839 }
8840
8841 /*
8842 * Doing validity checking on chunk doesn't make sense
8843 */
8844 ctxt->instate = XML_PARSER_CONTENT;
8845 ctxt->validate = ctx->validate;
8846 ctxt->loadsubset = ctx->loadsubset;
8847 ctxt->depth = ctx->depth + 1;
8848 ctxt->replaceEntities = ctx->replaceEntities;
8849 if (ctxt->validate) {
8850 ctxt->vctxt.error = ctx->vctxt.error;
8851 ctxt->vctxt.warning = ctx->vctxt.warning;
8852 /* Allocate the Node stack */
8853 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8854 if (ctxt->vctxt.nodeTab == NULL) {
8855 xmlGenericError(xmlGenericErrorContext,
8856 "xmlParseCtxtExternalEntity: out of memory\n");
8857 ctxt->validate = 0;
8858 ctxt->vctxt.error = NULL;
8859 ctxt->vctxt.warning = NULL;
8860 } else {
8861 ctxt->vctxt.nodeNr = 0;
8862 ctxt->vctxt.nodeMax = 4;
8863 ctxt->vctxt.node = NULL;
8864 }
8865 } else {
8866 ctxt->vctxt.error = NULL;
8867 ctxt->vctxt.warning = NULL;
8868 }
8869
8870 xmlParseContent(ctxt);
8871
8872 if ((RAW == '<') && (NXT(1) == '/')) {
8873 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8875 ctxt->sax->error(ctxt->userData,
8876 "chunk is not well balanced\n");
8877 ctxt->wellFormed = 0;
8878 ctxt->disableSAX = 1;
8879 } else if (RAW != 0) {
8880 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8882 ctxt->sax->error(ctxt->userData,
8883 "extra content at the end of well balanced chunk\n");
8884 ctxt->wellFormed = 0;
8885 ctxt->disableSAX = 1;
8886 }
8887 if (ctxt->node != newDoc->children) {
8888 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8890 ctxt->sax->error(ctxt->userData,
8891 "chunk is not well balanced\n");
8892 ctxt->wellFormed = 0;
8893 ctxt->disableSAX = 1;
8894 }
8895
8896 if (!ctxt->wellFormed) {
8897 if (ctxt->errNo == 0)
8898 ret = 1;
8899 else
8900 ret = ctxt->errNo;
8901 } else {
8902 if (list != NULL) {
8903 xmlNodePtr cur;
8904
8905 /*
8906 * Return the newly created nodeset after unlinking it from
8907 * they pseudo parent.
8908 */
8909 cur = newDoc->children->children;
8910 *list = cur;
8911 while (cur != NULL) {
8912 cur->parent = NULL;
8913 cur = cur->next;
8914 }
8915 newDoc->children->children = NULL;
8916 }
8917 ret = 0;
8918 }
8919 ctxt->sax = oldsax;
8920 xmlFreeParserCtxt(ctxt);
8921 newDoc->intSubset = NULL;
8922 newDoc->extSubset = NULL;
8923 xmlFreeDoc(newDoc);
8924
8925 return(ret);
8926}
8927
8928/**
Daniel Veillard257d9102001-05-08 10:41:44 +00008929 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00008930 * @doc: the document the chunk pertains to
8931 * @sax: the SAX handler bloc (possibly NULL)
8932 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8933 * @depth: Used for loop detection, use 0
8934 * @URL: the URL for the entity to load
8935 * @ID: the System ID for the entity to load
8936 * @list: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00008937 * @private: extra field for the _private parser context
Owen Taylor3473f882001-02-23 17:55:21 +00008938 *
Daniel Veillard257d9102001-05-08 10:41:44 +00008939 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00008940 *
8941 * Returns 0 if the entity is well formed, -1 in case of args problem and
8942 * the parser error code otherwise
8943 */
8944
Daniel Veillard257d9102001-05-08 10:41:44 +00008945static int
8946xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlSAXHandlerPtr sax,
8947 void *user_data, int depth, const xmlChar *URL,
8948 const xmlChar *ID, xmlNodePtr *list, void *private) {
Owen Taylor3473f882001-02-23 17:55:21 +00008949 xmlParserCtxtPtr ctxt;
8950 xmlDocPtr newDoc;
8951 xmlSAXHandlerPtr oldsax = NULL;
8952 int ret = 0;
8953
8954 if (depth > 40) {
8955 return(XML_ERR_ENTITY_LOOP);
8956 }
8957
8958
8959
8960 if (list != NULL)
8961 *list = NULL;
8962 if ((URL == NULL) && (ID == NULL))
8963 return(-1);
8964 if (doc == NULL) /* @@ relax but check for dereferences */
8965 return(-1);
8966
8967
8968 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8969 if (ctxt == NULL) return(-1);
8970 ctxt->userData = ctxt;
Daniel Veillard257d9102001-05-08 10:41:44 +00008971 ctxt->_private = private;
Owen Taylor3473f882001-02-23 17:55:21 +00008972 if (sax != NULL) {
8973 oldsax = ctxt->sax;
8974 ctxt->sax = sax;
8975 if (user_data != NULL)
8976 ctxt->userData = user_data;
8977 }
8978 newDoc = xmlNewDoc(BAD_CAST "1.0");
8979 if (newDoc == NULL) {
8980 xmlFreeParserCtxt(ctxt);
8981 return(-1);
8982 }
8983 if (doc != NULL) {
8984 newDoc->intSubset = doc->intSubset;
8985 newDoc->extSubset = doc->extSubset;
8986 }
8987 if (doc->URL != NULL) {
8988 newDoc->URL = xmlStrdup(doc->URL);
8989 }
8990 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8991 if (newDoc->children == NULL) {
8992 if (sax != NULL)
8993 ctxt->sax = oldsax;
8994 xmlFreeParserCtxt(ctxt);
8995 newDoc->intSubset = NULL;
8996 newDoc->extSubset = NULL;
8997 xmlFreeDoc(newDoc);
8998 return(-1);
8999 }
9000 nodePush(ctxt, newDoc->children);
9001 if (doc == NULL) {
9002 ctxt->myDoc = newDoc;
9003 } else {
9004 ctxt->myDoc = doc;
9005 newDoc->children->doc = doc;
9006 }
9007
9008 /*
9009 * Parse a possible text declaration first
9010 */
9011 GROW;
9012 if ((RAW == '<') && (NXT(1) == '?') &&
9013 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9014 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9015 xmlParseTextDecl(ctxt);
9016 }
9017
9018 /*
9019 * Doing validity checking on chunk doesn't make sense
9020 */
9021 ctxt->instate = XML_PARSER_CONTENT;
9022 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00009023 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009024 ctxt->loadsubset = 0;
9025 ctxt->depth = depth;
9026
9027 xmlParseContent(ctxt);
9028
9029 if ((RAW == '<') && (NXT(1) == '/')) {
9030 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9032 ctxt->sax->error(ctxt->userData,
9033 "chunk is not well balanced\n");
9034 ctxt->wellFormed = 0;
9035 ctxt->disableSAX = 1;
9036 } else if (RAW != 0) {
9037 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9039 ctxt->sax->error(ctxt->userData,
9040 "extra content at the end of well balanced chunk\n");
9041 ctxt->wellFormed = 0;
9042 ctxt->disableSAX = 1;
9043 }
9044 if (ctxt->node != newDoc->children) {
9045 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9047 ctxt->sax->error(ctxt->userData,
9048 "chunk is not well balanced\n");
9049 ctxt->wellFormed = 0;
9050 ctxt->disableSAX = 1;
9051 }
9052
9053 if (!ctxt->wellFormed) {
9054 if (ctxt->errNo == 0)
9055 ret = 1;
9056 else
9057 ret = ctxt->errNo;
9058 } else {
9059 if (list != NULL) {
9060 xmlNodePtr cur;
9061
9062 /*
9063 * Return the newly created nodeset after unlinking it from
9064 * they pseudo parent.
9065 */
9066 cur = newDoc->children->children;
9067 *list = cur;
9068 while (cur != NULL) {
9069 cur->parent = NULL;
9070 cur = cur->next;
9071 }
9072 newDoc->children->children = NULL;
9073 }
9074 ret = 0;
9075 }
9076 if (sax != NULL)
9077 ctxt->sax = oldsax;
9078 xmlFreeParserCtxt(ctxt);
9079 newDoc->intSubset = NULL;
9080 newDoc->extSubset = NULL;
9081 xmlFreeDoc(newDoc);
9082
9083 return(ret);
9084}
9085
9086/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009087 * xmlParseExternalEntity:
9088 * @doc: the document the chunk pertains to
9089 * @sax: the SAX handler bloc (possibly NULL)
9090 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9091 * @depth: Used for loop detection, use 0
9092 * @URL: the URL for the entity to load
9093 * @ID: the System ID for the entity to load
9094 * @list: the return value for the set of parsed nodes
9095 *
9096 * Parse an external general entity
9097 * An external general parsed entity is well-formed if it matches the
9098 * production labeled extParsedEnt.
9099 *
9100 * [78] extParsedEnt ::= TextDecl? content
9101 *
9102 * Returns 0 if the entity is well formed, -1 in case of args problem and
9103 * the parser error code otherwise
9104 */
9105
9106int
9107xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9108 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
9109 return(xmlParseExternalEntityPrivate(doc, sax, user_data, depth, URL,
9110 ID, list, NULL));
9111}
9112
9113/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009114 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009115 * @doc: the document the chunk pertains to
9116 * @sax: the SAX handler bloc (possibly NULL)
9117 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9118 * @depth: Used for loop detection, use 0
9119 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9120 * @list: the return value for the set of parsed nodes
9121 *
9122 * Parse a well-balanced chunk of an XML document
9123 * called by the parser
9124 * The allowed sequence for the Well Balanced Chunk is the one defined by
9125 * the content production in the XML grammar:
9126 *
9127 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9128 *
9129 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9130 * the parser error code otherwise
9131 */
9132
9133int
9134xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9135 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9136 xmlParserCtxtPtr ctxt;
9137 xmlDocPtr newDoc;
9138 xmlSAXHandlerPtr oldsax = NULL;
9139 int size;
9140 int ret = 0;
9141
9142 if (depth > 40) {
9143 return(XML_ERR_ENTITY_LOOP);
9144 }
9145
9146
9147 if (list != NULL)
9148 *list = NULL;
9149 if (string == NULL)
9150 return(-1);
9151
9152 size = xmlStrlen(string);
9153
9154 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9155 if (ctxt == NULL) return(-1);
9156 ctxt->userData = ctxt;
9157 if (sax != NULL) {
9158 oldsax = ctxt->sax;
9159 ctxt->sax = sax;
9160 if (user_data != NULL)
9161 ctxt->userData = user_data;
9162 }
9163 newDoc = xmlNewDoc(BAD_CAST "1.0");
9164 if (newDoc == NULL) {
9165 xmlFreeParserCtxt(ctxt);
9166 return(-1);
9167 }
9168 if (doc != NULL) {
9169 newDoc->intSubset = doc->intSubset;
9170 newDoc->extSubset = doc->extSubset;
9171 }
9172 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9173 if (newDoc->children == NULL) {
9174 if (sax != NULL)
9175 ctxt->sax = oldsax;
9176 xmlFreeParserCtxt(ctxt);
9177 newDoc->intSubset = NULL;
9178 newDoc->extSubset = NULL;
9179 xmlFreeDoc(newDoc);
9180 return(-1);
9181 }
9182 nodePush(ctxt, newDoc->children);
9183 if (doc == NULL) {
9184 ctxt->myDoc = newDoc;
9185 } else {
9186 ctxt->myDoc = doc;
9187 newDoc->children->doc = doc;
9188 }
9189 ctxt->instate = XML_PARSER_CONTENT;
9190 ctxt->depth = depth;
9191
9192 /*
9193 * Doing validity checking on chunk doesn't make sense
9194 */
9195 ctxt->validate = 0;
9196 ctxt->loadsubset = 0;
9197
9198 xmlParseContent(ctxt);
9199
9200 if ((RAW == '<') && (NXT(1) == '/')) {
9201 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9203 ctxt->sax->error(ctxt->userData,
9204 "chunk is not well balanced\n");
9205 ctxt->wellFormed = 0;
9206 ctxt->disableSAX = 1;
9207 } else if (RAW != 0) {
9208 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9210 ctxt->sax->error(ctxt->userData,
9211 "extra content at the end of well balanced chunk\n");
9212 ctxt->wellFormed = 0;
9213 ctxt->disableSAX = 1;
9214 }
9215 if (ctxt->node != newDoc->children) {
9216 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9218 ctxt->sax->error(ctxt->userData,
9219 "chunk is not well balanced\n");
9220 ctxt->wellFormed = 0;
9221 ctxt->disableSAX = 1;
9222 }
9223
9224 if (!ctxt->wellFormed) {
9225 if (ctxt->errNo == 0)
9226 ret = 1;
9227 else
9228 ret = ctxt->errNo;
9229 } else {
9230 if (list != NULL) {
9231 xmlNodePtr cur;
9232
9233 /*
9234 * Return the newly created nodeset after unlinking it from
9235 * they pseudo parent.
9236 */
9237 cur = newDoc->children->children;
9238 *list = cur;
9239 while (cur != NULL) {
9240 cur->parent = NULL;
9241 cur = cur->next;
9242 }
9243 newDoc->children->children = NULL;
9244 }
9245 ret = 0;
9246 }
9247 if (sax != NULL)
9248 ctxt->sax = oldsax;
9249 xmlFreeParserCtxt(ctxt);
9250 newDoc->intSubset = NULL;
9251 newDoc->extSubset = NULL;
9252 xmlFreeDoc(newDoc);
9253
9254 return(ret);
9255}
9256
9257/**
9258 * xmlSAXParseEntity:
9259 * @sax: the SAX handler block
9260 * @filename: the filename
9261 *
9262 * parse an XML external entity out of context and build a tree.
9263 * It use the given SAX function block to handle the parsing callback.
9264 * If sax is NULL, fallback to the default DOM tree building routines.
9265 *
9266 * [78] extParsedEnt ::= TextDecl? content
9267 *
9268 * This correspond to a "Well Balanced" chunk
9269 *
9270 * Returns the resulting document tree
9271 */
9272
9273xmlDocPtr
9274xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9275 xmlDocPtr ret;
9276 xmlParserCtxtPtr ctxt;
9277 char *directory = NULL;
9278
9279 ctxt = xmlCreateFileParserCtxt(filename);
9280 if (ctxt == NULL) {
9281 return(NULL);
9282 }
9283 if (sax != NULL) {
9284 if (ctxt->sax != NULL)
9285 xmlFree(ctxt->sax);
9286 ctxt->sax = sax;
9287 ctxt->userData = NULL;
9288 }
9289
9290 if ((ctxt->directory == NULL) && (directory == NULL))
9291 directory = xmlParserGetDirectory(filename);
9292
9293 xmlParseExtParsedEnt(ctxt);
9294
9295 if (ctxt->wellFormed)
9296 ret = ctxt->myDoc;
9297 else {
9298 ret = NULL;
9299 xmlFreeDoc(ctxt->myDoc);
9300 ctxt->myDoc = NULL;
9301 }
9302 if (sax != NULL)
9303 ctxt->sax = NULL;
9304 xmlFreeParserCtxt(ctxt);
9305
9306 return(ret);
9307}
9308
9309/**
9310 * xmlParseEntity:
9311 * @filename: the filename
9312 *
9313 * parse an XML external entity out of context and build a tree.
9314 *
9315 * [78] extParsedEnt ::= TextDecl? content
9316 *
9317 * This correspond to a "Well Balanced" chunk
9318 *
9319 * Returns the resulting document tree
9320 */
9321
9322xmlDocPtr
9323xmlParseEntity(const char *filename) {
9324 return(xmlSAXParseEntity(NULL, filename));
9325}
9326
9327/**
9328 * xmlCreateEntityParserCtxt:
9329 * @URL: the entity URL
9330 * @ID: the entity PUBLIC ID
9331 * @base: a posible base for the target URI
9332 *
9333 * Create a parser context for an external entity
9334 * Automatic support for ZLIB/Compress compressed document is provided
9335 * by default if found at compile-time.
9336 *
9337 * Returns the new parser context or NULL
9338 */
9339xmlParserCtxtPtr
9340xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9341 const xmlChar *base) {
9342 xmlParserCtxtPtr ctxt;
9343 xmlParserInputPtr inputStream;
9344 char *directory = NULL;
9345 xmlChar *uri;
9346
9347 ctxt = xmlNewParserCtxt();
9348 if (ctxt == NULL) {
9349 return(NULL);
9350 }
9351
9352 uri = xmlBuildURI(URL, base);
9353
9354 if (uri == NULL) {
9355 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9356 if (inputStream == NULL) {
9357 xmlFreeParserCtxt(ctxt);
9358 return(NULL);
9359 }
9360
9361 inputPush(ctxt, inputStream);
9362
9363 if ((ctxt->directory == NULL) && (directory == NULL))
9364 directory = xmlParserGetDirectory((char *)URL);
9365 if ((ctxt->directory == NULL) && (directory != NULL))
9366 ctxt->directory = directory;
9367 } else {
9368 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9369 if (inputStream == NULL) {
9370 xmlFree(uri);
9371 xmlFreeParserCtxt(ctxt);
9372 return(NULL);
9373 }
9374
9375 inputPush(ctxt, inputStream);
9376
9377 if ((ctxt->directory == NULL) && (directory == NULL))
9378 directory = xmlParserGetDirectory((char *)uri);
9379 if ((ctxt->directory == NULL) && (directory != NULL))
9380 ctxt->directory = directory;
9381 xmlFree(uri);
9382 }
9383
9384 return(ctxt);
9385}
9386
9387/************************************************************************
9388 * *
9389 * Front ends when parsing from a file *
9390 * *
9391 ************************************************************************/
9392
9393/**
9394 * xmlCreateFileParserCtxt:
9395 * @filename: the filename
9396 *
9397 * Create a parser context for a file content.
9398 * Automatic support for ZLIB/Compress compressed document is provided
9399 * by default if found at compile-time.
9400 *
9401 * Returns the new parser context or NULL
9402 */
9403xmlParserCtxtPtr
9404xmlCreateFileParserCtxt(const char *filename)
9405{
9406 xmlParserCtxtPtr ctxt;
9407 xmlParserInputPtr inputStream;
9408 xmlParserInputBufferPtr buf;
9409 char *directory = NULL;
9410
9411 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9412 if (buf == NULL) {
9413 return(NULL);
9414 }
9415
9416 ctxt = xmlNewParserCtxt();
9417 if (ctxt == NULL) {
9418 if (xmlDefaultSAXHandler.error != NULL) {
9419 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9420 }
9421 return(NULL);
9422 }
9423
9424 inputStream = xmlNewInputStream(ctxt);
9425 if (inputStream == NULL) {
9426 xmlFreeParserCtxt(ctxt);
9427 return(NULL);
9428 }
9429
9430 inputStream->filename = xmlMemStrdup(filename);
9431 inputStream->buf = buf;
9432 inputStream->base = inputStream->buf->buffer->content;
9433 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009434 inputStream->end =
9435 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009436
9437 inputPush(ctxt, inputStream);
9438 if ((ctxt->directory == NULL) && (directory == NULL))
9439 directory = xmlParserGetDirectory(filename);
9440 if ((ctxt->directory == NULL) && (directory != NULL))
9441 ctxt->directory = directory;
9442
9443 return(ctxt);
9444}
9445
9446/**
9447 * xmlSAXParseFile:
9448 * @sax: the SAX handler block
9449 * @filename: the filename
9450 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9451 * documents
9452 *
9453 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9454 * compressed document is provided by default if found at compile-time.
9455 * It use the given SAX function block to handle the parsing callback.
9456 * If sax is NULL, fallback to the default DOM tree building routines.
9457 *
9458 * Returns the resulting document tree
9459 */
9460
9461xmlDocPtr
9462xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9463 int recovery) {
9464 xmlDocPtr ret;
9465 xmlParserCtxtPtr ctxt;
9466 char *directory = NULL;
9467
9468 ctxt = xmlCreateFileParserCtxt(filename);
9469 if (ctxt == NULL) {
9470 return(NULL);
9471 }
9472 if (sax != NULL) {
9473 if (ctxt->sax != NULL)
9474 xmlFree(ctxt->sax);
9475 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009476 }
9477
9478 if ((ctxt->directory == NULL) && (directory == NULL))
9479 directory = xmlParserGetDirectory(filename);
9480 if ((ctxt->directory == NULL) && (directory != NULL))
9481 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9482
9483 xmlParseDocument(ctxt);
9484
9485 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9486 else {
9487 ret = NULL;
9488 xmlFreeDoc(ctxt->myDoc);
9489 ctxt->myDoc = NULL;
9490 }
9491 if (sax != NULL)
9492 ctxt->sax = NULL;
9493 xmlFreeParserCtxt(ctxt);
9494
9495 return(ret);
9496}
9497
9498/**
9499 * xmlRecoverDoc:
9500 * @cur: a pointer to an array of xmlChar
9501 *
9502 * parse an XML in-memory document and build a tree.
9503 * In the case the document is not Well Formed, a tree is built anyway
9504 *
9505 * Returns the resulting document tree
9506 */
9507
9508xmlDocPtr
9509xmlRecoverDoc(xmlChar *cur) {
9510 return(xmlSAXParseDoc(NULL, cur, 1));
9511}
9512
9513/**
9514 * xmlParseFile:
9515 * @filename: the filename
9516 *
9517 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9518 * compressed document is provided by default if found at compile-time.
9519 *
9520 * Returns the resulting document tree
9521 */
9522
9523xmlDocPtr
9524xmlParseFile(const char *filename) {
9525 return(xmlSAXParseFile(NULL, filename, 0));
9526}
9527
9528/**
9529 * xmlRecoverFile:
9530 * @filename: the filename
9531 *
9532 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9533 * compressed document is provided by default if found at compile-time.
9534 * In the case the document is not Well Formed, a tree is built anyway
9535 *
9536 * Returns the resulting document tree
9537 */
9538
9539xmlDocPtr
9540xmlRecoverFile(const char *filename) {
9541 return(xmlSAXParseFile(NULL, filename, 1));
9542}
9543
9544
9545/**
9546 * xmlSetupParserForBuffer:
9547 * @ctxt: an XML parser context
9548 * @buffer: a xmlChar * buffer
9549 * @filename: a file name
9550 *
9551 * Setup the parser context to parse a new buffer; Clears any prior
9552 * contents from the parser context. The buffer parameter must not be
9553 * NULL, but the filename parameter can be
9554 */
9555void
9556xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9557 const char* filename)
9558{
9559 xmlParserInputPtr input;
9560
9561 input = xmlNewInputStream(ctxt);
9562 if (input == NULL) {
9563 perror("malloc");
9564 xmlFree(ctxt);
9565 return;
9566 }
9567
9568 xmlClearParserCtxt(ctxt);
9569 if (filename != NULL)
9570 input->filename = xmlMemStrdup(filename);
9571 input->base = buffer;
9572 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009573 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009574 inputPush(ctxt, input);
9575}
9576
9577/**
9578 * xmlSAXUserParseFile:
9579 * @sax: a SAX handler
9580 * @user_data: The user data returned on SAX callbacks
9581 * @filename: a file name
9582 *
9583 * parse an XML file and call the given SAX handler routines.
9584 * Automatic support for ZLIB/Compress compressed document is provided
9585 *
9586 * Returns 0 in case of success or a error number otherwise
9587 */
9588int
9589xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9590 const char *filename) {
9591 int ret = 0;
9592 xmlParserCtxtPtr ctxt;
9593
9594 ctxt = xmlCreateFileParserCtxt(filename);
9595 if (ctxt == NULL) return -1;
9596 if (ctxt->sax != &xmlDefaultSAXHandler)
9597 xmlFree(ctxt->sax);
9598 ctxt->sax = sax;
9599 if (user_data != NULL)
9600 ctxt->userData = user_data;
9601
9602 xmlParseDocument(ctxt);
9603
9604 if (ctxt->wellFormed)
9605 ret = 0;
9606 else {
9607 if (ctxt->errNo != 0)
9608 ret = ctxt->errNo;
9609 else
9610 ret = -1;
9611 }
9612 if (sax != NULL)
9613 ctxt->sax = NULL;
9614 xmlFreeParserCtxt(ctxt);
9615
9616 return ret;
9617}
9618
9619/************************************************************************
9620 * *
9621 * Front ends when parsing from memory *
9622 * *
9623 ************************************************************************/
9624
9625/**
9626 * xmlCreateMemoryParserCtxt:
9627 * @buffer: a pointer to a char array
9628 * @size: the size of the array
9629 *
9630 * Create a parser context for an XML in-memory document.
9631 *
9632 * Returns the new parser context or NULL
9633 */
9634xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009635xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009636 xmlParserCtxtPtr ctxt;
9637 xmlParserInputPtr input;
9638 xmlParserInputBufferPtr buf;
9639
9640 if (buffer == NULL)
9641 return(NULL);
9642 if (size <= 0)
9643 return(NULL);
9644
9645 ctxt = xmlNewParserCtxt();
9646 if (ctxt == NULL)
9647 return(NULL);
9648
9649 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9650 if (buf == NULL) return(NULL);
9651
9652 input = xmlNewInputStream(ctxt);
9653 if (input == NULL) {
9654 xmlFreeParserCtxt(ctxt);
9655 return(NULL);
9656 }
9657
9658 input->filename = NULL;
9659 input->buf = buf;
9660 input->base = input->buf->buffer->content;
9661 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009662 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009663
9664 inputPush(ctxt, input);
9665 return(ctxt);
9666}
9667
9668/**
9669 * xmlSAXParseMemory:
9670 * @sax: the SAX handler block
9671 * @buffer: an pointer to a char array
9672 * @size: the size of the array
9673 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9674 * documents
9675 *
9676 * parse an XML in-memory block and use the given SAX function block
9677 * to handle the parsing callback. If sax is NULL, fallback to the default
9678 * DOM tree building routines.
9679 *
9680 * Returns the resulting document tree
9681 */
9682xmlDocPtr
9683xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9684 xmlDocPtr ret;
9685 xmlParserCtxtPtr ctxt;
9686
9687 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9688 if (ctxt == NULL) return(NULL);
9689 if (sax != NULL) {
9690 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009691 }
9692
9693 xmlParseDocument(ctxt);
9694
9695 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9696 else {
9697 ret = NULL;
9698 xmlFreeDoc(ctxt->myDoc);
9699 ctxt->myDoc = NULL;
9700 }
9701 if (sax != NULL)
9702 ctxt->sax = NULL;
9703 xmlFreeParserCtxt(ctxt);
9704
9705 return(ret);
9706}
9707
9708/**
9709 * xmlParseMemory:
9710 * @buffer: an pointer to a char array
9711 * @size: the size of the array
9712 *
9713 * parse an XML in-memory block and build a tree.
9714 *
9715 * Returns the resulting document tree
9716 */
9717
9718xmlDocPtr xmlParseMemory(char *buffer, int size) {
9719 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9720}
9721
9722/**
9723 * xmlRecoverMemory:
9724 * @buffer: an pointer to a char array
9725 * @size: the size of the array
9726 *
9727 * parse an XML in-memory block and build a tree.
9728 * In the case the document is not Well Formed, a tree is built anyway
9729 *
9730 * Returns the resulting document tree
9731 */
9732
9733xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9734 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9735}
9736
9737/**
9738 * xmlSAXUserParseMemory:
9739 * @sax: a SAX handler
9740 * @user_data: The user data returned on SAX callbacks
9741 * @buffer: an in-memory XML document input
9742 * @size: the length of the XML document in bytes
9743 *
9744 * A better SAX parsing routine.
9745 * parse an XML in-memory buffer and call the given SAX handler routines.
9746 *
9747 * Returns 0 in case of success or a error number otherwise
9748 */
9749int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009750 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009751 int ret = 0;
9752 xmlParserCtxtPtr ctxt;
9753 xmlSAXHandlerPtr oldsax = NULL;
9754
9755 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9756 if (ctxt == NULL) return -1;
9757 if (sax != NULL) {
9758 oldsax = ctxt->sax;
9759 ctxt->sax = sax;
9760 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009761 if (user_data != NULL)
9762 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009763
9764 xmlParseDocument(ctxt);
9765
9766 if (ctxt->wellFormed)
9767 ret = 0;
9768 else {
9769 if (ctxt->errNo != 0)
9770 ret = ctxt->errNo;
9771 else
9772 ret = -1;
9773 }
9774 if (sax != NULL) {
9775 ctxt->sax = oldsax;
9776 }
9777 xmlFreeParserCtxt(ctxt);
9778
9779 return ret;
9780}
9781
9782/**
9783 * xmlCreateDocParserCtxt:
9784 * @cur: a pointer to an array of xmlChar
9785 *
9786 * Creates a parser context for an XML in-memory document.
9787 *
9788 * Returns the new parser context or NULL
9789 */
9790xmlParserCtxtPtr
9791xmlCreateDocParserCtxt(xmlChar *cur) {
9792 int len;
9793
9794 if (cur == NULL)
9795 return(NULL);
9796 len = xmlStrlen(cur);
9797 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9798}
9799
9800/**
9801 * xmlSAXParseDoc:
9802 * @sax: the SAX handler block
9803 * @cur: a pointer to an array of xmlChar
9804 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9805 * documents
9806 *
9807 * parse an XML in-memory document and build a tree.
9808 * It use the given SAX function block to handle the parsing callback.
9809 * If sax is NULL, fallback to the default DOM tree building routines.
9810 *
9811 * Returns the resulting document tree
9812 */
9813
9814xmlDocPtr
9815xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9816 xmlDocPtr ret;
9817 xmlParserCtxtPtr ctxt;
9818
9819 if (cur == NULL) return(NULL);
9820
9821
9822 ctxt = xmlCreateDocParserCtxt(cur);
9823 if (ctxt == NULL) return(NULL);
9824 if (sax != NULL) {
9825 ctxt->sax = sax;
9826 ctxt->userData = NULL;
9827 }
9828
9829 xmlParseDocument(ctxt);
9830 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9831 else {
9832 ret = NULL;
9833 xmlFreeDoc(ctxt->myDoc);
9834 ctxt->myDoc = NULL;
9835 }
9836 if (sax != NULL)
9837 ctxt->sax = NULL;
9838 xmlFreeParserCtxt(ctxt);
9839
9840 return(ret);
9841}
9842
9843/**
9844 * xmlParseDoc:
9845 * @cur: a pointer to an array of xmlChar
9846 *
9847 * parse an XML in-memory document and build a tree.
9848 *
9849 * Returns the resulting document tree
9850 */
9851
9852xmlDocPtr
9853xmlParseDoc(xmlChar *cur) {
9854 return(xmlSAXParseDoc(NULL, cur, 0));
9855}
9856
9857
9858/************************************************************************
9859 * *
9860 * Miscellaneous *
9861 * *
9862 ************************************************************************/
9863
9864#ifdef LIBXML_XPATH_ENABLED
9865#include <libxml/xpath.h>
9866#endif
9867
/*
 * Guard for the one-time global setup: set to 1 once xmlInitParser() has
 * run, reset to 0 by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
9869
9870/**
9871 * xmlInitParser:
9872 *
9873 * Initialization function for the XML parser.
9874 * This is not reentrant. Call once before processing in case of
9875 * use in multithreaded programs.
9876 */
9877
9878void
9879xmlInitParser(void) {
9880 if (xmlParserInitialized) return;
9881
9882 xmlInitCharEncodingHandlers();
9883 xmlInitializePredefinedEntities();
9884 xmlDefaultSAXHandlerInit();
9885 xmlRegisterDefaultInputCallbacks();
9886 xmlRegisterDefaultOutputCallbacks();
9887#ifdef LIBXML_HTML_ENABLED
9888 htmlInitAutoClose();
9889 htmlDefaultSAXHandlerInit();
9890#endif
9891#ifdef LIBXML_XPATH_ENABLED
9892 xmlXPathInit();
9893#endif
9894 xmlParserInitialized = 1;
9895}
9896
9897/**
9898 * xmlCleanupParser:
9899 *
9900 * Cleanup function for the XML parser. It tries to reclaim all
9901 * parsing related global memory allocated for the parser processing.
9902 * It doesn't deallocate any document related memory. Calling this
9903 * function should not prevent reusing the parser.
9904 */
9905
9906void
9907xmlCleanupParser(void) {
9908 xmlParserInitialized = 0;
9909 xmlCleanupCharEncodingHandlers();
9910 xmlCleanupPredefinedEntities();
9911}
9912
9913/**
9914 * xmlPedanticParserDefault:
9915 * @val: int 0 or 1
9916 *
9917 * Set and return the previous value for enabling pedantic warnings.
9918 *
9919 * Returns the last value for 0 for no substitution, 1 for substitution.
9920 */
9921
9922int
9923xmlPedanticParserDefault(int val) {
9924 int old = xmlPedanticParserDefaultValue;
9925
9926 xmlPedanticParserDefaultValue = val;
9927 return(old);
9928}
9929
9930/**
9931 * xmlSubstituteEntitiesDefault:
9932 * @val: int 0 or 1
9933 *
9934 * Set and return the previous value for default entity support.
9935 * Initially the parser always keep entity references instead of substituting
9936 * entity values in the output. This function has to be used to change the
9937 * default parser behaviour
9938 * SAX::subtituteEntities() has to be used for changing that on a file by
9939 * file basis.
9940 *
9941 * Returns the last value for 0 for no substitution, 1 for substitution.
9942 */
9943
9944int
9945xmlSubstituteEntitiesDefault(int val) {
9946 int old = xmlSubstituteEntitiesDefaultValue;
9947
9948 xmlSubstituteEntitiesDefaultValue = val;
9949 return(old);
9950}
9951
9952/**
9953 * xmlKeepBlanksDefault:
9954 * @val: int 0 or 1
9955 *
9956 * Set and return the previous value for default blanks text nodes support.
9957 * The 1.x version of the parser used an heuristic to try to detect
9958 * ignorable white spaces. As a result the SAX callback was generating
9959 * ignorableWhitespace() callbacks instead of characters() one, and when
9960 * using the DOM output text nodes containing those blanks were not generated.
9961 * The 2.x and later version will switch to the XML standard way and
9962 * ignorableWhitespace() are only generated when running the parser in
9963 * validating mode and when the current element doesn't allow CDATA or
9964 * mixed content.
9965 * This function is provided as a way to force the standard behaviour
9966 * on 1.X libs and to switch back to the old mode for compatibility when
9967 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9968 * by using xmlIsBlankNode() commodity function to detect the "empty"
9969 * nodes generated.
9970 * This value also affect autogeneration of indentation when saving code
9971 * if blanks sections are kept, indentation is not generated.
9972 *
9973 * Returns the last value for 0 for no substitution, 1 for substitution.
9974 */
9975
9976int
9977xmlKeepBlanksDefault(int val) {
9978 int old = xmlKeepBlanksDefaultValue;
9979
9980 xmlKeepBlanksDefaultValue = val;
9981 xmlIndentTreeOutput = !val;
9982 return(old);
9983}
9984