blob: 9aa091cddb2dcf8913299253486f9788195bc7c6 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
44#include <libxml/tree.h>
45#include <libxml/parser.h>
46#include <libxml/parserInternals.h>
47#include <libxml/valid.h>
48#include <libxml/entities.h>
49#include <libxml/xmlerror.h>
50#include <libxml/encoding.h>
51#include <libxml/xmlIO.h>
52#include <libxml/uri.h>
53
54#ifdef HAVE_CTYPE_H
55#include <ctype.h>
56#endif
57#ifdef HAVE_STDLIB_H
58#include <stdlib.h>
59#endif
60#ifdef HAVE_SYS_STAT_H
61#include <sys/stat.h>
62#endif
63#ifdef HAVE_FCNTL_H
64#include <fcntl.h>
65#endif
66#ifdef HAVE_UNISTD_H
67#include <unistd.h>
68#endif
69#ifdef HAVE_ZLIB_H
70#include <zlib.h>
71#endif
72
73
Daniel Veillard21a0f912001-02-25 19:54:14 +000074#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000075#define XML_PARSER_BUFFER_SIZE 100
76
77/*
78 * Various global defaults for parsing
79 */
Owen Taylor3473f882001-02-23 17:55:21 +000080int xmlParserDebugEntities = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000081
82/*
83 * List of XML prefixed PI allowed by W3C specs
84 */
85
86const char *xmlW3CPIs[] = {
87 "xml-stylesheet",
88 NULL
89};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
102/************************************************************************
103 * *
104 * Parser stacks related functions and macros *
105 * *
106 ************************************************************************/
107
108xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
109 const xmlChar ** str);
110
111/*
112 * Generic function for accessing stacks in the Parser Context
113 */
114
115#define PUSH_AND_POP(scope, type, name) \
116scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
117 if (ctxt->name##Nr >= ctxt->name##Max) { \
118 ctxt->name##Max *= 2; \
119 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
120 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
121 if (ctxt->name##Tab == NULL) { \
122 xmlGenericError(xmlGenericErrorContext, \
123 "realloc failed !\n"); \
124 return(0); \
125 } \
126 } \
127 ctxt->name##Tab[ctxt->name##Nr] = value; \
128 ctxt->name = value; \
129 return(ctxt->name##Nr++); \
130} \
131scope type name##Pop(xmlParserCtxtPtr ctxt) { \
132 type ret; \
133 if (ctxt->name##Nr <= 0) return(0); \
134 ctxt->name##Nr--; \
135 if (ctxt->name##Nr > 0) \
136 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
137 else \
138 ctxt->name = NULL; \
139 ret = ctxt->name##Tab[ctxt->name##Nr]; \
140 ctxt->name##Tab[ctxt->name##Nr] = 0; \
141 return(ret); \
142} \
143
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000144/**
145 * inputPop:
146 * @ctxt: an XML parser context
147 *
148 * Pops the top parser input from the input stack
149 *
150 * Returns the input just removed
151 */
152/**
153 * inputPush:
154 * @ctxt: an XML parser context
155 * @input: the parser input
156 *
157 * Pushes a new parser input on top of the input stack
158 */
159/**
160 * namePop:
161 * @ctxt: an XML parser context
162 *
163 * Pops the top element name from the name stack
164 *
165 * Returns the name just removed
166 */
167/**
168 * namePush:
169 * @ctxt: an XML parser context
170 * @name: the element name
171 *
172 * Pushes a new element name on top of the name stack
173 */
174/**
175 * nodePop:
176 * @ctxt: an XML parser context
177 *
178 * Pops the top element node from the node stack
179 *
180 * Returns the node just removed
181 */
182/**
183 * nodePush:
184 * @ctxt: an XML parser context
185 * @node: the element node
186 *
187 * Pushes a new element node on top of the node stack
188 */
Owen Taylor3473f882001-02-23 17:55:21 +0000189/*
190 * Those macros actually generate the functions
191 */
192PUSH_AND_POP(extern, xmlParserInputPtr, input)
193PUSH_AND_POP(extern, xmlNodePtr, node)
194PUSH_AND_POP(extern, xmlChar*, name)
195
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000196static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000197 if (ctxt->spaceNr >= ctxt->spaceMax) {
198 ctxt->spaceMax *= 2;
199 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
200 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
201 if (ctxt->spaceTab == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "realloc failed !\n");
204 return(0);
205 }
206 }
207 ctxt->spaceTab[ctxt->spaceNr] = val;
208 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
209 return(ctxt->spaceNr++);
210}
211
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000212static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000213 int ret;
214 if (ctxt->spaceNr <= 0) return(0);
215 ctxt->spaceNr--;
216 if (ctxt->spaceNr > 0)
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
218 else
219 ctxt->space = NULL;
220 ret = ctxt->spaceTab[ctxt->spaceNr];
221 ctxt->spaceTab[ctxt->spaceNr] = -1;
222 return(ret);
223}
224
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named "ctxt" holding the
 * parser context to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token if there is one, otherwise the
 *           current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but looks only at the input buffer: it evaluates
 *           to -1 while a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up finished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar without any decoding, growing the
 *           input if its end was reached.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare "if" statement or block,
 *       and COPY_BUF to an unbraced if/else; this file also uses GROW
 *       without a trailing semicolon. Do not use them as the unbraced
 *       body of an if/else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000311
312/**
313 * xmlSkipBlankChars:
314 * @ctxt: the XML parser context
315 *
316 * skip all blanks character found at that point in the input streams.
317 * It pops up finished entities in the process if allowable at that point.
318 *
319 * Returns the number of space chars skipped
320 */
321
322int
323xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000324 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000325
Daniel Veillard02141ea2001-04-30 11:46:40 +0000326 if (ctxt->token != 0) {
327 if (!IS_BLANK(ctxt->token))
328 return(0);
329 ctxt->token = 0;
330 res++;
331 }
Owen Taylor3473f882001-02-23 17:55:21 +0000332 /*
333 * It's Okay to use CUR/NEXT here since all the blanks are on
334 * the ASCII range.
335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
337 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000338 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000340 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000341 cur = ctxt->input->cur;
342 while (IS_BLANK(*cur)) {
343 if (*cur == '\n') {
344 ctxt->input->line++; ctxt->input->col = 1;
345 }
346 cur++;
347 res++;
348 if (*cur == 0) {
349 ctxt->input->cur = cur;
350 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
351 cur = ctxt->input->cur;
352 }
353 }
354 ctxt->input->cur = cur;
355 } else {
356 int cur;
357 do {
358 cur = CUR;
359 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
360 NEXT;
361 cur = CUR;
362 res++;
363 }
364 while ((cur == 0) && (ctxt->inputNr > 1) &&
365 (ctxt->instate != XML_PARSER_COMMENT)) {
366 xmlPopInput(ctxt);
367 cur = CUR;
368 }
369 /*
370 * Need to handle support of entities branching here
371 */
372 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
373 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
374 }
Owen Taylor3473f882001-02-23 17:55:21 +0000375 return(res);
376}
377
378/************************************************************************
379 * *
380 * Commodity functions to handle entities *
381 * *
382 ************************************************************************/
383
384/**
385 * xmlPopInput:
386 * @ctxt: an XML parser context
387 *
388 * xmlPopInput: the current input pointed by ctxt->input came to an end
389 * pop it and return the next char.
390 *
391 * Returns the current xmlChar in the parser context
392 */
393xmlChar
394xmlPopInput(xmlParserCtxtPtr ctxt) {
395 if (ctxt->inputNr == 1) return(0); /* End of main Input */
396 if (xmlParserDebugEntities)
397 xmlGenericError(xmlGenericErrorContext,
398 "Popping input %d\n", ctxt->inputNr);
399 xmlFreeInputStream(inputPop(ctxt));
400 if ((*ctxt->input->cur == 0) &&
401 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
402 return(xmlPopInput(ctxt));
403 return(CUR);
404}
405
406/**
407 * xmlPushInput:
408 * @ctxt: an XML parser context
409 * @input: an XML parser input fragment (entity, XML fragment ...).
410 *
411 * xmlPushInput: switch to a new input stream which is stacked on top
412 * of the previous one(s).
413 */
414void
415xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
416 if (input == NULL) return;
417
418 if (xmlParserDebugEntities) {
419 if ((ctxt->input != NULL) && (ctxt->input->filename))
420 xmlGenericError(xmlGenericErrorContext,
421 "%s(%d): ", ctxt->input->filename,
422 ctxt->input->line);
423 xmlGenericError(xmlGenericErrorContext,
424 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
425 }
426 inputPush(ctxt, input);
427 GROW;
428}
429
430/**
431 * xmlParseCharRef:
432 * @ctxt: an XML parser context
433 *
434 * parse Reference declarations
435 *
436 * [66] CharRef ::= '&#' [0-9]+ ';' |
437 * '&#x' [0-9a-fA-F]+ ';'
438 *
439 * [ WFC: Legal Character ]
440 * Characters referred to using character references must match the
441 * production for Char.
442 *
443 * Returns the value parsed (as an int), 0 in case of error
444 */
445int
446xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000447 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000448 int count = 0;
449
450 if (ctxt->token != 0) {
451 val = ctxt->token;
452 ctxt->token = 0;
453 return(val);
454 }
455 /*
456 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
457 */
458 if ((RAW == '&') && (NXT(1) == '#') &&
459 (NXT(2) == 'x')) {
460 SKIP(3);
461 GROW;
462 while (RAW != ';') { /* loop blocked by count */
463 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
464 val = val * 16 + (CUR - '0');
465 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
466 val = val * 16 + (CUR - 'a') + 10;
467 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
468 val = val * 16 + (CUR - 'A') + 10;
469 else {
470 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
472 ctxt->sax->error(ctxt->userData,
473 "xmlParseCharRef: invalid hexadecimal value\n");
474 ctxt->wellFormed = 0;
475 ctxt->disableSAX = 1;
476 val = 0;
477 break;
478 }
479 NEXT;
480 count++;
481 }
482 if (RAW == ';') {
483 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
484 ctxt->nbChars ++;
485 ctxt->input->cur++;
486 }
487 } else if ((RAW == '&') && (NXT(1) == '#')) {
488 SKIP(2);
489 GROW;
490 while (RAW != ';') { /* loop blocked by count */
491 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
492 val = val * 10 + (CUR - '0');
493 else {
494 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
496 ctxt->sax->error(ctxt->userData,
497 "xmlParseCharRef: invalid decimal value\n");
498 ctxt->wellFormed = 0;
499 ctxt->disableSAX = 1;
500 val = 0;
501 break;
502 }
503 NEXT;
504 count++;
505 }
506 if (RAW == ';') {
507 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
508 ctxt->nbChars ++;
509 ctxt->input->cur++;
510 }
511 } else {
512 ctxt->errNo = XML_ERR_INVALID_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid value\n");
516 ctxt->wellFormed = 0;
517 ctxt->disableSAX = 1;
518 }
519
520 /*
521 * [ WFC: Legal Character ]
522 * Characters referred to using character references must match the
523 * production for Char.
524 */
525 if (IS_CHAR(val)) {
526 return(val);
527 } else {
528 ctxt->errNo = XML_ERR_INVALID_CHAR;
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
531 val);
532 ctxt->wellFormed = 0;
533 ctxt->disableSAX = 1;
534 }
535 return(0);
536}
537
538/**
539 * xmlParseStringCharRef:
540 * @ctxt: an XML parser context
541 * @str: a pointer to an index in the string
542 *
543 * parse Reference declarations, variant parsing from a string rather
544 * than an an input flow.
545 *
546 * [66] CharRef ::= '&#' [0-9]+ ';' |
547 * '&#x' [0-9a-fA-F]+ ';'
548 *
549 * [ WFC: Legal Character ]
550 * Characters referred to using character references must match the
551 * production for Char.
552 *
553 * Returns the value parsed (as an int), 0 in case of error, str will be
554 * updated to the current value of the index
555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000556static int
Owen Taylor3473f882001-02-23 17:55:21 +0000557xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
558 const xmlChar *ptr;
559 xmlChar cur;
560 int val = 0;
561
562 if ((str == NULL) || (*str == NULL)) return(0);
563 ptr = *str;
564 cur = *ptr;
565 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
566 ptr += 3;
567 cur = *ptr;
568 while (cur != ';') { /* Non input consuming loop */
569 if ((cur >= '0') && (cur <= '9'))
570 val = val * 16 + (cur - '0');
571 else if ((cur >= 'a') && (cur <= 'f'))
572 val = val * 16 + (cur - 'a') + 10;
573 else if ((cur >= 'A') && (cur <= 'F'))
574 val = val * 16 + (cur - 'A') + 10;
575 else {
576 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
578 ctxt->sax->error(ctxt->userData,
579 "xmlParseStringCharRef: invalid hexadecimal value\n");
580 ctxt->wellFormed = 0;
581 ctxt->disableSAX = 1;
582 val = 0;
583 break;
584 }
585 ptr++;
586 cur = *ptr;
587 }
588 if (cur == ';')
589 ptr++;
590 } else if ((cur == '&') && (ptr[1] == '#')){
591 ptr += 2;
592 cur = *ptr;
593 while (cur != ';') { /* Non input consuming loops */
594 if ((cur >= '0') && (cur <= '9'))
595 val = val * 10 + (cur - '0');
596 else {
597 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid decimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else {
612 ctxt->errNo = XML_ERR_INVALID_CHARREF;
613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
614 ctxt->sax->error(ctxt->userData,
615 "xmlParseCharRef: invalid value\n");
616 ctxt->wellFormed = 0;
617 ctxt->disableSAX = 1;
618 return(0);
619 }
620 *str = ptr;
621
622 /*
623 * [ WFC: Legal Character ]
624 * Characters referred to using character references must match the
625 * production for Char.
626 */
627 if (IS_CHAR(val)) {
628 return(val);
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHAR;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
633 "CharRef: invalid xmlChar value %d\n", val);
634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 }
637 return(0);
638}
639
640/**
641 * xmlParserHandlePEReference:
642 * @ctxt: the parser context
643 *
644 * [69] PEReference ::= '%' Name ';'
645 *
646 * [ WFC: No Recursion ]
647 * A parsed entity must not contain a recursive
648 * reference to itself, either directly or indirectly.
649 *
650 * [ WFC: Entity Declared ]
651 * In a document without any DTD, a document with only an internal DTD
652 * subset which contains no parameter entity references, or a document
653 * with "standalone='yes'", ... ... The declaration of a parameter
654 * entity must precede any reference to it...
655 *
656 * [ VC: Entity Declared ]
657 * In a document with an external subset or external parameter entities
658 * with "standalone='no'", ... ... The declaration of a parameter entity
659 * must precede any reference to it...
660 *
661 * [ WFC: In DTD ]
662 * Parameter-entity references may only appear in the DTD.
663 * NOTE: misleading but this is handled.
664 *
665 * A PEReference may have been detected in the current input stream
666 * the handling is done accordingly to
667 * http://www.w3.org/TR/REC-xml#entproc
668 * i.e.
669 * - Included in literal in entity values
670 * - Included as Paraemeter Entity reference within DTDs
671 */
672void
673xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
674 xmlChar *name;
675 xmlEntityPtr entity = NULL;
676 xmlParserInputPtr input;
677
678 if (ctxt->token != 0) {
679 return;
680 }
681 if (RAW != '%') return;
682 switch(ctxt->instate) {
683 case XML_PARSER_CDATA_SECTION:
684 return;
685 case XML_PARSER_COMMENT:
686 return;
687 case XML_PARSER_START_TAG:
688 return;
689 case XML_PARSER_END_TAG:
690 return;
691 case XML_PARSER_EOF:
692 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
694 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
695 ctxt->wellFormed = 0;
696 ctxt->disableSAX = 1;
697 return;
698 case XML_PARSER_PROLOG:
699 case XML_PARSER_START:
700 case XML_PARSER_MISC:
701 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
703 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
704 ctxt->wellFormed = 0;
705 ctxt->disableSAX = 1;
706 return;
707 case XML_PARSER_ENTITY_DECL:
708 case XML_PARSER_CONTENT:
709 case XML_PARSER_ATTRIBUTE_VALUE:
710 case XML_PARSER_PI:
711 case XML_PARSER_SYSTEM_LITERAL:
712 /* we just ignore it there */
713 return;
714 case XML_PARSER_EPILOG:
715 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
717 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
718 ctxt->wellFormed = 0;
719 ctxt->disableSAX = 1;
720 return;
721 case XML_PARSER_ENTITY_VALUE:
722 /*
723 * NOTE: in the case of entity values, we don't do the
724 * substitution here since we need the literal
725 * entity value to be able to save the internal
726 * subset of the document.
727 * This will be handled by xmlStringDecodeEntities
728 */
729 return;
730 case XML_PARSER_DTD:
731 /*
732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
733 * In the internal DTD subset, parameter-entity references
734 * can occur only where markup declarations can occur, not
735 * within markup declarations.
736 * In that case this is handled in xmlParseMarkupDecl
737 */
738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
739 return;
740 break;
741 case XML_PARSER_IGNORE:
742 return;
743 }
744
745 NEXT;
746 name = xmlParseName(ctxt);
747 if (xmlParserDebugEntities)
748 xmlGenericError(xmlGenericErrorContext,
749 "PE Reference: %s\n", name);
750 if (name == NULL) {
751 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
753 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
754 ctxt->wellFormed = 0;
755 ctxt->disableSAX = 1;
756 } else {
757 if (RAW == ';') {
758 NEXT;
759 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
760 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
761 if (entity == NULL) {
762
763 /*
764 * [ WFC: Entity Declared ]
765 * In a document without any DTD, a document with only an
766 * internal DTD subset which contains no parameter entity
767 * references, or a document with "standalone='yes'", ...
768 * ... The declaration of a parameter entity must precede
769 * any reference to it...
770 */
771 if ((ctxt->standalone == 1) ||
772 ((ctxt->hasExternalSubset == 0) &&
773 (ctxt->hasPErefs == 0))) {
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
775 ctxt->sax->error(ctxt->userData,
776 "PEReference: %%%s; not found\n", name);
777 ctxt->wellFormed = 0;
778 ctxt->disableSAX = 1;
779 } else {
780 /*
781 * [ VC: Entity Declared ]
782 * In a document with an external subset or external
783 * parameter entities with "standalone='no'", ...
784 * ... The declaration of a parameter entity must precede
785 * any reference to it...
786 */
787 if ((!ctxt->disableSAX) &&
788 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
789 ctxt->vctxt.error(ctxt->vctxt.userData,
790 "PEReference: %%%s; not found\n", name);
791 } else if ((!ctxt->disableSAX) &&
792 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
793 ctxt->sax->warning(ctxt->userData,
794 "PEReference: %%%s; not found\n", name);
795 ctxt->valid = 0;
796 }
797 } else {
798 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
799 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000800 xmlChar start[4];
801 xmlCharEncoding enc;
802
Owen Taylor3473f882001-02-23 17:55:21 +0000803 /*
804 * handle the extra spaces added before and after
805 * c.f. http://www.w3.org/TR/REC-xml#as-PE
806 * this is done independantly.
807 */
808 input = xmlNewEntityInputStream(ctxt, entity);
809 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000810
811 /*
812 * Get the 4 first bytes and decode the charset
813 * if enc != XML_CHAR_ENCODING_NONE
814 * plug some encoding conversion routines.
815 */
816 GROW
817 start[0] = RAW;
818 start[1] = NXT(1);
819 start[2] = NXT(2);
820 start[3] = NXT(3);
821 enc = xmlDetectCharEncoding(start, 4);
822 if (enc != XML_CHAR_ENCODING_NONE) {
823 xmlSwitchEncoding(ctxt, enc);
824 }
825
Owen Taylor3473f882001-02-23 17:55:21 +0000826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
827 (RAW == '<') && (NXT(1) == '?') &&
828 (NXT(2) == 'x') && (NXT(3) == 'm') &&
829 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
830 xmlParseTextDecl(ctxt);
831 }
832 if (ctxt->token == 0)
833 ctxt->token = ' ';
834 } else {
835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
836 ctxt->sax->error(ctxt->userData,
837 "xmlHandlePEReference: %s is not a parameter entity\n",
838 name);
839 ctxt->wellFormed = 0;
840 ctxt->disableSAX = 1;
841 }
842 }
843 } else {
844 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "xmlHandlePEReference: expecting ';'\n");
848 ctxt->wellFormed = 0;
849 ctxt->disableSAX = 1;
850 }
851 xmlFree(name);
852 }
853}
854
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of an xmlChar * variable allocated with the
 * libxml memory routines, with its capacity held in a variable named
 * <buffer>_size. The capacity is doubled and the buffer reallocated.
 *
 * On allocation failure the enclosing function returns NULL. The
 * reallocation goes through a temporary so that the original block is
 * still reachable at that point; it is released before returning since
 * nothing else can free it afterwards.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
867
868/**
869 * xmlStringDecodeEntities:
870 * @ctxt: the parser context
871 * @str: the input string
872 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
873 * @end: an end marker xmlChar, 0 if none
874 * @end2: an end marker xmlChar, 0 if none
875 * @end3: an end marker xmlChar, 0 if none
876 *
877 * Takes a entity string content and process to do the adequate subtitutions.
878 *
879 * [67] Reference ::= EntityRef | CharRef
880 *
881 * [69] PEReference ::= '%' Name ';'
882 *
883 * Returns A newly allocated string with the substitution done. The caller
884 * must deallocate it !
885 */
886xmlChar *
887xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
888 xmlChar end, xmlChar end2, xmlChar end3) {
889 xmlChar *buffer = NULL;
890 int buffer_size = 0;
891
892 xmlChar *current = NULL;
893 xmlEntityPtr ent;
894 int c,l;
895 int nbchars = 0;
896
897 if (str == NULL)
898 return(NULL);
899
900 if (ctxt->depth > 40) {
901 ctxt->errNo = XML_ERR_ENTITY_LOOP;
902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
903 ctxt->sax->error(ctxt->userData,
904 "Detected entity reference loop\n");
905 ctxt->wellFormed = 0;
906 ctxt->disableSAX = 1;
907 return(NULL);
908 }
909
910 /*
911 * allocate a translation buffer.
912 */
913 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
914 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
915 if (buffer == NULL) {
916 perror("xmlDecodeEntities: malloc failed");
917 return(NULL);
918 }
919
920 /*
921 * Ok loop until we reach one of the ending char or a size limit.
922 * we are operating on already parsed values.
923 */
924 c = CUR_SCHAR(str, l);
925 while ((c != 0) && (c != end) && /* non input consuming loop */
926 (c != end2) && (c != end3)) {
927
928 if (c == 0) break;
929 if ((c == '&') && (str[1] == '#')) {
930 int val = xmlParseStringCharRef(ctxt, &str);
931 if (val != 0) {
932 COPY_BUF(0,buffer,nbchars,val);
933 }
934 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
935 if (xmlParserDebugEntities)
936 xmlGenericError(xmlGenericErrorContext,
937 "String decoding Entity Reference: %.30s\n",
938 str);
939 ent = xmlParseStringEntityRef(ctxt, &str);
940 if ((ent != NULL) &&
941 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
942 if (ent->content != NULL) {
943 COPY_BUF(0,buffer,nbchars,ent->content[0]);
944 } else {
945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
946 ctxt->sax->error(ctxt->userData,
947 "internal error entity has no content\n");
948 }
949 } else if ((ent != NULL) && (ent->content != NULL)) {
950 xmlChar *rep;
951
952 ctxt->depth++;
953 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
954 0, 0, 0);
955 ctxt->depth--;
956 if (rep != NULL) {
957 current = rep;
958 while (*current != 0) { /* non input consuming loop */
959 buffer[nbchars++] = *current++;
960 if (nbchars >
961 buffer_size - XML_PARSER_BUFFER_SIZE) {
962 growBuffer(buffer);
963 }
964 }
965 xmlFree(rep);
966 }
967 } else if (ent != NULL) {
968 int i = xmlStrlen(ent->name);
969 const xmlChar *cur = ent->name;
970
971 buffer[nbchars++] = '&';
972 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
973 growBuffer(buffer);
974 }
975 for (;i > 0;i--)
976 buffer[nbchars++] = *cur++;
977 buffer[nbchars++] = ';';
978 }
979 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
980 if (xmlParserDebugEntities)
981 xmlGenericError(xmlGenericErrorContext,
982 "String decoding PE Reference: %.30s\n", str);
983 ent = xmlParseStringPEReference(ctxt, &str);
984 if (ent != NULL) {
985 xmlChar *rep;
986
987 ctxt->depth++;
988 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
989 0, 0, 0);
990 ctxt->depth--;
991 if (rep != NULL) {
992 current = rep;
993 while (*current != 0) { /* non input consuming loop */
994 buffer[nbchars++] = *current++;
995 if (nbchars >
996 buffer_size - XML_PARSER_BUFFER_SIZE) {
997 growBuffer(buffer);
998 }
999 }
1000 xmlFree(rep);
1001 }
1002 }
1003 } else {
1004 COPY_BUF(l,buffer,nbchars,c);
1005 str += l;
1006 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1007 growBuffer(buffer);
1008 }
1009 }
1010 c = CUR_SCHAR(str, l);
1011 }
1012 buffer[nbchars++] = 0;
1013 return(buffer);
1014}
1015
1016
1017/************************************************************************
1018 * *
1019 * Commodity functions to handle xmlChars *
1020 * *
1021 ************************************************************************/
1022
1023/**
1024 * xmlStrndup:
1025 * @cur: the input xmlChar *
1026 * @len: the len of @cur
1027 *
1028 * a strndup for array of xmlChar's
1029 *
1030 * Returns a new xmlChar * or NULL
1031 */
1032xmlChar *
1033xmlStrndup(const xmlChar *cur, int len) {
1034 xmlChar *ret;
1035
1036 if ((cur == NULL) || (len < 0)) return(NULL);
1037 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1038 if (ret == NULL) {
1039 xmlGenericError(xmlGenericErrorContext,
1040 "malloc of %ld byte failed\n",
1041 (len + 1) * (long)sizeof(xmlChar));
1042 return(NULL);
1043 }
1044 memcpy(ret, cur, len * sizeof(xmlChar));
1045 ret[len] = 0;
1046 return(ret);
1047}
1048
1049/**
1050 * xmlStrdup:
1051 * @cur: the input xmlChar *
1052 *
1053 * a strdup for array of xmlChar's. Since they are supposed to be
1054 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1055 * a termination mark of '0'.
1056 *
1057 * Returns a new xmlChar * or NULL
1058 */
1059xmlChar *
1060xmlStrdup(const xmlChar *cur) {
1061 const xmlChar *p = cur;
1062
1063 if (cur == NULL) return(NULL);
1064 while (*p != 0) p++; /* non input consuming */
1065 return(xmlStrndup(cur, p - cur));
1066}
1067
1068/**
1069 * xmlCharStrndup:
1070 * @cur: the input char *
1071 * @len: the len of @cur
1072 *
1073 * a strndup for char's to xmlChar's
1074 *
1075 * Returns a new xmlChar * or NULL
1076 */
1077
1078xmlChar *
1079xmlCharStrndup(const char *cur, int len) {
1080 int i;
1081 xmlChar *ret;
1082
1083 if ((cur == NULL) || (len < 0)) return(NULL);
1084 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1085 if (ret == NULL) {
1086 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1087 (len + 1) * (long)sizeof(xmlChar));
1088 return(NULL);
1089 }
1090 for (i = 0;i < len;i++)
1091 ret[i] = (xmlChar) cur[i];
1092 ret[len] = 0;
1093 return(ret);
1094}
1095
1096/**
1097 * xmlCharStrdup:
1098 * @cur: the input char *
1099 * @len: the len of @cur
1100 *
1101 * a strdup for char's to xmlChar's
1102 *
1103 * Returns a new xmlChar * or NULL
1104 */
1105
1106xmlChar *
1107xmlCharStrdup(const char *cur) {
1108 const char *p = cur;
1109
1110 if (cur == NULL) return(NULL);
1111 while (*p != '\0') p++; /* non input consuming */
1112 return(xmlCharStrndup(cur, p - cur));
1113}
1114
1115/**
1116 * xmlStrcmp:
1117 * @str1: the first xmlChar *
1118 * @str2: the second xmlChar *
1119 *
1120 * a strcmp for xmlChar's
1121 *
1122 * Returns the integer result of the comparison
1123 */
1124
1125int
1126xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1127 register int tmp;
1128
1129 if (str1 == str2) return(0);
1130 if (str1 == NULL) return(-1);
1131 if (str2 == NULL) return(1);
1132 do {
1133 tmp = *str1++ - *str2;
1134 if (tmp != 0) return(tmp);
1135 } while (*str2++ != 0);
1136 return 0;
1137}
1138
1139/**
1140 * xmlStrEqual:
1141 * @str1: the first xmlChar *
1142 * @str2: the second xmlChar *
1143 *
1144 * Check if both string are equal of have same content
1145 * Should be a bit more readable and faster than xmlStrEqual()
1146 *
1147 * Returns 1 if they are equal, 0 if they are different
1148 */
1149
1150int
1151xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1152 if (str1 == str2) return(1);
1153 if (str1 == NULL) return(0);
1154 if (str2 == NULL) return(0);
1155 do {
1156 if (*str1++ != *str2) return(0);
1157 } while (*str2++);
1158 return(1);
1159}
1160
1161/**
1162 * xmlStrncmp:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 * @len: the max comparison length
1166 *
1167 * a strncmp for xmlChar's
1168 *
1169 * Returns the integer result of the comparison
1170 */
1171
1172int
1173xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174 register int tmp;
1175
1176 if (len <= 0) return(0);
1177 if (str1 == str2) return(0);
1178 if (str1 == NULL) return(-1);
1179 if (str2 == NULL) return(1);
1180 do {
1181 tmp = *str1++ - *str2;
1182 if (tmp != 0 || --len == 0) return(tmp);
1183 } while (*str2++ != 0);
1184 return 0;
1185}
1186
1187static xmlChar casemap[256] = {
1188 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1189 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1190 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1191 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1192 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1193 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1194 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1195 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1196 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1197 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1198 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1199 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1200 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1201 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1202 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1203 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1204 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1205 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1206 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1207 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1208 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1209 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1210 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1211 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1212 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1213 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1214 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1215 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1216 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1217 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1218 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1219 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1220};
1221
1222/**
1223 * xmlStrcasecmp:
1224 * @str1: the first xmlChar *
1225 * @str2: the second xmlChar *
1226 *
1227 * a strcasecmp for xmlChar's
1228 *
1229 * Returns the integer result of the comparison
1230 */
1231
1232int
1233xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1234 register int tmp;
1235
1236 if (str1 == str2) return(0);
1237 if (str1 == NULL) return(-1);
1238 if (str2 == NULL) return(1);
1239 do {
1240 tmp = casemap[*str1++] - casemap[*str2];
1241 if (tmp != 0) return(tmp);
1242 } while (*str2++ != 0);
1243 return 0;
1244}
1245
1246/**
1247 * xmlStrncasecmp:
1248 * @str1: the first xmlChar *
1249 * @str2: the second xmlChar *
1250 * @len: the max comparison length
1251 *
1252 * a strncasecmp for xmlChar's
1253 *
1254 * Returns the integer result of the comparison
1255 */
1256
1257int
1258xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1259 register int tmp;
1260
1261 if (len <= 0) return(0);
1262 if (str1 == str2) return(0);
1263 if (str1 == NULL) return(-1);
1264 if (str2 == NULL) return(1);
1265 do {
1266 tmp = casemap[*str1++] - casemap[*str2];
1267 if (tmp != 0 || --len == 0) return(tmp);
1268 } while (*str2++ != 0);
1269 return 0;
1270}
1271
1272/**
1273 * xmlStrchr:
1274 * @str: the xmlChar * array
1275 * @val: the xmlChar to search
1276 *
1277 * a strchr for xmlChar's
1278 *
1279 * Returns the xmlChar * for the first occurence or NULL.
1280 */
1281
1282const xmlChar *
1283xmlStrchr(const xmlChar *str, xmlChar val) {
1284 if (str == NULL) return(NULL);
1285 while (*str != 0) { /* non input consuming */
1286 if (*str == val) return((xmlChar *) str);
1287 str++;
1288 }
1289 return(NULL);
1290}
1291
1292/**
1293 * xmlStrstr:
1294 * @str: the xmlChar * array (haystack)
1295 * @val: the xmlChar to search (needle)
1296 *
1297 * a strstr for xmlChar's
1298 *
1299 * Returns the xmlChar * for the first occurence or NULL.
1300 */
1301
1302const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001303xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001304 int n;
1305
1306 if (str == NULL) return(NULL);
1307 if (val == NULL) return(NULL);
1308 n = xmlStrlen(val);
1309
1310 if (n == 0) return(str);
1311 while (*str != 0) { /* non input consuming */
1312 if (*str == *val) {
1313 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1314 }
1315 str++;
1316 }
1317 return(NULL);
1318}
1319
1320/**
1321 * xmlStrcasestr:
1322 * @str: the xmlChar * array (haystack)
1323 * @val: the xmlChar to search (needle)
1324 *
1325 * a case-ignoring strstr for xmlChar's
1326 *
1327 * Returns the xmlChar * for the first occurence or NULL.
1328 */
1329
1330const xmlChar *
1331xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1332 int n;
1333
1334 if (str == NULL) return(NULL);
1335 if (val == NULL) return(NULL);
1336 n = xmlStrlen(val);
1337
1338 if (n == 0) return(str);
1339 while (*str != 0) { /* non input consuming */
1340 if (casemap[*str] == casemap[*val])
1341 if (!xmlStrncasecmp(str, val, n)) return(str);
1342 str++;
1343 }
1344 return(NULL);
1345}
1346
1347/**
1348 * xmlStrsub:
1349 * @str: the xmlChar * array (haystack)
1350 * @start: the index of the first char (zero based)
1351 * @len: the length of the substring
1352 *
1353 * Extract a substring of a given string
1354 *
1355 * Returns the xmlChar * for the first occurence or NULL.
1356 */
1357
1358xmlChar *
1359xmlStrsub(const xmlChar *str, int start, int len) {
1360 int i;
1361
1362 if (str == NULL) return(NULL);
1363 if (start < 0) return(NULL);
1364 if (len < 0) return(NULL);
1365
1366 for (i = 0;i < start;i++) {
1367 if (*str == 0) return(NULL);
1368 str++;
1369 }
1370 if (*str == 0) return(NULL);
1371 return(xmlStrndup(str, len));
1372}
1373
1374/**
1375 * xmlStrlen:
1376 * @str: the xmlChar * array
1377 *
1378 * length of a xmlChar's string
1379 *
1380 * Returns the number of xmlChar contained in the ARRAY.
1381 */
1382
1383int
1384xmlStrlen(const xmlChar *str) {
1385 int len = 0;
1386
1387 if (str == NULL) return(0);
1388 while (*str != 0) { /* non input consuming */
1389 str++;
1390 len++;
1391 }
1392 return(len);
1393}
1394
1395/**
1396 * xmlStrncat:
1397 * @cur: the original xmlChar * array
1398 * @add: the xmlChar * array added
1399 * @len: the length of @add
1400 *
1401 * a strncat for array of xmlChar's, it will extend cur with the len
1402 * first bytes of @add.
1403 *
1404 * Returns a new xmlChar *, the original @cur is reallocated if needed
1405 * and should not be freed
1406 */
1407
1408xmlChar *
1409xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1410 int size;
1411 xmlChar *ret;
1412
1413 if ((add == NULL) || (len == 0))
1414 return(cur);
1415 if (cur == NULL)
1416 return(xmlStrndup(add, len));
1417
1418 size = xmlStrlen(cur);
1419 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1420 if (ret == NULL) {
1421 xmlGenericError(xmlGenericErrorContext,
1422 "xmlStrncat: realloc of %ld byte failed\n",
1423 (size + len + 1) * (long)sizeof(xmlChar));
1424 return(cur);
1425 }
1426 memcpy(&ret[size], add, len * sizeof(xmlChar));
1427 ret[size + len] = 0;
1428 return(ret);
1429}
1430
1431/**
1432 * xmlStrcat:
1433 * @cur: the original xmlChar * array
1434 * @add: the xmlChar * array added
1435 *
1436 * a strcat for array of xmlChar's. Since they are supposed to be
1437 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1438 * a termination mark of '0'.
1439 *
1440 * Returns a new xmlChar * containing the concatenated string.
1441 */
1442xmlChar *
1443xmlStrcat(xmlChar *cur, const xmlChar *add) {
1444 const xmlChar *p = add;
1445
1446 if (add == NULL) return(cur);
1447 if (cur == NULL)
1448 return(xmlStrdup(add));
1449
1450 while (*p != 0) p++; /* non input consuming */
1451 return(xmlStrncat(cur, add, p - add));
1452}
1453
1454/************************************************************************
1455 * *
1456 * Commodity functions, cleanup needed ? *
1457 * *
1458 ************************************************************************/
1459
1460/**
1461 * areBlanks:
1462 * @ctxt: an XML parser context
1463 * @str: a xmlChar *
1464 * @len: the size of @str
1465 *
1466 * Is this a sequence of blank chars that one can ignore ?
1467 *
1468 * Returns 1 if ignorable 0 otherwise.
1469 */
1470
1471static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1472 int i, ret;
1473 xmlNodePtr lastChild;
1474
Daniel Veillard2f362242001-03-02 17:36:21 +00001475 if (ctxt->keepBlanks)
1476 return(0);
1477
Owen Taylor3473f882001-02-23 17:55:21 +00001478 /*
1479 * Check for xml:space value.
1480 */
1481 if (*(ctxt->space) == 1)
1482 return(0);
1483
1484 /*
1485 * Check that the string is made of blanks
1486 */
1487 for (i = 0;i < len;i++)
1488 if (!(IS_BLANK(str[i]))) return(0);
1489
1490 /*
1491 * Look if the element is mixed content in the Dtd if available
1492 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001493 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001494 if (ctxt->myDoc != NULL) {
1495 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1496 if (ret == 0) return(1);
1497 if (ret == 1) return(0);
1498 }
1499
1500 /*
1501 * Otherwise, heuristic :-\
1502 */
Owen Taylor3473f882001-02-23 17:55:21 +00001503 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001504 if ((ctxt->node->children == NULL) &&
1505 (RAW == '<') && (NXT(1) == '/')) return(0);
1506
1507 lastChild = xmlGetLastChild(ctxt->node);
1508 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001509 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1510 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001511 } else if (xmlNodeIsText(lastChild))
1512 return(0);
1513 else if ((ctxt->node->children != NULL) &&
1514 (xmlNodeIsText(ctxt->node->children)))
1515 return(0);
1516 return(1);
1517}
1518
1519/*
 * Forward declarations for recursive behaviour.
1521 */
1522void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1523void xmlParseReference(xmlParserCtxtPtr ctxt);
1524
1525/************************************************************************
1526 * *
1527 * Extra stuff for namespace support *
1528 * Relates to http://www.w3.org/TR/WD-xml-names *
1529 * *
1530 ************************************************************************/
1531
1532/**
1533 * xmlSplitQName:
1534 * @ctxt: an XML parser context
1535 * @name: an XML parser context
1536 * @prefix: a xmlChar **
1537 *
1538 * parse an UTF8 encoded XML qualified name string
1539 *
1540 * [NS 5] QName ::= (Prefix ':')? LocalPart
1541 *
1542 * [NS 6] Prefix ::= NCName
1543 *
1544 * [NS 7] LocalPart ::= NCName
1545 *
1546 * Returns the local part, and prefix is updated
1547 * to get the Prefix if any.
1548 */
1549
1550xmlChar *
1551xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1552 xmlChar buf[XML_MAX_NAMELEN + 5];
1553 xmlChar *buffer = NULL;
1554 int len = 0;
1555 int max = XML_MAX_NAMELEN;
1556 xmlChar *ret = NULL;
1557 const xmlChar *cur = name;
1558 int c;
1559
1560 *prefix = NULL;
1561
1562#ifndef XML_XML_NAMESPACE
1563 /* xml: prefix is not really a namespace */
1564 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1565 (cur[2] == 'l') && (cur[3] == ':'))
1566 return(xmlStrdup(name));
1567#endif
1568
1569 /* nasty but valid */
1570 if (cur[0] == ':')
1571 return(xmlStrdup(name));
1572
1573 c = *cur++;
1574 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1575 buf[len++] = c;
1576 c = *cur++;
1577 }
1578 if (len >= max) {
1579 /*
1580 * Okay someone managed to make a huge name, so he's ready to pay
1581 * for the processing speed.
1582 */
1583 max = len * 2;
1584
1585 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1586 if (buffer == NULL) {
1587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1588 ctxt->sax->error(ctxt->userData,
1589 "xmlSplitQName: out of memory\n");
1590 return(NULL);
1591 }
1592 memcpy(buffer, buf, len);
1593 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1594 if (len + 10 > max) {
1595 max *= 2;
1596 buffer = (xmlChar *) xmlRealloc(buffer,
1597 max * sizeof(xmlChar));
1598 if (buffer == NULL) {
1599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1600 ctxt->sax->error(ctxt->userData,
1601 "xmlSplitQName: out of memory\n");
1602 return(NULL);
1603 }
1604 }
1605 buffer[len++] = c;
1606 c = *cur++;
1607 }
1608 buffer[len] = 0;
1609 }
1610
1611 if (buffer == NULL)
1612 ret = xmlStrndup(buf, len);
1613 else {
1614 ret = buffer;
1615 buffer = NULL;
1616 max = XML_MAX_NAMELEN;
1617 }
1618
1619
1620 if (c == ':') {
1621 c = *cur++;
1622 if (c == 0) return(ret);
1623 *prefix = ret;
1624 len = 0;
1625
1626 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1627 buf[len++] = c;
1628 c = *cur++;
1629 }
1630 if (len >= max) {
1631 /*
1632 * Okay someone managed to make a huge name, so he's ready to pay
1633 * for the processing speed.
1634 */
1635 max = len * 2;
1636
1637 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1638 if (buffer == NULL) {
1639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1640 ctxt->sax->error(ctxt->userData,
1641 "xmlSplitQName: out of memory\n");
1642 return(NULL);
1643 }
1644 memcpy(buffer, buf, len);
1645 while (c != 0) { /* tested bigname2.xml */
1646 if (len + 10 > max) {
1647 max *= 2;
1648 buffer = (xmlChar *) xmlRealloc(buffer,
1649 max * sizeof(xmlChar));
1650 if (buffer == NULL) {
1651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1652 ctxt->sax->error(ctxt->userData,
1653 "xmlSplitQName: out of memory\n");
1654 return(NULL);
1655 }
1656 }
1657 buffer[len++] = c;
1658 c = *cur++;
1659 }
1660 buffer[len] = 0;
1661 }
1662
1663 if (buffer == NULL)
1664 ret = xmlStrndup(buf, len);
1665 else {
1666 ret = buffer;
1667 }
1668 }
1669
1670 return(ret);
1671}
1672
1673/************************************************************************
1674 * *
1675 * The parser itself *
1676 * Relates to http://www.w3.org/TR/REC-xml *
1677 * *
1678 ************************************************************************/
1679
Daniel Veillard76d66f42001-05-16 21:05:17 +00001680static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001681/**
1682 * xmlParseName:
1683 * @ctxt: an XML parser context
1684 *
1685 * parse an XML name.
1686 *
1687 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1688 * CombiningChar | Extender
1689 *
1690 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1691 *
1692 * [6] Names ::= Name (S Name)*
1693 *
1694 * Returns the Name parsed or NULL
1695 */
1696
1697xmlChar *
1698xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001699 const xmlChar *in;
1700 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001701 int count = 0;
1702
1703 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001704
1705 /*
1706 * Accelerator for simple ASCII names
1707 */
1708 in = ctxt->input->cur;
1709 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1710 ((*in >= 0x41) && (*in <= 0x5A)) ||
1711 (*in == '_') || (*in == ':')) {
1712 in++;
1713 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1714 ((*in >= 0x41) && (*in <= 0x5A)) ||
1715 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001716 (*in == '_') || (*in == '-') ||
1717 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001718 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001719 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001720 count = in - ctxt->input->cur;
1721 ret = xmlStrndup(ctxt->input->cur, count);
1722 ctxt->input->cur = in;
1723 return(ret);
1724 }
1725 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001726 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001727}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001728
Daniel Veillard76d66f42001-05-16 21:05:17 +00001729static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001730xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1731 xmlChar buf[XML_MAX_NAMELEN + 5];
1732 int len = 0, l;
1733 int c;
1734 int count = 0;
1735
1736 /*
1737 * Handler for more complex cases
1738 */
1739 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 c = CUR_CHAR(l);
1741 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1742 (!IS_LETTER(c) && (c != '_') &&
1743 (c != ':'))) {
1744 return(NULL);
1745 }
1746
1747 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1748 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1749 (c == '.') || (c == '-') ||
1750 (c == '_') || (c == ':') ||
1751 (IS_COMBINING(c)) ||
1752 (IS_EXTENDER(c)))) {
1753 if (count++ > 100) {
1754 count = 0;
1755 GROW;
1756 }
1757 COPY_BUF(l,buf,len,c);
1758 NEXTL(l);
1759 c = CUR_CHAR(l);
1760 if (len >= XML_MAX_NAMELEN) {
1761 /*
1762 * Okay someone managed to make a huge name, so he's ready to pay
1763 * for the processing speed.
1764 */
1765 xmlChar *buffer;
1766 int max = len * 2;
1767
1768 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1769 if (buffer == NULL) {
1770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1771 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001772 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001773 return(NULL);
1774 }
1775 memcpy(buffer, buf, len);
1776 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1777 (c == '.') || (c == '-') ||
1778 (c == '_') || (c == ':') ||
1779 (IS_COMBINING(c)) ||
1780 (IS_EXTENDER(c))) {
1781 if (count++ > 100) {
1782 count = 0;
1783 GROW;
1784 }
1785 if (len + 10 > max) {
1786 max *= 2;
1787 buffer = (xmlChar *) xmlRealloc(buffer,
1788 max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001792 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(NULL);
1794 }
1795 }
1796 COPY_BUF(l,buffer,len,c);
1797 NEXTL(l);
1798 c = CUR_CHAR(l);
1799 }
1800 buffer[len] = 0;
1801 return(buffer);
1802 }
1803 }
1804 return(xmlStrndup(buf, len));
1805}
1806
1807/**
1808 * xmlParseStringName:
1809 * @ctxt: an XML parser context
1810 * @str: a pointer to the string pointer (IN/OUT)
1811 *
1812 * parse an XML name.
1813 *
1814 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1815 * CombiningChar | Extender
1816 *
1817 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1818 *
1819 * [6] Names ::= Name (S Name)*
1820 *
1821 * Returns the Name parsed or NULL. The str pointer
1822 * is updated to the current location in the string.
1823 */
1824
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001825static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001826xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1827 xmlChar buf[XML_MAX_NAMELEN + 5];
1828 const xmlChar *cur = *str;
1829 int len = 0, l;
1830 int c;
1831
1832 c = CUR_SCHAR(cur, l);
1833 if (!IS_LETTER(c) && (c != '_') &&
1834 (c != ':')) {
1835 return(NULL);
1836 }
1837
1838 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1839 (c == '.') || (c == '-') ||
1840 (c == '_') || (c == ':') ||
1841 (IS_COMBINING(c)) ||
1842 (IS_EXTENDER(c))) {
1843 COPY_BUF(l,buf,len,c);
1844 cur += l;
1845 c = CUR_SCHAR(cur, l);
1846 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1847 /*
1848 * Okay someone managed to make a huge name, so he's ready to pay
1849 * for the processing speed.
1850 */
1851 xmlChar *buffer;
1852 int max = len * 2;
1853
1854 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1855 if (buffer == NULL) {
1856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1857 ctxt->sax->error(ctxt->userData,
1858 "xmlParseStringName: out of memory\n");
1859 return(NULL);
1860 }
1861 memcpy(buffer, buf, len);
1862 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1863 (c == '.') || (c == '-') ||
1864 (c == '_') || (c == ':') ||
1865 (IS_COMBINING(c)) ||
1866 (IS_EXTENDER(c))) {
1867 if (len + 10 > max) {
1868 max *= 2;
1869 buffer = (xmlChar *) xmlRealloc(buffer,
1870 max * sizeof(xmlChar));
1871 if (buffer == NULL) {
1872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1873 ctxt->sax->error(ctxt->userData,
1874 "xmlParseStringName: out of memory\n");
1875 return(NULL);
1876 }
1877 }
1878 COPY_BUF(l,buffer,len,c);
1879 cur += l;
1880 c = CUR_SCHAR(cur, l);
1881 }
1882 buffer[len] = 0;
1883 *str = cur;
1884 return(buffer);
1885 }
1886 }
1887 *str = cur;
1888 return(xmlStrndup(buf, len));
1889}
1890
1891/**
1892 * xmlParseNmtoken:
1893 * @ctxt: an XML parser context
1894 *
1895 * parse an XML Nmtoken.
1896 *
1897 * [7] Nmtoken ::= (NameChar)+
1898 *
1899 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1900 *
1901 * Returns the Nmtoken parsed or NULL
1902 */
1903
1904xmlChar *
1905xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1906 xmlChar buf[XML_MAX_NAMELEN + 5];
1907 int len = 0, l;
1908 int c;
1909 int count = 0;
1910
1911 GROW;
1912 c = CUR_CHAR(l);
1913
1914 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1915 (c == '.') || (c == '-') ||
1916 (c == '_') || (c == ':') ||
1917 (IS_COMBINING(c)) ||
1918 (IS_EXTENDER(c))) {
1919 if (count++ > 100) {
1920 count = 0;
1921 GROW;
1922 }
1923 COPY_BUF(l,buf,len,c);
1924 NEXTL(l);
1925 c = CUR_CHAR(l);
1926 if (len >= XML_MAX_NAMELEN) {
1927 /*
1928 * Okay someone managed to make a huge token, so he's ready to pay
1929 * for the processing speed.
1930 */
1931 xmlChar *buffer;
1932 int max = len * 2;
1933
1934 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1935 if (buffer == NULL) {
1936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1937 ctxt->sax->error(ctxt->userData,
1938 "xmlParseNmtoken: out of memory\n");
1939 return(NULL);
1940 }
1941 memcpy(buffer, buf, len);
1942 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1943 (c == '.') || (c == '-') ||
1944 (c == '_') || (c == ':') ||
1945 (IS_COMBINING(c)) ||
1946 (IS_EXTENDER(c))) {
1947 if (count++ > 100) {
1948 count = 0;
1949 GROW;
1950 }
1951 if (len + 10 > max) {
1952 max *= 2;
1953 buffer = (xmlChar *) xmlRealloc(buffer,
1954 max * sizeof(xmlChar));
1955 if (buffer == NULL) {
1956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001958 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001959 return(NULL);
1960 }
1961 }
1962 COPY_BUF(l,buffer,len,c);
1963 NEXTL(l);
1964 c = CUR_CHAR(l);
1965 }
1966 buffer[len] = 0;
1967 return(buffer);
1968 }
1969 }
1970 if (len == 0)
1971 return(NULL);
1972 return(xmlStrndup(buf, len));
1973}
1974
1975/**
1976 * xmlParseEntityValue:
1977 * @ctxt: an XML parser context
1978 * @orig: if non-NULL store a copy of the original entity value
1979 *
1980 * parse a value for ENTITY declarations
1981 *
1982 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1983 * "'" ([^%&'] | PEReference | Reference)* "'"
1984 *
1985 * Returns the EntityValue parsed with reference substitued or NULL
1986 */
1987
1988xmlChar *
1989xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1990 xmlChar *buf = NULL;
1991 int len = 0;
1992 int size = XML_PARSER_BUFFER_SIZE;
1993 int c, l;
1994 xmlChar stop;
1995 xmlChar *ret = NULL;
1996 const xmlChar *cur = NULL;
1997 xmlParserInputPtr input;
1998
1999 if (RAW == '"') stop = '"';
2000 else if (RAW == '\'') stop = '\'';
2001 else {
2002 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2004 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2005 ctxt->wellFormed = 0;
2006 ctxt->disableSAX = 1;
2007 return(NULL);
2008 }
2009 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2010 if (buf == NULL) {
2011 xmlGenericError(xmlGenericErrorContext,
2012 "malloc of %d byte failed\n", size);
2013 return(NULL);
2014 }
2015
2016 /*
2017 * The content of the entity definition is copied in a buffer.
2018 */
2019
2020 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2021 input = ctxt->input;
2022 GROW;
2023 NEXT;
2024 c = CUR_CHAR(l);
2025 /*
2026 * NOTE: 4.4.5 Included in Literal
2027 * When a parameter entity reference appears in a literal entity
2028 * value, ... a single or double quote character in the replacement
2029 * text is always treated as a normal data character and will not
2030 * terminate the literal.
2031 * In practice it means we stop the loop only when back at parsing
2032 * the initial entity and the quote is found
2033 */
2034 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2035 (ctxt->input != input))) {
2036 if (len + 5 >= size) {
2037 size *= 2;
2038 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2039 if (buf == NULL) {
2040 xmlGenericError(xmlGenericErrorContext,
2041 "realloc of %d byte failed\n", size);
2042 return(NULL);
2043 }
2044 }
2045 COPY_BUF(l,buf,len,c);
2046 NEXTL(l);
2047 /*
2048 * Pop-up of finished entities.
2049 */
2050 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2051 xmlPopInput(ctxt);
2052
2053 GROW;
2054 c = CUR_CHAR(l);
2055 if (c == 0) {
2056 GROW;
2057 c = CUR_CHAR(l);
2058 }
2059 }
2060 buf[len] = 0;
2061
2062 /*
2063 * Raise problem w.r.t. '&' and '%' being used in non-entities
2064 * reference constructs. Note Charref will be handled in
2065 * xmlStringDecodeEntities()
2066 */
2067 cur = buf;
2068 while (*cur != 0) { /* non input consuming */
2069 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2070 xmlChar *name;
2071 xmlChar tmp = *cur;
2072
2073 cur++;
2074 name = xmlParseStringName(ctxt, &cur);
2075 if ((name == NULL) || (*cur != ';')) {
2076 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2078 ctxt->sax->error(ctxt->userData,
2079 "EntityValue: '%c' forbidden except for entities references\n",
2080 tmp);
2081 ctxt->wellFormed = 0;
2082 ctxt->disableSAX = 1;
2083 }
2084 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2085 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2087 ctxt->sax->error(ctxt->userData,
2088 "EntityValue: PEReferences forbidden in internal subset\n",
2089 tmp);
2090 ctxt->wellFormed = 0;
2091 ctxt->disableSAX = 1;
2092 }
2093 if (name != NULL)
2094 xmlFree(name);
2095 }
2096 cur++;
2097 }
2098
2099 /*
2100 * Then PEReference entities are substituted.
2101 */
2102 if (c != stop) {
2103 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2106 ctxt->wellFormed = 0;
2107 ctxt->disableSAX = 1;
2108 xmlFree(buf);
2109 } else {
2110 NEXT;
2111 /*
2112 * NOTE: 4.4.7 Bypassed
2113 * When a general entity reference appears in the EntityValue in
2114 * an entity declaration, it is bypassed and left as is.
2115 * so XML_SUBSTITUTE_REF is not set here.
2116 */
2117 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2118 0, 0, 0);
2119 if (orig != NULL)
2120 *orig = buf;
2121 else
2122 xmlFree(buf);
2123 }
2124
2125 return(ret);
2126}
2127
2128/**
2129 * xmlParseAttValue:
2130 * @ctxt: an XML parser context
2131 *
2132 * parse a value for an attribute
2133 * Note: the parser won't do substitution of entities here, this
2134 * will be handled later in xmlStringGetNodeList
2135 *
2136 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2137 * "'" ([^<&'] | Reference)* "'"
2138 *
2139 * 3.3.3 Attribute-Value Normalization:
2140 * Before the value of an attribute is passed to the application or
2141 * checked for validity, the XML processor must normalize it as follows:
2142 * - a character reference is processed by appending the referenced
2143 * character to the attribute value
2144 * - an entity reference is processed by recursively processing the
2145 * replacement text of the entity
2146 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2147 * appending #x20 to the normalized value, except that only a single
2148 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2149 * parsed entity or the literal entity value of an internal parsed entity
2150 * - other characters are processed by appending them to the normalized value
2151 * If the declared value is not CDATA, then the XML processor must further
2152 * process the normalized attribute value by discarding any leading and
2153 * trailing space (#x20) characters, and by replacing sequences of space
2154 * (#x20) characters by a single space (#x20) character.
2155 * All attributes for which no declaration has been read should be treated
2156 * by a non-validating parser as if declared CDATA.
2157 *
2158 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2159 */
2160
2161xmlChar *
2162xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2163 xmlChar limit = 0;
2164 xmlChar *buf = NULL;
2165 int len = 0;
2166 int buf_size = 0;
2167 int c, l;
2168 xmlChar *current = NULL;
2169 xmlEntityPtr ent;
2170
2171
2172 SHRINK;
2173 if (NXT(0) == '"') {
2174 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2175 limit = '"';
2176 NEXT;
2177 } else if (NXT(0) == '\'') {
2178 limit = '\'';
2179 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2180 NEXT;
2181 } else {
2182 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2184 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2185 ctxt->wellFormed = 0;
2186 ctxt->disableSAX = 1;
2187 return(NULL);
2188 }
2189
2190 /*
2191 * allocate a translation buffer.
2192 */
2193 buf_size = XML_PARSER_BUFFER_SIZE;
2194 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2195 if (buf == NULL) {
2196 perror("xmlParseAttValue: malloc failed");
2197 return(NULL);
2198 }
2199
2200 /*
2201 * Ok loop until we reach one of the ending char or a size limit.
2202 */
2203 c = CUR_CHAR(l);
2204 while (((NXT(0) != limit) && /* checked */
2205 (c != '<')) || (ctxt->token != 0)) {
2206 if (c == 0) break;
2207 if (ctxt->token == '&') {
2208 /*
2209 * The reparsing will be done in xmlStringGetNodeList()
2210 * called by the attribute() function in SAX.c
2211 */
2212 static xmlChar buffer[6] = "&#38;";
2213
2214 if (len > buf_size - 10) {
2215 growBuffer(buf);
2216 }
2217 current = &buffer[0];
2218 while (*current != 0) { /* non input consuming */
2219 buf[len++] = *current++;
2220 }
2221 ctxt->token = 0;
2222 } else if (c == '&') {
2223 if (NXT(1) == '#') {
2224 int val = xmlParseCharRef(ctxt);
2225 if (val == '&') {
2226 /*
2227 * The reparsing will be done in xmlStringGetNodeList()
2228 * called by the attribute() function in SAX.c
2229 */
2230 static xmlChar buffer[6] = "&#38;";
2231
2232 if (len > buf_size - 10) {
2233 growBuffer(buf);
2234 }
2235 current = &buffer[0];
2236 while (*current != 0) { /* non input consuming */
2237 buf[len++] = *current++;
2238 }
2239 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002240 if (len > buf_size - 10) {
2241 growBuffer(buf);
2242 }
Owen Taylor3473f882001-02-23 17:55:21 +00002243 len += xmlCopyChar(0, &buf[len], val);
2244 }
2245 } else {
2246 ent = xmlParseEntityRef(ctxt);
2247 if ((ent != NULL) &&
2248 (ctxt->replaceEntities != 0)) {
2249 xmlChar *rep;
2250
2251 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2252 rep = xmlStringDecodeEntities(ctxt, ent->content,
2253 XML_SUBSTITUTE_REF, 0, 0, 0);
2254 if (rep != NULL) {
2255 current = rep;
2256 while (*current != 0) { /* non input consuming */
2257 buf[len++] = *current++;
2258 if (len > buf_size - 10) {
2259 growBuffer(buf);
2260 }
2261 }
2262 xmlFree(rep);
2263 }
2264 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002265 if (len > buf_size - 10) {
2266 growBuffer(buf);
2267 }
Owen Taylor3473f882001-02-23 17:55:21 +00002268 if (ent->content != NULL)
2269 buf[len++] = ent->content[0];
2270 }
2271 } else if (ent != NULL) {
2272 int i = xmlStrlen(ent->name);
2273 const xmlChar *cur = ent->name;
2274
2275 /*
2276 * This may look absurd but is needed to detect
2277 * entities problems
2278 */
2279 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2280 (ent->content != NULL)) {
2281 xmlChar *rep;
2282 rep = xmlStringDecodeEntities(ctxt, ent->content,
2283 XML_SUBSTITUTE_REF, 0, 0, 0);
2284 if (rep != NULL)
2285 xmlFree(rep);
2286 }
2287
2288 /*
2289 * Just output the reference
2290 */
2291 buf[len++] = '&';
2292 if (len > buf_size - i - 10) {
2293 growBuffer(buf);
2294 }
2295 for (;i > 0;i--)
2296 buf[len++] = *cur++;
2297 buf[len++] = ';';
2298 }
2299 }
2300 } else {
2301 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2302 COPY_BUF(l,buf,len,0x20);
2303 if (len > buf_size - 10) {
2304 growBuffer(buf);
2305 }
2306 } else {
2307 COPY_BUF(l,buf,len,c);
2308 if (len > buf_size - 10) {
2309 growBuffer(buf);
2310 }
2311 }
2312 NEXTL(l);
2313 }
2314 GROW;
2315 c = CUR_CHAR(l);
2316 }
2317 buf[len++] = 0;
2318 if (RAW == '<') {
2319 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2321 ctxt->sax->error(ctxt->userData,
2322 "Unescaped '<' not allowed in attributes values\n");
2323 ctxt->wellFormed = 0;
2324 ctxt->disableSAX = 1;
2325 } else if (RAW != limit) {
2326 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2328 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2329 ctxt->wellFormed = 0;
2330 ctxt->disableSAX = 1;
2331 } else
2332 NEXT;
2333 return(buf);
2334}
2335
2336/**
2337 * xmlParseSystemLiteral:
2338 * @ctxt: an XML parser context
2339 *
2340 * parse an XML Literal
2341 *
2342 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2343 *
2344 * Returns the SystemLiteral parsed or NULL
2345 */
2346
2347xmlChar *
2348xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2349 xmlChar *buf = NULL;
2350 int len = 0;
2351 int size = XML_PARSER_BUFFER_SIZE;
2352 int cur, l;
2353 xmlChar stop;
2354 int state = ctxt->instate;
2355 int count = 0;
2356
2357 SHRINK;
2358 if (RAW == '"') {
2359 NEXT;
2360 stop = '"';
2361 } else if (RAW == '\'') {
2362 NEXT;
2363 stop = '\'';
2364 } else {
2365 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2367 ctxt->sax->error(ctxt->userData,
2368 "SystemLiteral \" or ' expected\n");
2369 ctxt->wellFormed = 0;
2370 ctxt->disableSAX = 1;
2371 return(NULL);
2372 }
2373
2374 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2375 if (buf == NULL) {
2376 xmlGenericError(xmlGenericErrorContext,
2377 "malloc of %d byte failed\n", size);
2378 return(NULL);
2379 }
2380 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2381 cur = CUR_CHAR(l);
2382 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2383 if (len + 5 >= size) {
2384 size *= 2;
2385 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2386 if (buf == NULL) {
2387 xmlGenericError(xmlGenericErrorContext,
2388 "realloc of %d byte failed\n", size);
2389 ctxt->instate = (xmlParserInputState) state;
2390 return(NULL);
2391 }
2392 }
2393 count++;
2394 if (count > 50) {
2395 GROW;
2396 count = 0;
2397 }
2398 COPY_BUF(l,buf,len,cur);
2399 NEXTL(l);
2400 cur = CUR_CHAR(l);
2401 if (cur == 0) {
2402 GROW;
2403 SHRINK;
2404 cur = CUR_CHAR(l);
2405 }
2406 }
2407 buf[len] = 0;
2408 ctxt->instate = (xmlParserInputState) state;
2409 if (!IS_CHAR(cur)) {
2410 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2412 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2413 ctxt->wellFormed = 0;
2414 ctxt->disableSAX = 1;
2415 } else {
2416 NEXT;
2417 }
2418 return(buf);
2419}
2420
2421/**
2422 * xmlParsePubidLiteral:
2423 * @ctxt: an XML parser context
2424 *
2425 * parse an XML public literal
2426 *
2427 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2428 *
2429 * Returns the PubidLiteral parsed or NULL.
2430 */
2431
2432xmlChar *
2433xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2434 xmlChar *buf = NULL;
2435 int len = 0;
2436 int size = XML_PARSER_BUFFER_SIZE;
2437 xmlChar cur;
2438 xmlChar stop;
2439 int count = 0;
2440
2441 SHRINK;
2442 if (RAW == '"') {
2443 NEXT;
2444 stop = '"';
2445 } else if (RAW == '\'') {
2446 NEXT;
2447 stop = '\'';
2448 } else {
2449 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2451 ctxt->sax->error(ctxt->userData,
2452 "SystemLiteral \" or ' expected\n");
2453 ctxt->wellFormed = 0;
2454 ctxt->disableSAX = 1;
2455 return(NULL);
2456 }
2457 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2458 if (buf == NULL) {
2459 xmlGenericError(xmlGenericErrorContext,
2460 "malloc of %d byte failed\n", size);
2461 return(NULL);
2462 }
2463 cur = CUR;
2464 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2465 if (len + 1 >= size) {
2466 size *= 2;
2467 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2468 if (buf == NULL) {
2469 xmlGenericError(xmlGenericErrorContext,
2470 "realloc of %d byte failed\n", size);
2471 return(NULL);
2472 }
2473 }
2474 buf[len++] = cur;
2475 count++;
2476 if (count > 50) {
2477 GROW;
2478 count = 0;
2479 }
2480 NEXT;
2481 cur = CUR;
2482 if (cur == 0) {
2483 GROW;
2484 SHRINK;
2485 cur = CUR;
2486 }
2487 }
2488 buf[len] = 0;
2489 if (cur != stop) {
2490 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2492 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2493 ctxt->wellFormed = 0;
2494 ctxt->disableSAX = 1;
2495 } else {
2496 NEXT;
2497 }
2498 return(buf);
2499}
2500
Daniel Veillard48b2f892001-02-25 16:11:03 +00002501void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002502/**
2503 * xmlParseCharData:
2504 * @ctxt: an XML parser context
2505 * @cdata: int indicating whether we are within a CDATA section
2506 *
2507 * parse a CharData section.
2508 * if we are within a CDATA section ']]>' marks an end of section.
2509 *
2510 * The right angle bracket (>) may be represented using the string "&gt;",
2511 * and must, for compatibility, be escaped using "&gt;" or a character
2512 * reference when it appears in the string "]]>" in content, when that
2513 * string is not marking the end of a CDATA section.
2514 *
2515 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2516 */
2517
2518void
2519xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002520 const xmlChar *in;
2521 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002522 int line = ctxt->input->line;
2523 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002524
2525 SHRINK;
2526 GROW;
2527 /*
2528 * Accelerated common case where input don't need to be
2529 * modified before passing it to the handler.
2530 */
2531 if ((ctxt->token == 0) && (!cdata)) {
2532 in = ctxt->input->cur;
2533 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002534get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002535 while (((*in >= 0x20) && (*in != '<') &&
2536 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2537 in++;
2538 if (*in == 0xA) {
2539 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002540 in++;
2541 while (*in == 0xA) {
2542 ctxt->input->line++;
2543 in++;
2544 }
2545 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002546 }
2547 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002548 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002549 if (IS_BLANK(*ctxt->input->cur)) {
2550 const xmlChar *tmp = ctxt->input->cur;
2551 ctxt->input->cur = in;
2552 if (areBlanks(ctxt, tmp, nbchar)) {
2553 if (ctxt->sax->ignorableWhitespace != NULL)
2554 ctxt->sax->ignorableWhitespace(ctxt->userData,
2555 tmp, nbchar);
2556 } else {
2557 if (ctxt->sax->characters != NULL)
2558 ctxt->sax->characters(ctxt->userData,
2559 tmp, nbchar);
2560 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002561 line = ctxt->input->line;
2562 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002563 } else {
2564 if (ctxt->sax->characters != NULL)
2565 ctxt->sax->characters(ctxt->userData,
2566 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002567 line = ctxt->input->line;
2568 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002569 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002570 }
2571 ctxt->input->cur = in;
2572 if (*in == 0xD) {
2573 in++;
2574 if (*in == 0xA) {
2575 ctxt->input->cur = in;
2576 in++;
2577 ctxt->input->line++;
2578 continue; /* while */
2579 }
2580 in--;
2581 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002582 if (*in == '<') {
2583 return;
2584 }
2585 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002586 return;
2587 }
2588 SHRINK;
2589 GROW;
2590 in = ctxt->input->cur;
2591 } while ((*in >= 0x20) && (*in <= 0x7F));
2592 nbchar = 0;
2593 }
Daniel Veillard50582112001-03-26 22:52:16 +00002594 ctxt->input->line = line;
2595 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596 xmlParseCharDataComplex(ctxt, cdata);
2597}
2598
2599void
2600xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002601 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2602 int nbchar = 0;
2603 int cur, l;
2604 int count = 0;
2605
2606 SHRINK;
2607 GROW;
2608 cur = CUR_CHAR(l);
2609 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2610 ((cur != '&') || (ctxt->token == '&')) &&
2611 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2612 if ((cur == ']') && (NXT(1) == ']') &&
2613 (NXT(2) == '>')) {
2614 if (cdata) break;
2615 else {
2616 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2618 ctxt->sax->error(ctxt->userData,
2619 "Sequence ']]>' not allowed in content\n");
2620 /* Should this be relaxed ??? I see a "must here */
2621 ctxt->wellFormed = 0;
2622 ctxt->disableSAX = 1;
2623 }
2624 }
2625 COPY_BUF(l,buf,nbchar,cur);
2626 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2627 /*
2628 * Ok the segment is to be consumed as chars.
2629 */
2630 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2631 if (areBlanks(ctxt, buf, nbchar)) {
2632 if (ctxt->sax->ignorableWhitespace != NULL)
2633 ctxt->sax->ignorableWhitespace(ctxt->userData,
2634 buf, nbchar);
2635 } else {
2636 if (ctxt->sax->characters != NULL)
2637 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2638 }
2639 }
2640 nbchar = 0;
2641 }
2642 count++;
2643 if (count > 50) {
2644 GROW;
2645 count = 0;
2646 }
2647 NEXTL(l);
2648 cur = CUR_CHAR(l);
2649 }
2650 if (nbchar != 0) {
2651 /*
2652 * Ok the segment is to be consumed as chars.
2653 */
2654 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2655 if (areBlanks(ctxt, buf, nbchar)) {
2656 if (ctxt->sax->ignorableWhitespace != NULL)
2657 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2658 } else {
2659 if (ctxt->sax->characters != NULL)
2660 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2661 }
2662 }
2663 }
2664}
2665
2666/**
2667 * xmlParseExternalID:
2668 * @ctxt: an XML parser context
2669 * @publicID: a xmlChar** receiving PubidLiteral
2670 * @strict: indicate whether we should restrict parsing to only
2671 * production [75], see NOTE below
2672 *
2673 * Parse an External ID or a Public ID
2674 *
2675 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2676 * 'PUBLIC' S PubidLiteral S SystemLiteral
2677 *
2678 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2679 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2680 *
2681 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2682 *
2683 * Returns the function returns SystemLiteral and in the second
2684 * case publicID receives PubidLiteral, is strict is off
2685 * it is possible to return NULL and have publicID set.
2686 */
2687
2688xmlChar *
2689xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2690 xmlChar *URI = NULL;
2691
2692 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002693
2694 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002695 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2696 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2697 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2698 SKIP(6);
2699 if (!IS_BLANK(CUR)) {
2700 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2702 ctxt->sax->error(ctxt->userData,
2703 "Space required after 'SYSTEM'\n");
2704 ctxt->wellFormed = 0;
2705 ctxt->disableSAX = 1;
2706 }
2707 SKIP_BLANKS;
2708 URI = xmlParseSystemLiteral(ctxt);
2709 if (URI == NULL) {
2710 ctxt->errNo = XML_ERR_URI_REQUIRED;
2711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2712 ctxt->sax->error(ctxt->userData,
2713 "xmlParseExternalID: SYSTEM, no URI\n");
2714 ctxt->wellFormed = 0;
2715 ctxt->disableSAX = 1;
2716 }
2717 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2718 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2719 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2720 SKIP(6);
2721 if (!IS_BLANK(CUR)) {
2722 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2724 ctxt->sax->error(ctxt->userData,
2725 "Space required after 'PUBLIC'\n");
2726 ctxt->wellFormed = 0;
2727 ctxt->disableSAX = 1;
2728 }
2729 SKIP_BLANKS;
2730 *publicID = xmlParsePubidLiteral(ctxt);
2731 if (*publicID == NULL) {
2732 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2734 ctxt->sax->error(ctxt->userData,
2735 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2736 ctxt->wellFormed = 0;
2737 ctxt->disableSAX = 1;
2738 }
2739 if (strict) {
2740 /*
2741 * We don't handle [83] so "S SystemLiteral" is required.
2742 */
2743 if (!IS_BLANK(CUR)) {
2744 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2746 ctxt->sax->error(ctxt->userData,
2747 "Space required after the Public Identifier\n");
2748 ctxt->wellFormed = 0;
2749 ctxt->disableSAX = 1;
2750 }
2751 } else {
2752 /*
2753 * We handle [83] so we return immediately, if
2754 * "S SystemLiteral" is not detected. From a purely parsing
2755 * point of view that's a nice mess.
2756 */
2757 const xmlChar *ptr;
2758 GROW;
2759
2760 ptr = CUR_PTR;
2761 if (!IS_BLANK(*ptr)) return(NULL);
2762
2763 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2764 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2765 }
2766 SKIP_BLANKS;
2767 URI = xmlParseSystemLiteral(ctxt);
2768 if (URI == NULL) {
2769 ctxt->errNo = XML_ERR_URI_REQUIRED;
2770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2771 ctxt->sax->error(ctxt->userData,
2772 "xmlParseExternalID: PUBLIC, no URI\n");
2773 ctxt->wellFormed = 0;
2774 ctxt->disableSAX = 1;
2775 }
2776 }
2777 return(URI);
2778}
2779
2780/**
2781 * xmlParseComment:
2782 * @ctxt: an XML parser context
2783 *
2784 * Skip an XML (SGML) comment <!-- .... -->
2785 * The spec says that "For compatibility, the string "--" (double-hyphen)
2786 * must not occur within comments. "
2787 *
2788 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2789 */
2790void
2791xmlParseComment(xmlParserCtxtPtr ctxt) {
2792 xmlChar *buf = NULL;
2793 int len;
2794 int size = XML_PARSER_BUFFER_SIZE;
2795 int q, ql;
2796 int r, rl;
2797 int cur, l;
2798 xmlParserInputState state;
2799 xmlParserInputPtr input = ctxt->input;
2800 int count = 0;
2801
2802 /*
2803 * Check that there is a comment right here.
2804 */
2805 if ((RAW != '<') || (NXT(1) != '!') ||
2806 (NXT(2) != '-') || (NXT(3) != '-')) return;
2807
2808 state = ctxt->instate;
2809 ctxt->instate = XML_PARSER_COMMENT;
2810 SHRINK;
2811 SKIP(4);
2812 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2813 if (buf == NULL) {
2814 xmlGenericError(xmlGenericErrorContext,
2815 "malloc of %d byte failed\n", size);
2816 ctxt->instate = state;
2817 return;
2818 }
2819 q = CUR_CHAR(ql);
2820 NEXTL(ql);
2821 r = CUR_CHAR(rl);
2822 NEXTL(rl);
2823 cur = CUR_CHAR(l);
2824 len = 0;
2825 while (IS_CHAR(cur) && /* checked */
2826 ((cur != '>') ||
2827 (r != '-') || (q != '-'))) {
2828 if ((r == '-') && (q == '-') && (len > 1)) {
2829 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2831 ctxt->sax->error(ctxt->userData,
2832 "Comment must not contain '--' (double-hyphen)`\n");
2833 ctxt->wellFormed = 0;
2834 ctxt->disableSAX = 1;
2835 }
2836 if (len + 5 >= size) {
2837 size *= 2;
2838 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2839 if (buf == NULL) {
2840 xmlGenericError(xmlGenericErrorContext,
2841 "realloc of %d byte failed\n", size);
2842 ctxt->instate = state;
2843 return;
2844 }
2845 }
2846 COPY_BUF(ql,buf,len,q);
2847 q = r;
2848 ql = rl;
2849 r = cur;
2850 rl = l;
2851
2852 count++;
2853 if (count > 50) {
2854 GROW;
2855 count = 0;
2856 }
2857 NEXTL(l);
2858 cur = CUR_CHAR(l);
2859 if (cur == 0) {
2860 SHRINK;
2861 GROW;
2862 cur = CUR_CHAR(l);
2863 }
2864 }
2865 buf[len] = 0;
2866 if (!IS_CHAR(cur)) {
2867 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2869 ctxt->sax->error(ctxt->userData,
2870 "Comment not terminated \n<!--%.50s\n", buf);
2871 ctxt->wellFormed = 0;
2872 ctxt->disableSAX = 1;
2873 xmlFree(buf);
2874 } else {
2875 if (input != ctxt->input) {
2876 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2878 ctxt->sax->error(ctxt->userData,
2879"Comment doesn't start and stop in the same entity\n");
2880 ctxt->wellFormed = 0;
2881 ctxt->disableSAX = 1;
2882 }
2883 NEXT;
2884 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2885 (!ctxt->disableSAX))
2886 ctxt->sax->comment(ctxt->userData, buf);
2887 xmlFree(buf);
2888 }
2889 ctxt->instate = state;
2890}
2891
2892/**
2893 * xmlParsePITarget:
2894 * @ctxt: an XML parser context
2895 *
2896 * parse the name of a PI
2897 *
2898 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2899 *
2900 * Returns the PITarget name or NULL
2901 */
2902
2903xmlChar *
2904xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2905 xmlChar *name;
2906
2907 name = xmlParseName(ctxt);
2908 if ((name != NULL) &&
2909 ((name[0] == 'x') || (name[0] == 'X')) &&
2910 ((name[1] == 'm') || (name[1] == 'M')) &&
2911 ((name[2] == 'l') || (name[2] == 'L'))) {
2912 int i;
2913 if ((name[0] == 'x') && (name[1] == 'm') &&
2914 (name[2] == 'l') && (name[3] == 0)) {
2915 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2917 ctxt->sax->error(ctxt->userData,
2918 "XML declaration allowed only at the start of the document\n");
2919 ctxt->wellFormed = 0;
2920 ctxt->disableSAX = 1;
2921 return(name);
2922 } else if (name[3] == 0) {
2923 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2926 ctxt->wellFormed = 0;
2927 ctxt->disableSAX = 1;
2928 return(name);
2929 }
2930 for (i = 0;;i++) {
2931 if (xmlW3CPIs[i] == NULL) break;
2932 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2933 return(name);
2934 }
2935 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2936 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2937 ctxt->sax->warning(ctxt->userData,
2938 "xmlParsePItarget: invalid name prefix 'xml'\n");
2939 }
2940 }
2941 return(name);
2942}
2943
2944/**
2945 * xmlParsePI:
2946 * @ctxt: an XML parser context
2947 *
2948 * parse an XML Processing Instruction.
2949 *
2950 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2951 *
2952 * The processing is transfered to SAX once parsed.
2953 */
2954
2955void
2956xmlParsePI(xmlParserCtxtPtr ctxt) {
2957 xmlChar *buf = NULL;
2958 int len = 0;
2959 int size = XML_PARSER_BUFFER_SIZE;
2960 int cur, l;
2961 xmlChar *target;
2962 xmlParserInputState state;
2963 int count = 0;
2964
2965 if ((RAW == '<') && (NXT(1) == '?')) {
2966 xmlParserInputPtr input = ctxt->input;
2967 state = ctxt->instate;
2968 ctxt->instate = XML_PARSER_PI;
2969 /*
2970 * this is a Processing Instruction.
2971 */
2972 SKIP(2);
2973 SHRINK;
2974
2975 /*
2976 * Parse the target name and check for special support like
2977 * namespace.
2978 */
2979 target = xmlParsePITarget(ctxt);
2980 if (target != NULL) {
2981 if ((RAW == '?') && (NXT(1) == '>')) {
2982 if (input != ctxt->input) {
2983 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "PI declaration doesn't start and stop in the same entity\n");
2987 ctxt->wellFormed = 0;
2988 ctxt->disableSAX = 1;
2989 }
2990 SKIP(2);
2991
2992 /*
2993 * SAX: PI detected.
2994 */
2995 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2996 (ctxt->sax->processingInstruction != NULL))
2997 ctxt->sax->processingInstruction(ctxt->userData,
2998 target, NULL);
2999 ctxt->instate = state;
3000 xmlFree(target);
3001 return;
3002 }
3003 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3004 if (buf == NULL) {
3005 xmlGenericError(xmlGenericErrorContext,
3006 "malloc of %d byte failed\n", size);
3007 ctxt->instate = state;
3008 return;
3009 }
3010 cur = CUR;
3011 if (!IS_BLANK(cur)) {
3012 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3014 ctxt->sax->error(ctxt->userData,
3015 "xmlParsePI: PI %s space expected\n", target);
3016 ctxt->wellFormed = 0;
3017 ctxt->disableSAX = 1;
3018 }
3019 SKIP_BLANKS;
3020 cur = CUR_CHAR(l);
3021 while (IS_CHAR(cur) && /* checked */
3022 ((cur != '?') || (NXT(1) != '>'))) {
3023 if (len + 5 >= size) {
3024 size *= 2;
3025 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3026 if (buf == NULL) {
3027 xmlGenericError(xmlGenericErrorContext,
3028 "realloc of %d byte failed\n", size);
3029 ctxt->instate = state;
3030 return;
3031 }
3032 }
3033 count++;
3034 if (count > 50) {
3035 GROW;
3036 count = 0;
3037 }
3038 COPY_BUF(l,buf,len,cur);
3039 NEXTL(l);
3040 cur = CUR_CHAR(l);
3041 if (cur == 0) {
3042 SHRINK;
3043 GROW;
3044 cur = CUR_CHAR(l);
3045 }
3046 }
3047 buf[len] = 0;
3048 if (cur != '?') {
3049 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3051 ctxt->sax->error(ctxt->userData,
3052 "xmlParsePI: PI %s never end ...\n", target);
3053 ctxt->wellFormed = 0;
3054 ctxt->disableSAX = 1;
3055 } else {
3056 if (input != ctxt->input) {
3057 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3059 ctxt->sax->error(ctxt->userData,
3060 "PI declaration doesn't start and stop in the same entity\n");
3061 ctxt->wellFormed = 0;
3062 ctxt->disableSAX = 1;
3063 }
3064 SKIP(2);
3065
3066 /*
3067 * SAX: PI detected.
3068 */
3069 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3070 (ctxt->sax->processingInstruction != NULL))
3071 ctxt->sax->processingInstruction(ctxt->userData,
3072 target, buf);
3073 }
3074 xmlFree(buf);
3075 xmlFree(target);
3076 } else {
3077 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "xmlParsePI : no target name\n");
3081 ctxt->wellFormed = 0;
3082 ctxt->disableSAX = 1;
3083 }
3084 ctxt->instate = state;
3085 }
3086}
3087
3088/**
3089 * xmlParseNotationDecl:
3090 * @ctxt: an XML parser context
3091 *
3092 * parse a notation declaration
3093 *
3094 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3095 *
3096 * Hence there is actually 3 choices:
3097 * 'PUBLIC' S PubidLiteral
3098 * 'PUBLIC' S PubidLiteral S SystemLiteral
3099 * and 'SYSTEM' S SystemLiteral
3100 *
3101 * See the NOTE on xmlParseExternalID().
3102 */
3103
3104void
3105xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3106 xmlChar *name;
3107 xmlChar *Pubid;
3108 xmlChar *Systemid;
3109
3110 if ((RAW == '<') && (NXT(1) == '!') &&
3111 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3112 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3113 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3114 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3115 xmlParserInputPtr input = ctxt->input;
3116 SHRINK;
3117 SKIP(10);
3118 if (!IS_BLANK(CUR)) {
3119 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3121 ctxt->sax->error(ctxt->userData,
3122 "Space required after '<!NOTATION'\n");
3123 ctxt->wellFormed = 0;
3124 ctxt->disableSAX = 1;
3125 return;
3126 }
3127 SKIP_BLANKS;
3128
Daniel Veillard76d66f42001-05-16 21:05:17 +00003129 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003130 if (name == NULL) {
3131 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3133 ctxt->sax->error(ctxt->userData,
3134 "NOTATION: Name expected here\n");
3135 ctxt->wellFormed = 0;
3136 ctxt->disableSAX = 1;
3137 return;
3138 }
3139 if (!IS_BLANK(CUR)) {
3140 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3142 ctxt->sax->error(ctxt->userData,
3143 "Space required after the NOTATION name'\n");
3144 ctxt->wellFormed = 0;
3145 ctxt->disableSAX = 1;
3146 return;
3147 }
3148 SKIP_BLANKS;
3149
3150 /*
3151 * Parse the IDs.
3152 */
3153 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3154 SKIP_BLANKS;
3155
3156 if (RAW == '>') {
3157 if (input != ctxt->input) {
3158 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3160 ctxt->sax->error(ctxt->userData,
3161"Notation declaration doesn't start and stop in the same entity\n");
3162 ctxt->wellFormed = 0;
3163 ctxt->disableSAX = 1;
3164 }
3165 NEXT;
3166 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3167 (ctxt->sax->notationDecl != NULL))
3168 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3169 } else {
3170 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3172 ctxt->sax->error(ctxt->userData,
3173 "'>' required to close NOTATION declaration\n");
3174 ctxt->wellFormed = 0;
3175 ctxt->disableSAX = 1;
3176 }
3177 xmlFree(name);
3178 if (Systemid != NULL) xmlFree(Systemid);
3179 if (Pubid != NULL) xmlFree(Pubid);
3180 }
3181}
3182
3183/**
3184 * xmlParseEntityDecl:
3185 * @ctxt: an XML parser context
3186 *
3187 * parse <!ENTITY declarations
3188 *
3189 * [70] EntityDecl ::= GEDecl | PEDecl
3190 *
3191 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3192 *
3193 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3194 *
3195 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3196 *
3197 * [74] PEDef ::= EntityValue | ExternalID
3198 *
3199 * [76] NDataDecl ::= S 'NDATA' S Name
3200 *
3201 * [ VC: Notation Declared ]
3202 * The Name must match the declared name of a notation.
3203 */
3204
3205void
3206xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3207 xmlChar *name = NULL;
3208 xmlChar *value = NULL;
3209 xmlChar *URI = NULL, *literal = NULL;
3210 xmlChar *ndata = NULL;
3211 int isParameter = 0;
3212 xmlChar *orig = NULL;
3213
3214 GROW;
3215 if ((RAW == '<') && (NXT(1) == '!') &&
3216 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3217 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3218 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3219 xmlParserInputPtr input = ctxt->input;
3220 ctxt->instate = XML_PARSER_ENTITY_DECL;
3221 SHRINK;
3222 SKIP(8);
3223 if (!IS_BLANK(CUR)) {
3224 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData,
3227 "Space required after '<!ENTITY'\n");
3228 ctxt->wellFormed = 0;
3229 ctxt->disableSAX = 1;
3230 }
3231 SKIP_BLANKS;
3232
3233 if (RAW == '%') {
3234 NEXT;
3235 if (!IS_BLANK(CUR)) {
3236 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238 ctxt->sax->error(ctxt->userData,
3239 "Space required after '%'\n");
3240 ctxt->wellFormed = 0;
3241 ctxt->disableSAX = 1;
3242 }
3243 SKIP_BLANKS;
3244 isParameter = 1;
3245 }
3246
Daniel Veillard76d66f42001-05-16 21:05:17 +00003247 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003248 if (name == NULL) {
3249 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3251 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3252 ctxt->wellFormed = 0;
3253 ctxt->disableSAX = 1;
3254 return;
3255 }
3256 if (!IS_BLANK(CUR)) {
3257 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3259 ctxt->sax->error(ctxt->userData,
3260 "Space required after the entity name\n");
3261 ctxt->wellFormed = 0;
3262 ctxt->disableSAX = 1;
3263 }
3264 SKIP_BLANKS;
3265
3266 /*
3267 * handle the various case of definitions...
3268 */
3269 if (isParameter) {
3270 if ((RAW == '"') || (RAW == '\'')) {
3271 value = xmlParseEntityValue(ctxt, &orig);
3272 if (value) {
3273 if ((ctxt->sax != NULL) &&
3274 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3275 ctxt->sax->entityDecl(ctxt->userData, name,
3276 XML_INTERNAL_PARAMETER_ENTITY,
3277 NULL, NULL, value);
3278 }
3279 } else {
3280 URI = xmlParseExternalID(ctxt, &literal, 1);
3281 if ((URI == NULL) && (literal == NULL)) {
3282 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData,
3285 "Entity value required\n");
3286 ctxt->wellFormed = 0;
3287 ctxt->disableSAX = 1;
3288 }
3289 if (URI) {
3290 xmlURIPtr uri;
3291
3292 uri = xmlParseURI((const char *) URI);
3293 if (uri == NULL) {
3294 ctxt->errNo = XML_ERR_INVALID_URI;
3295 if ((ctxt->sax != NULL) &&
3296 (!ctxt->disableSAX) &&
3297 (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299 "Invalid URI: %s\n", URI);
3300 ctxt->wellFormed = 0;
3301 } else {
3302 if (uri->fragment != NULL) {
3303 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3304 if ((ctxt->sax != NULL) &&
3305 (!ctxt->disableSAX) &&
3306 (ctxt->sax->error != NULL))
3307 ctxt->sax->error(ctxt->userData,
3308 "Fragment not allowed: %s\n", URI);
3309 ctxt->wellFormed = 0;
3310 } else {
3311 if ((ctxt->sax != NULL) &&
3312 (!ctxt->disableSAX) &&
3313 (ctxt->sax->entityDecl != NULL))
3314 ctxt->sax->entityDecl(ctxt->userData, name,
3315 XML_EXTERNAL_PARAMETER_ENTITY,
3316 literal, URI, NULL);
3317 }
3318 xmlFreeURI(uri);
3319 }
3320 }
3321 }
3322 } else {
3323 if ((RAW == '"') || (RAW == '\'')) {
3324 value = xmlParseEntityValue(ctxt, &orig);
3325 if ((ctxt->sax != NULL) &&
3326 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3327 ctxt->sax->entityDecl(ctxt->userData, name,
3328 XML_INTERNAL_GENERAL_ENTITY,
3329 NULL, NULL, value);
3330 } else {
3331 URI = xmlParseExternalID(ctxt, &literal, 1);
3332 if ((URI == NULL) && (literal == NULL)) {
3333 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3335 ctxt->sax->error(ctxt->userData,
3336 "Entity value required\n");
3337 ctxt->wellFormed = 0;
3338 ctxt->disableSAX = 1;
3339 }
3340 if (URI) {
3341 xmlURIPtr uri;
3342
3343 uri = xmlParseURI((const char *)URI);
3344 if (uri == NULL) {
3345 ctxt->errNo = XML_ERR_INVALID_URI;
3346 if ((ctxt->sax != NULL) &&
3347 (!ctxt->disableSAX) &&
3348 (ctxt->sax->error != NULL))
3349 ctxt->sax->error(ctxt->userData,
3350 "Invalid URI: %s\n", URI);
3351 ctxt->wellFormed = 0;
3352 } else {
3353 if (uri->fragment != NULL) {
3354 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3355 if ((ctxt->sax != NULL) &&
3356 (!ctxt->disableSAX) &&
3357 (ctxt->sax->error != NULL))
3358 ctxt->sax->error(ctxt->userData,
3359 "Fragment not allowed: %s\n", URI);
3360 ctxt->wellFormed = 0;
3361 }
3362 xmlFreeURI(uri);
3363 }
3364 }
3365 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3366 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3367 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3368 ctxt->sax->error(ctxt->userData,
3369 "Space required before 'NDATA'\n");
3370 ctxt->wellFormed = 0;
3371 ctxt->disableSAX = 1;
3372 }
3373 SKIP_BLANKS;
3374 if ((RAW == 'N') && (NXT(1) == 'D') &&
3375 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3376 (NXT(4) == 'A')) {
3377 SKIP(5);
3378 if (!IS_BLANK(CUR)) {
3379 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3381 ctxt->sax->error(ctxt->userData,
3382 "Space required after 'NDATA'\n");
3383 ctxt->wellFormed = 0;
3384 ctxt->disableSAX = 1;
3385 }
3386 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003387 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003388 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3389 (ctxt->sax->unparsedEntityDecl != NULL))
3390 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3391 literal, URI, ndata);
3392 } else {
3393 if ((ctxt->sax != NULL) &&
3394 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3395 ctxt->sax->entityDecl(ctxt->userData, name,
3396 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3397 literal, URI, NULL);
3398 }
3399 }
3400 }
3401 SKIP_BLANKS;
3402 if (RAW != '>') {
3403 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "xmlParseEntityDecl: entity %s not terminated\n", name);
3407 ctxt->wellFormed = 0;
3408 ctxt->disableSAX = 1;
3409 } else {
3410 if (input != ctxt->input) {
3411 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3413 ctxt->sax->error(ctxt->userData,
3414"Entity declaration doesn't start and stop in the same entity\n");
3415 ctxt->wellFormed = 0;
3416 ctxt->disableSAX = 1;
3417 }
3418 NEXT;
3419 }
3420 if (orig != NULL) {
3421 /*
3422 * Ugly mechanism to save the raw entity value.
3423 */
3424 xmlEntityPtr cur = NULL;
3425
3426 if (isParameter) {
3427 if ((ctxt->sax != NULL) &&
3428 (ctxt->sax->getParameterEntity != NULL))
3429 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3430 } else {
3431 if ((ctxt->sax != NULL) &&
3432 (ctxt->sax->getEntity != NULL))
3433 cur = ctxt->sax->getEntity(ctxt->userData, name);
3434 }
3435 if (cur != NULL) {
3436 if (cur->orig != NULL)
3437 xmlFree(orig);
3438 else
3439 cur->orig = orig;
3440 } else
3441 xmlFree(orig);
3442 }
3443 if (name != NULL) xmlFree(name);
3444 if (value != NULL) xmlFree(value);
3445 if (URI != NULL) xmlFree(URI);
3446 if (literal != NULL) xmlFree(literal);
3447 if (ndata != NULL) xmlFree(ndata);
3448 }
3449}
3450
3451/**
3452 * xmlParseDefaultDecl:
3453 * @ctxt: an XML parser context
3454 * @value: Receive a possible fixed default value for the attribute
3455 *
3456 * Parse an attribute default declaration
3457 *
3458 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3459 *
3460 * [ VC: Required Attribute ]
3461 * if the default declaration is the keyword #REQUIRED, then the
3462 * attribute must be specified for all elements of the type in the
3463 * attribute-list declaration.
3464 *
3465 * [ VC: Attribute Default Legal ]
3466 * The declared default value must meet the lexical constraints of
3467 * the declared attribute type c.f. xmlValidateAttributeDecl()
3468 *
3469 * [ VC: Fixed Attribute Default ]
3470 * if an attribute has a default value declared with the #FIXED
3471 * keyword, instances of that attribute must match the default value.
3472 *
3473 * [ WFC: No < in Attribute Values ]
3474 * handled in xmlParseAttValue()
3475 *
3476 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3477 * or XML_ATTRIBUTE_FIXED.
3478 */
3479
3480int
3481xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3482 int val;
3483 xmlChar *ret;
3484
3485 *value = NULL;
3486 if ((RAW == '#') && (NXT(1) == 'R') &&
3487 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3488 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3489 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3490 (NXT(8) == 'D')) {
3491 SKIP(9);
3492 return(XML_ATTRIBUTE_REQUIRED);
3493 }
3494 if ((RAW == '#') && (NXT(1) == 'I') &&
3495 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3496 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3497 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3498 SKIP(8);
3499 return(XML_ATTRIBUTE_IMPLIED);
3500 }
3501 val = XML_ATTRIBUTE_NONE;
3502 if ((RAW == '#') && (NXT(1) == 'F') &&
3503 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3504 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3505 SKIP(6);
3506 val = XML_ATTRIBUTE_FIXED;
3507 if (!IS_BLANK(CUR)) {
3508 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "Space required after '#FIXED'\n");
3512 ctxt->wellFormed = 0;
3513 ctxt->disableSAX = 1;
3514 }
3515 SKIP_BLANKS;
3516 }
3517 ret = xmlParseAttValue(ctxt);
3518 ctxt->instate = XML_PARSER_DTD;
3519 if (ret == NULL) {
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "Attribute default value declaration error\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 } else
3526 *value = ret;
3527 return(val);
3528}
3529
3530/**
3531 * xmlParseNotationType:
3532 * @ctxt: an XML parser context
3533 *
3534 * parse an Notation attribute type.
3535 *
3536 * Note: the leading 'NOTATION' S part has already being parsed...
3537 *
3538 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3539 *
3540 * [ VC: Notation Attributes ]
3541 * Values of this type must match one of the notation names included
3542 * in the declaration; all notation names in the declaration must be declared.
3543 *
3544 * Returns: the notation attribute tree built while parsing
3545 */
3546
3547xmlEnumerationPtr
3548xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3549 xmlChar *name;
3550 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3551
3552 if (RAW != '(') {
3553 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3555 ctxt->sax->error(ctxt->userData,
3556 "'(' required to start 'NOTATION'\n");
3557 ctxt->wellFormed = 0;
3558 ctxt->disableSAX = 1;
3559 return(NULL);
3560 }
3561 SHRINK;
3562 do {
3563 NEXT;
3564 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003565 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003566 if (name == NULL) {
3567 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3569 ctxt->sax->error(ctxt->userData,
3570 "Name expected in NOTATION declaration\n");
3571 ctxt->wellFormed = 0;
3572 ctxt->disableSAX = 1;
3573 return(ret);
3574 }
3575 cur = xmlCreateEnumeration(name);
3576 xmlFree(name);
3577 if (cur == NULL) return(ret);
3578 if (last == NULL) ret = last = cur;
3579 else {
3580 last->next = cur;
3581 last = cur;
3582 }
3583 SKIP_BLANKS;
3584 } while (RAW == '|');
3585 if (RAW != ')') {
3586 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3588 ctxt->sax->error(ctxt->userData,
3589 "')' required to finish NOTATION declaration\n");
3590 ctxt->wellFormed = 0;
3591 ctxt->disableSAX = 1;
3592 if ((last != NULL) && (last != ret))
3593 xmlFreeEnumeration(last);
3594 return(ret);
3595 }
3596 NEXT;
3597 return(ret);
3598}
3599
3600/**
3601 * xmlParseEnumerationType:
3602 * @ctxt: an XML parser context
3603 *
3604 * parse an Enumeration attribute type.
3605 *
3606 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3607 *
3608 * [ VC: Enumeration ]
3609 * Values of this type must match one of the Nmtoken tokens in
3610 * the declaration
3611 *
3612 * Returns: the enumeration attribute tree built while parsing
3613 */
3614
3615xmlEnumerationPtr
3616xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3617 xmlChar *name;
3618 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3619
3620 if (RAW != '(') {
3621 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "'(' required to start ATTLIST enumeration\n");
3625 ctxt->wellFormed = 0;
3626 ctxt->disableSAX = 1;
3627 return(NULL);
3628 }
3629 SHRINK;
3630 do {
3631 NEXT;
3632 SKIP_BLANKS;
3633 name = xmlParseNmtoken(ctxt);
3634 if (name == NULL) {
3635 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "NmToken expected in ATTLIST enumeration\n");
3639 ctxt->wellFormed = 0;
3640 ctxt->disableSAX = 1;
3641 return(ret);
3642 }
3643 cur = xmlCreateEnumeration(name);
3644 xmlFree(name);
3645 if (cur == NULL) return(ret);
3646 if (last == NULL) ret = last = cur;
3647 else {
3648 last->next = cur;
3649 last = cur;
3650 }
3651 SKIP_BLANKS;
3652 } while (RAW == '|');
3653 if (RAW != ')') {
3654 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3656 ctxt->sax->error(ctxt->userData,
3657 "')' required to finish ATTLIST enumeration\n");
3658 ctxt->wellFormed = 0;
3659 ctxt->disableSAX = 1;
3660 return(ret);
3661 }
3662 NEXT;
3663 return(ret);
3664}
3665
3666/**
3667 * xmlParseEnumeratedType:
3668 * @ctxt: an XML parser context
3669 * @tree: the enumeration tree built while parsing
3670 *
3671 * parse an Enumerated attribute type.
3672 *
3673 * [57] EnumeratedType ::= NotationType | Enumeration
3674 *
3675 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3676 *
3677 *
3678 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3679 */
3680
3681int
3682xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3683 if ((RAW == 'N') && (NXT(1) == 'O') &&
3684 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3685 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3686 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3687 SKIP(8);
3688 if (!IS_BLANK(CUR)) {
3689 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3691 ctxt->sax->error(ctxt->userData,
3692 "Space required after 'NOTATION'\n");
3693 ctxt->wellFormed = 0;
3694 ctxt->disableSAX = 1;
3695 return(0);
3696 }
3697 SKIP_BLANKS;
3698 *tree = xmlParseNotationType(ctxt);
3699 if (*tree == NULL) return(0);
3700 return(XML_ATTRIBUTE_NOTATION);
3701 }
3702 *tree = xmlParseEnumerationType(ctxt);
3703 if (*tree == NULL) return(0);
3704 return(XML_ATTRIBUTE_ENUMERATION);
3705}
3706
3707/**
3708 * xmlParseAttributeType:
3709 * @ctxt: an XML parser context
3710 * @tree: the enumeration tree built while parsing
3711 *
3712 * parse the Attribute list def for an element
3713 *
3714 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3715 *
3716 * [55] StringType ::= 'CDATA'
3717 *
3718 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3719 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3720 *
3721 * Validity constraints for attribute values syntax are checked in
3722 * xmlValidateAttributeValue()
3723 *
3724 * [ VC: ID ]
3725 * Values of type ID must match the Name production. A name must not
3726 * appear more than once in an XML document as a value of this type;
3727 * i.e., ID values must uniquely identify the elements which bear them.
3728 *
3729 * [ VC: One ID per Element Type ]
3730 * No element type may have more than one ID attribute specified.
3731 *
3732 * [ VC: ID Attribute Default ]
3733 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3734 *
3735 * [ VC: IDREF ]
3736 * Values of type IDREF must match the Name production, and values
3737 * of type IDREFS must match Names; each IDREF Name must match the value
3738 * of an ID attribute on some element in the XML document; i.e. IDREF
3739 * values must match the value of some ID attribute.
3740 *
3741 * [ VC: Entity Name ]
3742 * Values of type ENTITY must match the Name production, values
3743 * of type ENTITIES must match Names; each Entity Name must match the
3744 * name of an unparsed entity declared in the DTD.
3745 *
3746 * [ VC: Name Token ]
3747 * Values of type NMTOKEN must match the Nmtoken production; values
3748 * of type NMTOKENS must match Nmtokens.
3749 *
3750 * Returns the attribute type
3751 */
3752int
3753xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3754 SHRINK;
3755 if ((RAW == 'C') && (NXT(1) == 'D') &&
3756 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3757 (NXT(4) == 'A')) {
3758 SKIP(5);
3759 return(XML_ATTRIBUTE_CDATA);
3760 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3761 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3762 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3763 SKIP(6);
3764 return(XML_ATTRIBUTE_IDREFS);
3765 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3766 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3767 (NXT(4) == 'F')) {
3768 SKIP(5);
3769 return(XML_ATTRIBUTE_IDREF);
3770 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3771 SKIP(2);
3772 return(XML_ATTRIBUTE_ID);
3773 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3774 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3775 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3776 SKIP(6);
3777 return(XML_ATTRIBUTE_ENTITY);
3778 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3779 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3780 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3781 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3782 SKIP(8);
3783 return(XML_ATTRIBUTE_ENTITIES);
3784 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3785 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3786 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3787 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3788 SKIP(8);
3789 return(XML_ATTRIBUTE_NMTOKENS);
3790 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3791 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3792 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3793 (NXT(6) == 'N')) {
3794 SKIP(7);
3795 return(XML_ATTRIBUTE_NMTOKEN);
3796 }
3797 return(xmlParseEnumeratedType(ctxt, tree));
3798}
3799
3800/**
3801 * xmlParseAttributeListDecl:
3802 * @ctxt: an XML parser context
3803 *
3804 * : parse the Attribute list def for an element
3805 *
3806 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3807 *
3808 * [53] AttDef ::= S Name S AttType S DefaultDecl
3809 *
3810 */
3811void
3812xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3813 xmlChar *elemName;
3814 xmlChar *attrName;
3815 xmlEnumerationPtr tree;
3816
3817 if ((RAW == '<') && (NXT(1) == '!') &&
3818 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3819 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3820 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3821 (NXT(8) == 'T')) {
3822 xmlParserInputPtr input = ctxt->input;
3823
3824 SKIP(9);
3825 if (!IS_BLANK(CUR)) {
3826 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3828 ctxt->sax->error(ctxt->userData,
3829 "Space required after '<!ATTLIST'\n");
3830 ctxt->wellFormed = 0;
3831 ctxt->disableSAX = 1;
3832 }
3833 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003834 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003835 if (elemName == NULL) {
3836 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3838 ctxt->sax->error(ctxt->userData,
3839 "ATTLIST: no name for Element\n");
3840 ctxt->wellFormed = 0;
3841 ctxt->disableSAX = 1;
3842 return;
3843 }
3844 SKIP_BLANKS;
3845 GROW;
3846 while (RAW != '>') {
3847 const xmlChar *check = CUR_PTR;
3848 int type;
3849 int def;
3850 xmlChar *defaultValue = NULL;
3851
3852 GROW;
3853 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003854 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003855 if (attrName == NULL) {
3856 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3858 ctxt->sax->error(ctxt->userData,
3859 "ATTLIST: no name for Attribute\n");
3860 ctxt->wellFormed = 0;
3861 ctxt->disableSAX = 1;
3862 break;
3863 }
3864 GROW;
3865 if (!IS_BLANK(CUR)) {
3866 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3868 ctxt->sax->error(ctxt->userData,
3869 "Space required after the attribute name\n");
3870 ctxt->wellFormed = 0;
3871 ctxt->disableSAX = 1;
3872 if (attrName != NULL)
3873 xmlFree(attrName);
3874 if (defaultValue != NULL)
3875 xmlFree(defaultValue);
3876 break;
3877 }
3878 SKIP_BLANKS;
3879
3880 type = xmlParseAttributeType(ctxt, &tree);
3881 if (type <= 0) {
3882 if (attrName != NULL)
3883 xmlFree(attrName);
3884 if (defaultValue != NULL)
3885 xmlFree(defaultValue);
3886 break;
3887 }
3888
3889 GROW;
3890 if (!IS_BLANK(CUR)) {
3891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3893 ctxt->sax->error(ctxt->userData,
3894 "Space required after the attribute type\n");
3895 ctxt->wellFormed = 0;
3896 ctxt->disableSAX = 1;
3897 if (attrName != NULL)
3898 xmlFree(attrName);
3899 if (defaultValue != NULL)
3900 xmlFree(defaultValue);
3901 if (tree != NULL)
3902 xmlFreeEnumeration(tree);
3903 break;
3904 }
3905 SKIP_BLANKS;
3906
3907 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3908 if (def <= 0) {
3909 if (attrName != NULL)
3910 xmlFree(attrName);
3911 if (defaultValue != NULL)
3912 xmlFree(defaultValue);
3913 if (tree != NULL)
3914 xmlFreeEnumeration(tree);
3915 break;
3916 }
3917
3918 GROW;
3919 if (RAW != '>') {
3920 if (!IS_BLANK(CUR)) {
3921 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3923 ctxt->sax->error(ctxt->userData,
3924 "Space required after the attribute default value\n");
3925 ctxt->wellFormed = 0;
3926 ctxt->disableSAX = 1;
3927 if (attrName != NULL)
3928 xmlFree(attrName);
3929 if (defaultValue != NULL)
3930 xmlFree(defaultValue);
3931 if (tree != NULL)
3932 xmlFreeEnumeration(tree);
3933 break;
3934 }
3935 SKIP_BLANKS;
3936 }
3937 if (check == CUR_PTR) {
3938 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3940 ctxt->sax->error(ctxt->userData,
3941 "xmlParseAttributeListDecl: detected internal error\n");
3942 if (attrName != NULL)
3943 xmlFree(attrName);
3944 if (defaultValue != NULL)
3945 xmlFree(defaultValue);
3946 if (tree != NULL)
3947 xmlFreeEnumeration(tree);
3948 break;
3949 }
3950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3951 (ctxt->sax->attributeDecl != NULL))
3952 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3953 type, def, defaultValue, tree);
3954 if (attrName != NULL)
3955 xmlFree(attrName);
3956 if (defaultValue != NULL)
3957 xmlFree(defaultValue);
3958 GROW;
3959 }
3960 if (RAW == '>') {
3961 if (input != ctxt->input) {
3962 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3964 ctxt->sax->error(ctxt->userData,
3965"Attribute list declaration doesn't start and stop in the same entity\n");
3966 ctxt->wellFormed = 0;
3967 ctxt->disableSAX = 1;
3968 }
3969 NEXT;
3970 }
3971
3972 xmlFree(elemName);
3973 }
3974}
3975
3976/**
3977 * xmlParseElementMixedContentDecl:
3978 * @ctxt: an XML parser context
3979 *
3980 * parse the declaration for a Mixed Element content
3981 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3982 *
3983 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3984 * '(' S? '#PCDATA' S? ')'
3985 *
3986 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3987 *
3988 * [ VC: No Duplicate Types ]
3989 * The same name must not appear more than once in a single
3990 * mixed-content declaration.
3991 *
3992 * returns: the list of the xmlElementContentPtr describing the element choices
3993 */
3994xmlElementContentPtr
3995xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3996 xmlElementContentPtr ret = NULL, cur = NULL, n;
3997 xmlChar *elem = NULL;
3998
3999 GROW;
4000 if ((RAW == '#') && (NXT(1) == 'P') &&
4001 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4002 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4003 (NXT(6) == 'A')) {
4004 SKIP(7);
4005 SKIP_BLANKS;
4006 SHRINK;
4007 if (RAW == ')') {
4008 ctxt->entity = ctxt->input;
4009 NEXT;
4010 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4011 if (RAW == '*') {
4012 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4013 NEXT;
4014 }
4015 return(ret);
4016 }
4017 if ((RAW == '(') || (RAW == '|')) {
4018 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4019 if (ret == NULL) return(NULL);
4020 }
4021 while (RAW == '|') {
4022 NEXT;
4023 if (elem == NULL) {
4024 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4025 if (ret == NULL) return(NULL);
4026 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004027 if (cur != NULL)
4028 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004029 cur = ret;
4030 } else {
4031 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4032 if (n == NULL) return(NULL);
4033 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004034 if (n->c1 != NULL)
4035 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004036 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004037 if (n != NULL)
4038 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 cur = n;
4040 xmlFree(elem);
4041 }
4042 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004043 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004044 if (elem == NULL) {
4045 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "xmlParseElementMixedContentDecl : Name expected\n");
4049 ctxt->wellFormed = 0;
4050 ctxt->disableSAX = 1;
4051 xmlFreeElementContent(cur);
4052 return(NULL);
4053 }
4054 SKIP_BLANKS;
4055 GROW;
4056 }
4057 if ((RAW == ')') && (NXT(1) == '*')) {
4058 if (elem != NULL) {
4059 cur->c2 = xmlNewElementContent(elem,
4060 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004061 if (cur->c2 != NULL)
4062 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004063 xmlFree(elem);
4064 }
4065 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4066 ctxt->entity = ctxt->input;
4067 SKIP(2);
4068 } else {
4069 if (elem != NULL) xmlFree(elem);
4070 xmlFreeElementContent(ret);
4071 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073 ctxt->sax->error(ctxt->userData,
4074 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4075 ctxt->wellFormed = 0;
4076 ctxt->disableSAX = 1;
4077 return(NULL);
4078 }
4079
4080 } else {
4081 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4083 ctxt->sax->error(ctxt->userData,
4084 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4085 ctxt->wellFormed = 0;
4086 ctxt->disableSAX = 1;
4087 }
4088 return(ret);
4089}
4090
4091/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004092 * xmlParseElementChildrenContentD:
4093 * @ctxt: an XML parser context
4094 *
4095 * VMS version of xmlParseElementChildrenContentDecl()
4096 *
4097 * Returns the tree of xmlElementContentPtr describing the element
4098 * hierarchy.
4099 */
4100/**
Owen Taylor3473f882001-02-23 17:55:21 +00004101 * xmlParseElementChildrenContentDecl:
4102 * @ctxt: an XML parser context
4103 *
4104 * parse the declaration for an Element children content
4105 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4106 *
4107 *
4108 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4109 *
4110 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4111 *
4112 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4113 *
4114 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4115 *
4116 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4117 * TODO Parameter-entity replacement text must be properly nested
4118 * with parenthesized groups. That is to say, if either of the
4119 * opening or closing parentheses in a choice, seq, or Mixed
4120 * construct is contained in the replacement text for a parameter
4121 * entity, both must be contained in the same replacement text. For
4122 * interoperability, if a parameter-entity reference appears in a
4123 * choice, seq, or Mixed construct, its replacement text should not
4124 * be empty, and neither the first nor last non-blank character of
4125 * the replacement text should be a connector (| or ,).
4126 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004127 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004128 * hierarchy.
4129 */
4130xmlElementContentPtr
4131#ifdef VMS
4132xmlParseElementChildrenContentD
4133#else
4134xmlParseElementChildrenContentDecl
4135#endif
4136(xmlParserCtxtPtr ctxt) {
4137 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4138 xmlChar *elem;
4139 xmlChar type = 0;
4140
4141 SKIP_BLANKS;
4142 GROW;
4143 if (RAW == '(') {
4144 /* Recurse on first child */
4145 NEXT;
4146 SKIP_BLANKS;
4147 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4148 SKIP_BLANKS;
4149 GROW;
4150 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004151 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004152 if (elem == NULL) {
4153 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4155 ctxt->sax->error(ctxt->userData,
4156 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4157 ctxt->wellFormed = 0;
4158 ctxt->disableSAX = 1;
4159 return(NULL);
4160 }
4161 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4162 GROW;
4163 if (RAW == '?') {
4164 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4165 NEXT;
4166 } else if (RAW == '*') {
4167 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4168 NEXT;
4169 } else if (RAW == '+') {
4170 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4171 NEXT;
4172 } else {
4173 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4174 }
4175 xmlFree(elem);
4176 GROW;
4177 }
4178 SKIP_BLANKS;
4179 SHRINK;
4180 while (RAW != ')') {
4181 /*
4182 * Each loop we parse one separator and one element.
4183 */
4184 if (RAW == ',') {
4185 if (type == 0) type = CUR;
4186
4187 /*
4188 * Detect "Name | Name , Name" error
4189 */
4190 else if (type != CUR) {
4191 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4193 ctxt->sax->error(ctxt->userData,
4194 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4195 type);
4196 ctxt->wellFormed = 0;
4197 ctxt->disableSAX = 1;
4198 if ((op != NULL) && (op != ret))
4199 xmlFreeElementContent(op);
4200 if ((last != NULL) && (last != ret) &&
4201 (last != ret->c1) && (last != ret->c2))
4202 xmlFreeElementContent(last);
4203 if (ret != NULL)
4204 xmlFreeElementContent(ret);
4205 return(NULL);
4206 }
4207 NEXT;
4208
4209 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4210 if (op == NULL) {
4211 xmlFreeElementContent(ret);
4212 return(NULL);
4213 }
4214 if (last == NULL) {
4215 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004216 if (ret != NULL)
4217 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004218 ret = cur = op;
4219 } else {
4220 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004221 if (op != NULL)
4222 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004224 if (last != NULL)
4225 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004226 cur =op;
4227 last = NULL;
4228 }
4229 } else if (RAW == '|') {
4230 if (type == 0) type = CUR;
4231
4232 /*
4233 * Detect "Name , Name | Name" error
4234 */
4235 else if (type != CUR) {
4236 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4238 ctxt->sax->error(ctxt->userData,
4239 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4240 type);
4241 ctxt->wellFormed = 0;
4242 ctxt->disableSAX = 1;
4243 if ((op != NULL) && (op != ret) && (op != last))
4244 xmlFreeElementContent(op);
4245 if ((last != NULL) && (last != ret) &&
4246 (last != ret->c1) && (last != ret->c2))
4247 xmlFreeElementContent(last);
4248 if (ret != NULL)
4249 xmlFreeElementContent(ret);
4250 return(NULL);
4251 }
4252 NEXT;
4253
4254 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4255 if (op == NULL) {
4256 if ((op != NULL) && (op != ret))
4257 xmlFreeElementContent(op);
4258 if ((last != NULL) && (last != ret) &&
4259 (last != ret->c1) && (last != ret->c2))
4260 xmlFreeElementContent(last);
4261 if (ret != NULL)
4262 xmlFreeElementContent(ret);
4263 return(NULL);
4264 }
4265 if (last == NULL) {
4266 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004267 if (ret != NULL)
4268 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004269 ret = cur = op;
4270 } else {
4271 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004272 if (op != NULL)
4273 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004274 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004275 if (last != NULL)
4276 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004277 cur =op;
4278 last = NULL;
4279 }
4280 } else {
4281 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4283 ctxt->sax->error(ctxt->userData,
4284 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4285 ctxt->wellFormed = 0;
4286 ctxt->disableSAX = 1;
4287 if ((op != NULL) && (op != ret))
4288 xmlFreeElementContent(op);
4289 if ((last != NULL) && (last != ret) &&
4290 (last != ret->c1) && (last != ret->c2))
4291 xmlFreeElementContent(last);
4292 if (ret != NULL)
4293 xmlFreeElementContent(ret);
4294 return(NULL);
4295 }
4296 GROW;
4297 SKIP_BLANKS;
4298 GROW;
4299 if (RAW == '(') {
4300 /* Recurse on second child */
4301 NEXT;
4302 SKIP_BLANKS;
4303 last = xmlParseElementChildrenContentDecl(ctxt);
4304 SKIP_BLANKS;
4305 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004306 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004307 if (elem == NULL) {
4308 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4310 ctxt->sax->error(ctxt->userData,
4311 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4312 ctxt->wellFormed = 0;
4313 ctxt->disableSAX = 1;
4314 if ((op != NULL) && (op != ret))
4315 xmlFreeElementContent(op);
4316 if ((last != NULL) && (last != ret) &&
4317 (last != ret->c1) && (last != ret->c2))
4318 xmlFreeElementContent(last);
4319 if (ret != NULL)
4320 xmlFreeElementContent(ret);
4321 return(NULL);
4322 }
4323 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4324 xmlFree(elem);
4325 if (RAW == '?') {
4326 last->ocur = XML_ELEMENT_CONTENT_OPT;
4327 NEXT;
4328 } else if (RAW == '*') {
4329 last->ocur = XML_ELEMENT_CONTENT_MULT;
4330 NEXT;
4331 } else if (RAW == '+') {
4332 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4333 NEXT;
4334 } else {
4335 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4336 }
4337 }
4338 SKIP_BLANKS;
4339 GROW;
4340 }
4341 if ((cur != NULL) && (last != NULL)) {
4342 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004343 if (last != NULL)
4344 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004345 }
4346 ctxt->entity = ctxt->input;
4347 NEXT;
4348 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004349 if (ret != NULL)
4350 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004351 NEXT;
4352 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004353 if (ret != NULL)
4354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004355 NEXT;
4356 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004357 if (ret != NULL)
4358 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 NEXT;
4360 }
4361 return(ret);
4362}
4363
4364/**
4365 * xmlParseElementContentDecl:
4366 * @ctxt: an XML parser context
4367 * @name: the name of the element being defined.
4368 * @result: the Element Content pointer will be stored here if any
4369 *
4370 * parse the declaration for an Element content either Mixed or Children,
4371 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4372 *
4373 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4374 *
4375 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4376 */
4377
4378int
4379xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4380 xmlElementContentPtr *result) {
4381
4382 xmlElementContentPtr tree = NULL;
4383 xmlParserInputPtr input = ctxt->input;
4384 int res;
4385
4386 *result = NULL;
4387
4388 if (RAW != '(') {
4389 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4391 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004392 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004393 ctxt->wellFormed = 0;
4394 ctxt->disableSAX = 1;
4395 return(-1);
4396 }
4397 NEXT;
4398 GROW;
4399 SKIP_BLANKS;
4400 if ((RAW == '#') && (NXT(1) == 'P') &&
4401 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4402 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4403 (NXT(6) == 'A')) {
4404 tree = xmlParseElementMixedContentDecl(ctxt);
4405 res = XML_ELEMENT_TYPE_MIXED;
4406 } else {
4407 tree = xmlParseElementChildrenContentDecl(ctxt);
4408 res = XML_ELEMENT_TYPE_ELEMENT;
4409 }
4410 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4411 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4413 ctxt->sax->error(ctxt->userData,
4414"Element content declaration doesn't start and stop in the same entity\n");
4415 ctxt->wellFormed = 0;
4416 ctxt->disableSAX = 1;
4417 }
4418 SKIP_BLANKS;
4419 *result = tree;
4420 return(res);
4421}
4422
4423/**
4424 * xmlParseElementDecl:
4425 * @ctxt: an XML parser context
4426 *
4427 * parse an Element declaration.
4428 *
4429 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4430 *
4431 * [ VC: Unique Element Type Declaration ]
4432 * No element type may be declared more than once
4433 *
4434 * Returns the type of the element, or -1 in case of error
4435 */
4436int
4437xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4438 xmlChar *name;
4439 int ret = -1;
4440 xmlElementContentPtr content = NULL;
4441
4442 GROW;
4443 if ((RAW == '<') && (NXT(1) == '!') &&
4444 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4445 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4446 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4447 (NXT(8) == 'T')) {
4448 xmlParserInputPtr input = ctxt->input;
4449
4450 SKIP(9);
4451 if (!IS_BLANK(CUR)) {
4452 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4454 ctxt->sax->error(ctxt->userData,
4455 "Space required after 'ELEMENT'\n");
4456 ctxt->wellFormed = 0;
4457 ctxt->disableSAX = 1;
4458 }
4459 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004460 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004461 if (name == NULL) {
4462 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4464 ctxt->sax->error(ctxt->userData,
4465 "xmlParseElementDecl: no name for Element\n");
4466 ctxt->wellFormed = 0;
4467 ctxt->disableSAX = 1;
4468 return(-1);
4469 }
4470 while ((RAW == 0) && (ctxt->inputNr > 1))
4471 xmlPopInput(ctxt);
4472 if (!IS_BLANK(CUR)) {
4473 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4475 ctxt->sax->error(ctxt->userData,
4476 "Space required after the element name\n");
4477 ctxt->wellFormed = 0;
4478 ctxt->disableSAX = 1;
4479 }
4480 SKIP_BLANKS;
4481 if ((RAW == 'E') && (NXT(1) == 'M') &&
4482 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4483 (NXT(4) == 'Y')) {
4484 SKIP(5);
4485 /*
4486 * Element must always be empty.
4487 */
4488 ret = XML_ELEMENT_TYPE_EMPTY;
4489 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4490 (NXT(2) == 'Y')) {
4491 SKIP(3);
4492 /*
4493 * Element is a generic container.
4494 */
4495 ret = XML_ELEMENT_TYPE_ANY;
4496 } else if (RAW == '(') {
4497 ret = xmlParseElementContentDecl(ctxt, name, &content);
4498 } else {
4499 /*
4500 * [ WFC: PEs in Internal Subset ] error handling.
4501 */
4502 if ((RAW == '%') && (ctxt->external == 0) &&
4503 (ctxt->inputNr == 1)) {
4504 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4506 ctxt->sax->error(ctxt->userData,
4507 "PEReference: forbidden within markup decl in internal subset\n");
4508 } else {
4509 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4511 ctxt->sax->error(ctxt->userData,
4512 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4513 }
4514 ctxt->wellFormed = 0;
4515 ctxt->disableSAX = 1;
4516 if (name != NULL) xmlFree(name);
4517 return(-1);
4518 }
4519
4520 SKIP_BLANKS;
4521 /*
4522 * Pop-up of finished entities.
4523 */
4524 while ((RAW == 0) && (ctxt->inputNr > 1))
4525 xmlPopInput(ctxt);
4526 SKIP_BLANKS;
4527
4528 if (RAW != '>') {
4529 ctxt->errNo = XML_ERR_GT_REQUIRED;
4530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4531 ctxt->sax->error(ctxt->userData,
4532 "xmlParseElementDecl: expected '>' at the end\n");
4533 ctxt->wellFormed = 0;
4534 ctxt->disableSAX = 1;
4535 } else {
4536 if (input != ctxt->input) {
4537 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4539 ctxt->sax->error(ctxt->userData,
4540"Element declaration doesn't start and stop in the same entity\n");
4541 ctxt->wellFormed = 0;
4542 ctxt->disableSAX = 1;
4543 }
4544
4545 NEXT;
4546 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4547 (ctxt->sax->elementDecl != NULL))
4548 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4549 content);
4550 }
4551 if (content != NULL) {
4552 xmlFreeElementContent(content);
4553 }
4554 if (name != NULL) {
4555 xmlFree(name);
4556 }
4557 }
4558 return(ret);
4559}
4560
4561/**
Owen Taylor3473f882001-02-23 17:55:21 +00004562 * xmlParseConditionalSections
4563 * @ctxt: an XML parser context
4564 *
4565 * [61] conditionalSect ::= includeSect | ignoreSect
4566 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4567 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4568 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4569 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4570 */
4571
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004572static void
Owen Taylor3473f882001-02-23 17:55:21 +00004573xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4574 SKIP(3);
4575 SKIP_BLANKS;
4576 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4577 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4578 (NXT(6) == 'E')) {
4579 SKIP(7);
4580 SKIP_BLANKS;
4581 if (RAW != '[') {
4582 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "XML conditional section '[' expected\n");
4586 ctxt->wellFormed = 0;
4587 ctxt->disableSAX = 1;
4588 } else {
4589 NEXT;
4590 }
4591 if (xmlParserDebugEntities) {
4592 if ((ctxt->input != NULL) && (ctxt->input->filename))
4593 xmlGenericError(xmlGenericErrorContext,
4594 "%s(%d): ", ctxt->input->filename,
4595 ctxt->input->line);
4596 xmlGenericError(xmlGenericErrorContext,
4597 "Entering INCLUDE Conditional Section\n");
4598 }
4599
4600 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4601 (NXT(2) != '>'))) {
4602 const xmlChar *check = CUR_PTR;
4603 int cons = ctxt->input->consumed;
4604 int tok = ctxt->token;
4605
4606 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4607 xmlParseConditionalSections(ctxt);
4608 } else if (IS_BLANK(CUR)) {
4609 NEXT;
4610 } else if (RAW == '%') {
4611 xmlParsePEReference(ctxt);
4612 } else
4613 xmlParseMarkupDecl(ctxt);
4614
4615 /*
4616 * Pop-up of finished entities.
4617 */
4618 while ((RAW == 0) && (ctxt->inputNr > 1))
4619 xmlPopInput(ctxt);
4620
4621 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4622 (tok == ctxt->token)) {
4623 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4625 ctxt->sax->error(ctxt->userData,
4626 "Content error in the external subset\n");
4627 ctxt->wellFormed = 0;
4628 ctxt->disableSAX = 1;
4629 break;
4630 }
4631 }
4632 if (xmlParserDebugEntities) {
4633 if ((ctxt->input != NULL) && (ctxt->input->filename))
4634 xmlGenericError(xmlGenericErrorContext,
4635 "%s(%d): ", ctxt->input->filename,
4636 ctxt->input->line);
4637 xmlGenericError(xmlGenericErrorContext,
4638 "Leaving INCLUDE Conditional Section\n");
4639 }
4640
4641 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4642 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4643 int state;
4644 int instate;
4645 int depth = 0;
4646
4647 SKIP(6);
4648 SKIP_BLANKS;
4649 if (RAW != '[') {
4650 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4652 ctxt->sax->error(ctxt->userData,
4653 "XML conditional section '[' expected\n");
4654 ctxt->wellFormed = 0;
4655 ctxt->disableSAX = 1;
4656 } else {
4657 NEXT;
4658 }
4659 if (xmlParserDebugEntities) {
4660 if ((ctxt->input != NULL) && (ctxt->input->filename))
4661 xmlGenericError(xmlGenericErrorContext,
4662 "%s(%d): ", ctxt->input->filename,
4663 ctxt->input->line);
4664 xmlGenericError(xmlGenericErrorContext,
4665 "Entering IGNORE Conditional Section\n");
4666 }
4667
4668 /*
4669 * Parse up to the end of the conditionnal section
4670 * But disable SAX event generating DTD building in the meantime
4671 */
4672 state = ctxt->disableSAX;
4673 instate = ctxt->instate;
4674 ctxt->disableSAX = 1;
4675 ctxt->instate = XML_PARSER_IGNORE;
4676
4677 while (depth >= 0) {
4678 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4679 depth++;
4680 SKIP(3);
4681 continue;
4682 }
4683 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4684 if (--depth >= 0) SKIP(3);
4685 continue;
4686 }
4687 NEXT;
4688 continue;
4689 }
4690
4691 ctxt->disableSAX = state;
4692 ctxt->instate = instate;
4693
4694 if (xmlParserDebugEntities) {
4695 if ((ctxt->input != NULL) && (ctxt->input->filename))
4696 xmlGenericError(xmlGenericErrorContext,
4697 "%s(%d): ", ctxt->input->filename,
4698 ctxt->input->line);
4699 xmlGenericError(xmlGenericErrorContext,
4700 "Leaving IGNORE Conditional Section\n");
4701 }
4702
4703 } else {
4704 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4706 ctxt->sax->error(ctxt->userData,
4707 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4708 ctxt->wellFormed = 0;
4709 ctxt->disableSAX = 1;
4710 }
4711
4712 if (RAW == 0)
4713 SHRINK;
4714
4715 if (RAW == 0) {
4716 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4718 ctxt->sax->error(ctxt->userData,
4719 "XML conditional section not closed\n");
4720 ctxt->wellFormed = 0;
4721 ctxt->disableSAX = 1;
4722 } else {
4723 SKIP(3);
4724 }
4725}
4726
4727/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004728 * xmlParseMarkupDecl:
4729 * @ctxt: an XML parser context
4730 *
4731 * parse Markup declarations
4732 *
4733 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4734 * NotationDecl | PI | Comment
4735 *
4736 * [ VC: Proper Declaration/PE Nesting ]
4737 * Parameter-entity replacement text must be properly nested with
4738 * markup declarations. That is to say, if either the first character
4739 * or the last character of a markup declaration (markupdecl above) is
4740 * contained in the replacement text for a parameter-entity reference,
4741 * both must be contained in the same replacement text.
4742 *
4743 * [ WFC: PEs in Internal Subset ]
4744 * In the internal DTD subset, parameter-entity references can occur
4745 * only where markup declarations can occur, not within markup declarations.
4746 * (This does not apply to references that occur in external parameter
4747 * entities or to the external subset.)
4748 */
4749void
4750xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4751 GROW;
4752 xmlParseElementDecl(ctxt);
4753 xmlParseAttributeListDecl(ctxt);
4754 xmlParseEntityDecl(ctxt);
4755 xmlParseNotationDecl(ctxt);
4756 xmlParsePI(ctxt);
4757 xmlParseComment(ctxt);
4758 /*
4759 * This is only for internal subset. On external entities,
4760 * the replacement is done before parsing stage
4761 */
4762 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4763 xmlParsePEReference(ctxt);
4764
4765 /*
4766 * Conditional sections are allowed from entities included
4767 * by PE References in the internal subset.
4768 */
4769 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4770 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4771 xmlParseConditionalSections(ctxt);
4772 }
4773 }
4774
4775 ctxt->instate = XML_PARSER_DTD;
4776}
4777
4778/**
4779 * xmlParseTextDecl:
4780 * @ctxt: an XML parser context
4781 *
4782 * parse an XML declaration header for external entities
4783 *
4784 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4785 *
4786 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4787 */
4788
4789void
4790xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4791 xmlChar *version;
4792
4793 /*
4794 * We know that '<?xml' is here.
4795 */
4796 if ((RAW == '<') && (NXT(1) == '?') &&
4797 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4798 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4799 SKIP(5);
4800 } else {
4801 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4803 ctxt->sax->error(ctxt->userData,
4804 "Text declaration '<?xml' required\n");
4805 ctxt->wellFormed = 0;
4806 ctxt->disableSAX = 1;
4807
4808 return;
4809 }
4810
4811 if (!IS_BLANK(CUR)) {
4812 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4814 ctxt->sax->error(ctxt->userData,
4815 "Space needed after '<?xml'\n");
4816 ctxt->wellFormed = 0;
4817 ctxt->disableSAX = 1;
4818 }
4819 SKIP_BLANKS;
4820
4821 /*
4822 * We may have the VersionInfo here.
4823 */
4824 version = xmlParseVersionInfo(ctxt);
4825 if (version == NULL)
4826 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4827 ctxt->input->version = version;
4828
4829 /*
4830 * We must have the encoding declaration
4831 */
4832 if (!IS_BLANK(CUR)) {
4833 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4835 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4836 ctxt->wellFormed = 0;
4837 ctxt->disableSAX = 1;
4838 }
4839 xmlParseEncodingDecl(ctxt);
4840 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4841 /*
4842 * The XML REC instructs us to stop parsing right here
4843 */
4844 return;
4845 }
4846
4847 SKIP_BLANKS;
4848 if ((RAW == '?') && (NXT(1) == '>')) {
4849 SKIP(2);
4850 } else if (RAW == '>') {
4851 /* Deprecated old WD ... */
4852 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4854 ctxt->sax->error(ctxt->userData,
4855 "XML declaration must end-up with '?>'\n");
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 NEXT;
4859 } else {
4860 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4862 ctxt->sax->error(ctxt->userData,
4863 "parsing XML declaration: '?>' expected\n");
4864 ctxt->wellFormed = 0;
4865 ctxt->disableSAX = 1;
4866 MOVETO_ENDTAG(CUR_PTR);
4867 NEXT;
4868 }
4869}
4870
4871/**
Owen Taylor3473f882001-02-23 17:55:21 +00004872 * xmlParseExternalSubset:
4873 * @ctxt: an XML parser context
4874 * @ExternalID: the external identifier
4875 * @SystemID: the system identifier (or URL)
4876 *
4877 * parse Markup declarations from an external subset
4878 *
4879 * [30] extSubset ::= textDecl? extSubsetDecl
4880 *
4881 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4882 */
4883void
4884xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4885 const xmlChar *SystemID) {
4886 GROW;
4887 if ((RAW == '<') && (NXT(1) == '?') &&
4888 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4889 (NXT(4) == 'l')) {
4890 xmlParseTextDecl(ctxt);
4891 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4892 /*
4893 * The XML REC instructs us to stop parsing right here
4894 */
4895 ctxt->instate = XML_PARSER_EOF;
4896 return;
4897 }
4898 }
4899 if (ctxt->myDoc == NULL) {
4900 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4901 }
4902 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4903 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4904
4905 ctxt->instate = XML_PARSER_DTD;
4906 ctxt->external = 1;
4907 while (((RAW == '<') && (NXT(1) == '?')) ||
4908 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00004909 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004910 const xmlChar *check = CUR_PTR;
4911 int cons = ctxt->input->consumed;
4912 int tok = ctxt->token;
4913
4914 GROW;
4915 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4916 xmlParseConditionalSections(ctxt);
4917 } else if (IS_BLANK(CUR)) {
4918 NEXT;
4919 } else if (RAW == '%') {
4920 xmlParsePEReference(ctxt);
4921 } else
4922 xmlParseMarkupDecl(ctxt);
4923
4924 /*
4925 * Pop-up of finished entities.
4926 */
4927 while ((RAW == 0) && (ctxt->inputNr > 1))
4928 xmlPopInput(ctxt);
4929
4930 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4931 (tok == ctxt->token)) {
4932 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4934 ctxt->sax->error(ctxt->userData,
4935 "Content error in the external subset\n");
4936 ctxt->wellFormed = 0;
4937 ctxt->disableSAX = 1;
4938 break;
4939 }
4940 }
4941
4942 if (RAW != 0) {
4943 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4945 ctxt->sax->error(ctxt->userData,
4946 "Extra content at the end of the document\n");
4947 ctxt->wellFormed = 0;
4948 ctxt->disableSAX = 1;
4949 }
4950
4951}
4952
4953/**
4954 * xmlParseReference:
4955 * @ctxt: an XML parser context
4956 *
4957 * parse and handle entity references in content, depending on the SAX
4958 * interface, this may end-up in a call to character() if this is a
4959 * CharRef, a predefined entity, if there is no reference() callback.
4960 * or if the parser was asked to switch to that mode.
4961 *
4962 * [67] Reference ::= EntityRef | CharRef
4963 */
4964void
4965xmlParseReference(xmlParserCtxtPtr ctxt) {
4966 xmlEntityPtr ent;
4967 xmlChar *val;
4968 if (RAW != '&') return;
4969
4970 if (NXT(1) == '#') {
4971 int i = 0;
4972 xmlChar out[10];
4973 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004974 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004975
4976 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4977 /*
4978 * So we are using non-UTF-8 buffers
4979 * Check that the char fit on 8bits, if not
4980 * generate a CharRef.
4981 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004982 if (value <= 0xFF) {
4983 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004984 out[1] = 0;
4985 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4986 (!ctxt->disableSAX))
4987 ctxt->sax->characters(ctxt->userData, out, 1);
4988 } else {
4989 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004990 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004992 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4994 (!ctxt->disableSAX))
4995 ctxt->sax->reference(ctxt->userData, out);
4996 }
4997 } else {
4998 /*
4999 * Just encode the value in UTF-8
5000 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005001 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005002 out[i] = 0;
5003 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5004 (!ctxt->disableSAX))
5005 ctxt->sax->characters(ctxt->userData, out, i);
5006 }
5007 } else {
5008 ent = xmlParseEntityRef(ctxt);
5009 if (ent == NULL) return;
5010 if ((ent->name != NULL) &&
5011 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5012 xmlNodePtr list = NULL;
5013 int ret;
5014
5015
5016 /*
5017 * The first reference to the entity trigger a parsing phase
5018 * where the ent->children is filled with the result from
5019 * the parsing.
5020 */
5021 if (ent->children == NULL) {
5022 xmlChar *value;
5023 value = ent->content;
5024
5025 /*
5026 * Check that this entity is well formed
5027 */
5028 if ((value != NULL) &&
5029 (value[1] == 0) && (value[0] == '<') &&
5030 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5031 /*
5032 * DONE: get definite answer on this !!!
5033 * Lots of entity decls are used to declare a single
5034 * char
5035 * <!ENTITY lt "<">
5036 * Which seems to be valid since
5037 * 2.4: The ampersand character (&) and the left angle
5038 * bracket (<) may appear in their literal form only
5039 * when used ... They are also legal within the literal
5040 * entity value of an internal entity declaration;i
5041 * see "4.3.2 Well-Formed Parsed Entities".
5042 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5043 * Looking at the OASIS test suite and James Clark
5044 * tests, this is broken. However the XML REC uses
5045 * it. Is the XML REC not well-formed ????
5046 * This is a hack to avoid this problem
5047 *
5048 * ANSWER: since lt gt amp .. are already defined,
5049 * this is a redefinition and hence the fact that the
5050 * contentis not well balanced is not a Wf error, this
5051 * is lousy but acceptable.
5052 */
5053 list = xmlNewDocText(ctxt->myDoc, value);
5054 if (list != NULL) {
5055 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5056 (ent->children == NULL)) {
5057 ent->children = list;
5058 ent->last = list;
5059 list->parent = (xmlNodePtr) ent;
5060 } else {
5061 xmlFreeNodeList(list);
5062 }
5063 } else if (list != NULL) {
5064 xmlFreeNodeList(list);
5065 }
5066 } else {
5067 /*
5068 * 4.3.2: An internal general parsed entity is well-formed
5069 * if its replacement text matches the production labeled
5070 * content.
5071 */
5072 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5073 ctxt->depth++;
5074 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5075 ctxt->sax, NULL, ctxt->depth,
5076 value, &list);
5077 ctxt->depth--;
5078 } else if (ent->etype ==
5079 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5080 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005081 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005082 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005083 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005084 ctxt->depth--;
5085 } else {
5086 ret = -1;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "Internal: invalid entity type\n");
5090 }
5091 if (ret == XML_ERR_ENTITY_LOOP) {
5092 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094 ctxt->sax->error(ctxt->userData,
5095 "Detected entity reference loop\n");
5096 ctxt->wellFormed = 0;
5097 ctxt->disableSAX = 1;
5098 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005099 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5100 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005101 (ent->children == NULL)) {
5102 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005103 if (ctxt->replaceEntities) {
5104 /*
5105 * Prune it directly in the generated document
5106 * except for single text nodes.
5107 */
5108 if ((list->type == XML_TEXT_NODE) &&
5109 (list->next == NULL)) {
5110 list->parent = (xmlNodePtr) ent;
5111 list = NULL;
5112 } else {
5113 while (list != NULL) {
5114 list->parent = (xmlNodePtr) ctxt->node;
5115 if (list->next == NULL)
5116 ent->last = list;
5117 list = list->next;
5118 }
5119 list = ent->children;
5120 }
5121 } else {
5122 while (list != NULL) {
5123 list->parent = (xmlNodePtr) ent;
5124 if (list->next == NULL)
5125 ent->last = list;
5126 list = list->next;
5127 }
Owen Taylor3473f882001-02-23 17:55:21 +00005128 }
5129 } else {
5130 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005131 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005132 }
5133 } else if (ret > 0) {
5134 ctxt->errNo = ret;
5135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5136 ctxt->sax->error(ctxt->userData,
5137 "Entity value required\n");
5138 ctxt->wellFormed = 0;
5139 ctxt->disableSAX = 1;
5140 } else if (list != NULL) {
5141 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005142 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005143 }
5144 }
5145 }
5146 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5147 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5148 /*
5149 * Create a node.
5150 */
5151 ctxt->sax->reference(ctxt->userData, ent->name);
5152 return;
5153 } else if (ctxt->replaceEntities) {
5154 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5155 /*
5156 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005157 * a simple tree copy for all references except the first
5158 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005159 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005160 if (list == NULL) {
5161 xmlNodePtr new, cur;
5162 cur = ent->children;
5163 while (cur != NULL) {
5164 new = xmlCopyNode(cur, 1);
5165 xmlAddChild(ctxt->node, new);
5166 if (cur == ent->last)
5167 break;
5168 cur = cur->next;
5169 }
5170 } else {
5171 /*
5172 * the name change is to avoid coalescing of the
5173 * node with a prossible previous text one which
5174 * would make ent->children a dandling pointer
5175 */
5176 if (ent->children->type == XML_TEXT_NODE)
5177 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5178 if ((ent->last != ent->children) &&
5179 (ent->last->type == XML_TEXT_NODE))
5180 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5181 xmlAddChildList(ctxt->node, ent->children);
5182 }
5183
Owen Taylor3473f882001-02-23 17:55:21 +00005184 /*
5185 * This is to avoid a nasty side effect, see
5186 * characters() in SAX.c
5187 */
5188 ctxt->nodemem = 0;
5189 ctxt->nodelen = 0;
5190 return;
5191 } else {
5192 /*
5193 * Probably running in SAX mode
5194 */
5195 xmlParserInputPtr input;
5196
5197 input = xmlNewEntityInputStream(ctxt, ent);
5198 xmlPushInput(ctxt, input);
5199 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5200 (RAW == '<') && (NXT(1) == '?') &&
5201 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5202 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5203 xmlParseTextDecl(ctxt);
5204 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5205 /*
5206 * The XML REC instructs us to stop parsing right here
5207 */
5208 ctxt->instate = XML_PARSER_EOF;
5209 return;
5210 }
5211 if (input->standalone == 1) {
5212 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5214 ctxt->sax->error(ctxt->userData,
5215 "external parsed entities cannot be standalone\n");
5216 ctxt->wellFormed = 0;
5217 ctxt->disableSAX = 1;
5218 }
5219 }
5220 return;
5221 }
5222 }
5223 } else {
5224 val = ent->content;
5225 if (val == NULL) return;
5226 /*
5227 * inline the entity.
5228 */
5229 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5230 (!ctxt->disableSAX))
5231 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5232 }
5233 }
5234}
5235
5236/**
5237 * xmlParseEntityRef:
5238 * @ctxt: an XML parser context
5239 *
5240 * parse ENTITY references declarations
5241 *
5242 * [68] EntityRef ::= '&' Name ';'
5243 *
5244 * [ WFC: Entity Declared ]
5245 * In a document without any DTD, a document with only an internal DTD
5246 * subset which contains no parameter entity references, or a document
5247 * with "standalone='yes'", the Name given in the entity reference
5248 * must match that in an entity declaration, except that well-formed
5249 * documents need not declare any of the following entities: amp, lt,
5250 * gt, apos, quot. The declaration of a parameter entity must precede
5251 * any reference to it. Similarly, the declaration of a general entity
5252 * must precede any reference to it which appears in a default value in an
5253 * attribute-list declaration. Note that if entities are declared in the
5254 * external subset or in external parameter entities, a non-validating
5255 * processor is not obligated to read and process their declarations;
5256 * for such documents, the rule that an entity must be declared is a
5257 * well-formedness constraint only if standalone='yes'.
5258 *
5259 * [ WFC: Parsed Entity ]
5260 * An entity reference must not contain the name of an unparsed entity
5261 *
5262 * Returns the xmlEntityPtr if found, or NULL otherwise.
5263 */
5264xmlEntityPtr
5265xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5266 xmlChar *name;
5267 xmlEntityPtr ent = NULL;
5268
5269 GROW;
5270
5271 if (RAW == '&') {
5272 NEXT;
5273 name = xmlParseName(ctxt);
5274 if (name == NULL) {
5275 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5277 ctxt->sax->error(ctxt->userData,
5278 "xmlParseEntityRef: no name\n");
5279 ctxt->wellFormed = 0;
5280 ctxt->disableSAX = 1;
5281 } else {
5282 if (RAW == ';') {
5283 NEXT;
5284 /*
5285 * Ask first SAX for entity resolution, otherwise try the
5286 * predefined set.
5287 */
5288 if (ctxt->sax != NULL) {
5289 if (ctxt->sax->getEntity != NULL)
5290 ent = ctxt->sax->getEntity(ctxt->userData, name);
5291 if (ent == NULL)
5292 ent = xmlGetPredefinedEntity(name);
5293 }
5294 /*
5295 * [ WFC: Entity Declared ]
5296 * In a document without any DTD, a document with only an
5297 * internal DTD subset which contains no parameter entity
5298 * references, or a document with "standalone='yes'", the
5299 * Name given in the entity reference must match that in an
5300 * entity declaration, except that well-formed documents
5301 * need not declare any of the following entities: amp, lt,
5302 * gt, apos, quot.
5303 * The declaration of a parameter entity must precede any
5304 * reference to it.
5305 * Similarly, the declaration of a general entity must
5306 * precede any reference to it which appears in a default
5307 * value in an attribute-list declaration. Note that if
5308 * entities are declared in the external subset or in
5309 * external parameter entities, a non-validating processor
5310 * is not obligated to read and process their declarations;
5311 * for such documents, the rule that an entity must be
5312 * declared is a well-formedness constraint only if
5313 * standalone='yes'.
5314 */
5315 if (ent == NULL) {
5316 if ((ctxt->standalone == 1) ||
5317 ((ctxt->hasExternalSubset == 0) &&
5318 (ctxt->hasPErefs == 0))) {
5319 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "Entity '%s' not defined\n", name);
5323 ctxt->wellFormed = 0;
5324 ctxt->disableSAX = 1;
5325 } else {
5326 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5327 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005328 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005329 "Entity '%s' not defined\n", name);
5330 }
5331 }
5332
5333 /*
5334 * [ WFC: Parsed Entity ]
5335 * An entity reference must not contain the name of an
5336 * unparsed entity
5337 */
5338 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5339 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5341 ctxt->sax->error(ctxt->userData,
5342 "Entity reference to unparsed entity %s\n", name);
5343 ctxt->wellFormed = 0;
5344 ctxt->disableSAX = 1;
5345 }
5346
5347 /*
5348 * [ WFC: No External Entity References ]
5349 * Attribute values cannot contain direct or indirect
5350 * entity references to external entities.
5351 */
5352 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5353 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5354 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5356 ctxt->sax->error(ctxt->userData,
5357 "Attribute references external entity '%s'\n", name);
5358 ctxt->wellFormed = 0;
5359 ctxt->disableSAX = 1;
5360 }
5361 /*
5362 * [ WFC: No < in Attribute Values ]
5363 * The replacement text of any entity referred to directly or
5364 * indirectly in an attribute value (other than "&lt;") must
5365 * not contain a <.
5366 */
5367 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5368 (ent != NULL) &&
5369 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5370 (ent->content != NULL) &&
5371 (xmlStrchr(ent->content, '<'))) {
5372 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5374 ctxt->sax->error(ctxt->userData,
5375 "'<' in entity '%s' is not allowed in attributes values\n", name);
5376 ctxt->wellFormed = 0;
5377 ctxt->disableSAX = 1;
5378 }
5379
5380 /*
5381 * Internal check, no parameter entities here ...
5382 */
5383 else {
5384 switch (ent->etype) {
5385 case XML_INTERNAL_PARAMETER_ENTITY:
5386 case XML_EXTERNAL_PARAMETER_ENTITY:
5387 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5389 ctxt->sax->error(ctxt->userData,
5390 "Attempt to reference the parameter entity '%s'\n", name);
5391 ctxt->wellFormed = 0;
5392 ctxt->disableSAX = 1;
5393 break;
5394 default:
5395 break;
5396 }
5397 }
5398
5399 /*
5400 * [ WFC: No Recursion ]
5401 * A parsed entity must not contain a recursive reference
5402 * to itself, either directly or indirectly.
5403 * Done somewhere else
5404 */
5405
5406 } else {
5407 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5409 ctxt->sax->error(ctxt->userData,
5410 "xmlParseEntityRef: expecting ';'\n");
5411 ctxt->wellFormed = 0;
5412 ctxt->disableSAX = 1;
5413 }
5414 xmlFree(name);
5415 }
5416 }
5417 return(ent);
5418}
5419
5420/**
5421 * xmlParseStringEntityRef:
5422 * @ctxt: an XML parser context
5423 * @str: a pointer to an index in the string
5424 *
5425 * parse ENTITY references declarations, but this version parses it from
5426 * a string value.
5427 *
5428 * [68] EntityRef ::= '&' Name ';'
5429 *
5430 * [ WFC: Entity Declared ]
5431 * In a document without any DTD, a document with only an internal DTD
5432 * subset which contains no parameter entity references, or a document
5433 * with "standalone='yes'", the Name given in the entity reference
5434 * must match that in an entity declaration, except that well-formed
5435 * documents need not declare any of the following entities: amp, lt,
5436 * gt, apos, quot. The declaration of a parameter entity must precede
5437 * any reference to it. Similarly, the declaration of a general entity
5438 * must precede any reference to it which appears in a default value in an
5439 * attribute-list declaration. Note that if entities are declared in the
5440 * external subset or in external parameter entities, a non-validating
5441 * processor is not obligated to read and process their declarations;
5442 * for such documents, the rule that an entity must be declared is a
5443 * well-formedness constraint only if standalone='yes'.
5444 *
5445 * [ WFC: Parsed Entity ]
5446 * An entity reference must not contain the name of an unparsed entity
5447 *
5448 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5449 * is updated to the current location in the string.
5450 */
5451xmlEntityPtr
5452xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5453 xmlChar *name;
5454 const xmlChar *ptr;
5455 xmlChar cur;
5456 xmlEntityPtr ent = NULL;
5457
5458 if ((str == NULL) || (*str == NULL))
5459 return(NULL);
5460 ptr = *str;
5461 cur = *ptr;
5462 if (cur == '&') {
5463 ptr++;
5464 cur = *ptr;
5465 name = xmlParseStringName(ctxt, &ptr);
5466 if (name == NULL) {
5467 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5469 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005470 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005471 ctxt->wellFormed = 0;
5472 ctxt->disableSAX = 1;
5473 } else {
5474 if (*ptr == ';') {
5475 ptr++;
5476 /*
5477 * Ask first SAX for entity resolution, otherwise try the
5478 * predefined set.
5479 */
5480 if (ctxt->sax != NULL) {
5481 if (ctxt->sax->getEntity != NULL)
5482 ent = ctxt->sax->getEntity(ctxt->userData, name);
5483 if (ent == NULL)
5484 ent = xmlGetPredefinedEntity(name);
5485 }
5486 /*
5487 * [ WFC: Entity Declared ]
5488 * In a document without any DTD, a document with only an
5489 * internal DTD subset which contains no parameter entity
5490 * references, or a document with "standalone='yes'", the
5491 * Name given in the entity reference must match that in an
5492 * entity declaration, except that well-formed documents
5493 * need not declare any of the following entities: amp, lt,
5494 * gt, apos, quot.
5495 * The declaration of a parameter entity must precede any
5496 * reference to it.
5497 * Similarly, the declaration of a general entity must
5498 * precede any reference to it which appears in a default
5499 * value in an attribute-list declaration. Note that if
5500 * entities are declared in the external subset or in
5501 * external parameter entities, a non-validating processor
5502 * is not obligated to read and process their declarations;
5503 * for such documents, the rule that an entity must be
5504 * declared is a well-formedness constraint only if
5505 * standalone='yes'.
5506 */
5507 if (ent == NULL) {
5508 if ((ctxt->standalone == 1) ||
5509 ((ctxt->hasExternalSubset == 0) &&
5510 (ctxt->hasPErefs == 0))) {
5511 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5513 ctxt->sax->error(ctxt->userData,
5514 "Entity '%s' not defined\n", name);
5515 ctxt->wellFormed = 0;
5516 ctxt->disableSAX = 1;
5517 } else {
5518 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5519 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5520 ctxt->sax->warning(ctxt->userData,
5521 "Entity '%s' not defined\n", name);
5522 }
5523 }
5524
5525 /*
5526 * [ WFC: Parsed Entity ]
5527 * An entity reference must not contain the name of an
5528 * unparsed entity
5529 */
5530 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5531 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5533 ctxt->sax->error(ctxt->userData,
5534 "Entity reference to unparsed entity %s\n", name);
5535 ctxt->wellFormed = 0;
5536 ctxt->disableSAX = 1;
5537 }
5538
5539 /*
5540 * [ WFC: No External Entity References ]
5541 * Attribute values cannot contain direct or indirect
5542 * entity references to external entities.
5543 */
5544 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5545 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5546 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5548 ctxt->sax->error(ctxt->userData,
5549 "Attribute references external entity '%s'\n", name);
5550 ctxt->wellFormed = 0;
5551 ctxt->disableSAX = 1;
5552 }
5553 /*
5554 * [ WFC: No < in Attribute Values ]
5555 * The replacement text of any entity referred to directly or
5556 * indirectly in an attribute value (other than "&lt;") must
5557 * not contain a <.
5558 */
5559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5560 (ent != NULL) &&
5561 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5562 (ent->content != NULL) &&
5563 (xmlStrchr(ent->content, '<'))) {
5564 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "'<' in entity '%s' is not allowed in attributes values\n", name);
5568 ctxt->wellFormed = 0;
5569 ctxt->disableSAX = 1;
5570 }
5571
5572 /*
5573 * Internal check, no parameter entities here ...
5574 */
5575 else {
5576 switch (ent->etype) {
5577 case XML_INTERNAL_PARAMETER_ENTITY:
5578 case XML_EXTERNAL_PARAMETER_ENTITY:
5579 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5581 ctxt->sax->error(ctxt->userData,
5582 "Attempt to reference the parameter entity '%s'\n", name);
5583 ctxt->wellFormed = 0;
5584 ctxt->disableSAX = 1;
5585 break;
5586 default:
5587 break;
5588 }
5589 }
5590
5591 /*
5592 * [ WFC: No Recursion ]
5593 * A parsed entity must not contain a recursive reference
5594 * to itself, either directly or indirectly.
5595 * Done somewhwere else
5596 */
5597
5598 } else {
5599 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5601 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005602 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005603 ctxt->wellFormed = 0;
5604 ctxt->disableSAX = 1;
5605 }
5606 xmlFree(name);
5607 }
5608 }
5609 *str = ptr;
5610 return(ent);
5611}
5612
5613/**
5614 * xmlParsePEReference:
5615 * @ctxt: an XML parser context
5616 *
5617 * parse PEReference declarations
5618 * The entity content is handled directly by pushing it's content as
5619 * a new input stream.
5620 *
5621 * [69] PEReference ::= '%' Name ';'
5622 *
5623 * [ WFC: No Recursion ]
5624 * A parsed entity must not contain a recursive
5625 * reference to itself, either directly or indirectly.
5626 *
5627 * [ WFC: Entity Declared ]
5628 * In a document without any DTD, a document with only an internal DTD
5629 * subset which contains no parameter entity references, or a document
5630 * with "standalone='yes'", ... ... The declaration of a parameter
5631 * entity must precede any reference to it...
5632 *
5633 * [ VC: Entity Declared ]
5634 * In a document with an external subset or external parameter entities
5635 * with "standalone='no'", ... ... The declaration of a parameter entity
5636 * must precede any reference to it...
5637 *
5638 * [ WFC: In DTD ]
5639 * Parameter-entity references may only appear in the DTD.
5640 * NOTE: misleading but this is handled.
5641 */
5642void
5643xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5644 xmlChar *name;
5645 xmlEntityPtr entity = NULL;
5646 xmlParserInputPtr input;
5647
5648 if (RAW == '%') {
5649 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005650 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005651 if (name == NULL) {
5652 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5654 ctxt->sax->error(ctxt->userData,
5655 "xmlParsePEReference: no name\n");
5656 ctxt->wellFormed = 0;
5657 ctxt->disableSAX = 1;
5658 } else {
5659 if (RAW == ';') {
5660 NEXT;
5661 if ((ctxt->sax != NULL) &&
5662 (ctxt->sax->getParameterEntity != NULL))
5663 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5664 name);
5665 if (entity == NULL) {
5666 /*
5667 * [ WFC: Entity Declared ]
5668 * In a document without any DTD, a document with only an
5669 * internal DTD subset which contains no parameter entity
5670 * references, or a document with "standalone='yes'", ...
5671 * ... The declaration of a parameter entity must precede
5672 * any reference to it...
5673 */
5674 if ((ctxt->standalone == 1) ||
5675 ((ctxt->hasExternalSubset == 0) &&
5676 (ctxt->hasPErefs == 0))) {
5677 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5678 if ((!ctxt->disableSAX) &&
5679 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5680 ctxt->sax->error(ctxt->userData,
5681 "PEReference: %%%s; not found\n", name);
5682 ctxt->wellFormed = 0;
5683 ctxt->disableSAX = 1;
5684 } else {
5685 /*
5686 * [ VC: Entity Declared ]
5687 * In a document with an external subset or external
5688 * parameter entities with "standalone='no'", ...
5689 * ... The declaration of a parameter entity must precede
5690 * any reference to it...
5691 */
5692 if ((!ctxt->disableSAX) &&
5693 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5694 ctxt->sax->warning(ctxt->userData,
5695 "PEReference: %%%s; not found\n", name);
5696 ctxt->valid = 0;
5697 }
5698 } else {
5699 /*
5700 * Internal checking in case the entity quest barfed
5701 */
5702 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5703 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5704 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5705 ctxt->sax->warning(ctxt->userData,
5706 "Internal: %%%s; is not a parameter entity\n", name);
5707 } else {
5708 /*
5709 * TODO !!!
5710 * handle the extra spaces added before and after
5711 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5712 */
5713 input = xmlNewEntityInputStream(ctxt, entity);
5714 xmlPushInput(ctxt, input);
5715 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5716 (RAW == '<') && (NXT(1) == '?') &&
5717 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5718 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5719 xmlParseTextDecl(ctxt);
5720 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5721 /*
5722 * The XML REC instructs us to stop parsing
5723 * right here
5724 */
5725 ctxt->instate = XML_PARSER_EOF;
5726 xmlFree(name);
5727 return;
5728 }
5729 }
5730 if (ctxt->token == 0)
5731 ctxt->token = ' ';
5732 }
5733 }
5734 ctxt->hasPErefs = 1;
5735 } else {
5736 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5738 ctxt->sax->error(ctxt->userData,
5739 "xmlParsePEReference: expecting ';'\n");
5740 ctxt->wellFormed = 0;
5741 ctxt->disableSAX = 1;
5742 }
5743 xmlFree(name);
5744 }
5745 }
5746}
5747
5748/**
5749 * xmlParseStringPEReference:
5750 * @ctxt: an XML parser context
5751 * @str: a pointer to an index in the string
5752 *
5753 * parse PEReference declarations
5754 *
5755 * [69] PEReference ::= '%' Name ';'
5756 *
5757 * [ WFC: No Recursion ]
5758 * A parsed entity must not contain a recursive
5759 * reference to itself, either directly or indirectly.
5760 *
5761 * [ WFC: Entity Declared ]
5762 * In a document without any DTD, a document with only an internal DTD
5763 * subset which contains no parameter entity references, or a document
5764 * with "standalone='yes'", ... ... The declaration of a parameter
5765 * entity must precede any reference to it...
5766 *
5767 * [ VC: Entity Declared ]
5768 * In a document with an external subset or external parameter entities
5769 * with "standalone='no'", ... ... The declaration of a parameter entity
5770 * must precede any reference to it...
5771 *
5772 * [ WFC: In DTD ]
5773 * Parameter-entity references may only appear in the DTD.
5774 * NOTE: misleading but this is handled.
5775 *
5776 * Returns the string of the entity content.
5777 * str is updated to the current value of the index
5778 */
5779xmlEntityPtr
5780xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5781 const xmlChar *ptr;
5782 xmlChar cur;
5783 xmlChar *name;
5784 xmlEntityPtr entity = NULL;
5785
5786 if ((str == NULL) || (*str == NULL)) return(NULL);
5787 ptr = *str;
5788 cur = *ptr;
5789 if (cur == '%') {
5790 ptr++;
5791 cur = *ptr;
5792 name = xmlParseStringName(ctxt, &ptr);
5793 if (name == NULL) {
5794 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5796 ctxt->sax->error(ctxt->userData,
5797 "xmlParseStringPEReference: no name\n");
5798 ctxt->wellFormed = 0;
5799 ctxt->disableSAX = 1;
5800 } else {
5801 cur = *ptr;
5802 if (cur == ';') {
5803 ptr++;
5804 cur = *ptr;
5805 if ((ctxt->sax != NULL) &&
5806 (ctxt->sax->getParameterEntity != NULL))
5807 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5808 name);
5809 if (entity == NULL) {
5810 /*
5811 * [ WFC: Entity Declared ]
5812 * In a document without any DTD, a document with only an
5813 * internal DTD subset which contains no parameter entity
5814 * references, or a document with "standalone='yes'", ...
5815 * ... The declaration of a parameter entity must precede
5816 * any reference to it...
5817 */
5818 if ((ctxt->standalone == 1) ||
5819 ((ctxt->hasExternalSubset == 0) &&
5820 (ctxt->hasPErefs == 0))) {
5821 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5823 ctxt->sax->error(ctxt->userData,
5824 "PEReference: %%%s; not found\n", name);
5825 ctxt->wellFormed = 0;
5826 ctxt->disableSAX = 1;
5827 } else {
5828 /*
5829 * [ VC: Entity Declared ]
5830 * In a document with an external subset or external
5831 * parameter entities with "standalone='no'", ...
5832 * ... The declaration of a parameter entity must
5833 * precede any reference to it...
5834 */
5835 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5836 ctxt->sax->warning(ctxt->userData,
5837 "PEReference: %%%s; not found\n", name);
5838 ctxt->valid = 0;
5839 }
5840 } else {
5841 /*
5842 * Internal checking in case the entity quest barfed
5843 */
5844 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5845 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5846 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5847 ctxt->sax->warning(ctxt->userData,
5848 "Internal: %%%s; is not a parameter entity\n", name);
5849 }
5850 }
5851 ctxt->hasPErefs = 1;
5852 } else {
5853 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5855 ctxt->sax->error(ctxt->userData,
5856 "xmlParseStringPEReference: expecting ';'\n");
5857 ctxt->wellFormed = 0;
5858 ctxt->disableSAX = 1;
5859 }
5860 xmlFree(name);
5861 }
5862 }
5863 *str = ptr;
5864 return(entity);
5865}
5866
5867/**
5868 * xmlParseDocTypeDecl:
5869 * @ctxt: an XML parser context
5870 *
5871 * parse a DOCTYPE declaration
5872 *
5873 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5874 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5875 *
5876 * [ VC: Root Element Type ]
5877 * The Name in the document type declaration must match the element
5878 * type of the root element.
5879 */
5880
5881void
5882xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5883 xmlChar *name = NULL;
5884 xmlChar *ExternalID = NULL;
5885 xmlChar *URI = NULL;
5886
5887 /*
5888 * We know that '<!DOCTYPE' has been detected.
5889 */
5890 SKIP(9);
5891
5892 SKIP_BLANKS;
5893
5894 /*
5895 * Parse the DOCTYPE name.
5896 */
5897 name = xmlParseName(ctxt);
5898 if (name == NULL) {
5899 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5901 ctxt->sax->error(ctxt->userData,
5902 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5903 ctxt->wellFormed = 0;
5904 ctxt->disableSAX = 1;
5905 }
5906 ctxt->intSubName = name;
5907
5908 SKIP_BLANKS;
5909
5910 /*
5911 * Check for SystemID and ExternalID
5912 */
5913 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5914
5915 if ((URI != NULL) || (ExternalID != NULL)) {
5916 ctxt->hasExternalSubset = 1;
5917 }
5918 ctxt->extSubURI = URI;
5919 ctxt->extSubSystem = ExternalID;
5920
5921 SKIP_BLANKS;
5922
5923 /*
5924 * Create and update the internal subset.
5925 */
5926 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5927 (!ctxt->disableSAX))
5928 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5929
5930 /*
5931 * Is there any internal subset declarations ?
5932 * they are handled separately in xmlParseInternalSubset()
5933 */
5934 if (RAW == '[')
5935 return;
5936
5937 /*
5938 * We should be at the end of the DOCTYPE declaration.
5939 */
5940 if (RAW != '>') {
5941 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5943 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5944 ctxt->wellFormed = 0;
5945 ctxt->disableSAX = 1;
5946 }
5947 NEXT;
5948}
5949
5950/**
5951 * xmlParseInternalsubset:
5952 * @ctxt: an XML parser context
5953 *
5954 * parse the internal subset declaration
5955 *
5956 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5957 */
5958
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005959static void
Owen Taylor3473f882001-02-23 17:55:21 +00005960xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5961 /*
5962 * Is there any DTD definition ?
5963 */
5964 if (RAW == '[') {
5965 ctxt->instate = XML_PARSER_DTD;
5966 NEXT;
5967 /*
5968 * Parse the succession of Markup declarations and
5969 * PEReferences.
5970 * Subsequence (markupdecl | PEReference | S)*
5971 */
5972 while (RAW != ']') {
5973 const xmlChar *check = CUR_PTR;
5974 int cons = ctxt->input->consumed;
5975
5976 SKIP_BLANKS;
5977 xmlParseMarkupDecl(ctxt);
5978 xmlParsePEReference(ctxt);
5979
5980 /*
5981 * Pop-up of finished entities.
5982 */
5983 while ((RAW == 0) && (ctxt->inputNr > 1))
5984 xmlPopInput(ctxt);
5985
5986 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5987 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5989 ctxt->sax->error(ctxt->userData,
5990 "xmlParseInternalSubset: error detected in Markup declaration\n");
5991 ctxt->wellFormed = 0;
5992 ctxt->disableSAX = 1;
5993 break;
5994 }
5995 }
5996 if (RAW == ']') {
5997 NEXT;
5998 SKIP_BLANKS;
5999 }
6000 }
6001
6002 /*
6003 * We should be at the end of the DOCTYPE declaration.
6004 */
6005 if (RAW != '>') {
6006 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6008 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6009 ctxt->wellFormed = 0;
6010 ctxt->disableSAX = 1;
6011 }
6012 NEXT;
6013}
6014
6015/**
6016 * xmlParseAttribute:
6017 * @ctxt: an XML parser context
6018 * @value: a xmlChar ** used to store the value of the attribute
6019 *
6020 * parse an attribute
6021 *
6022 * [41] Attribute ::= Name Eq AttValue
6023 *
6024 * [ WFC: No External Entity References ]
6025 * Attribute values cannot contain direct or indirect entity references
6026 * to external entities.
6027 *
6028 * [ WFC: No < in Attribute Values ]
6029 * The replacement text of any entity referred to directly or indirectly in
6030 * an attribute value (other than "&lt;") must not contain a <.
6031 *
6032 * [ VC: Attribute Value Type ]
6033 * The attribute must have been declared; the value must be of the type
6034 * declared for it.
6035 *
6036 * [25] Eq ::= S? '=' S?
6037 *
6038 * With namespace:
6039 *
6040 * [NS 11] Attribute ::= QName Eq AttValue
6041 *
6042 * Also the case QName == xmlns:??? is handled independently as a namespace
6043 * definition.
6044 *
6045 * Returns the attribute name, and the value in *value.
6046 */
6047
6048xmlChar *
6049xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6050 xmlChar *name, *val;
6051
6052 *value = NULL;
6053 name = xmlParseName(ctxt);
6054 if (name == NULL) {
6055 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6058 ctxt->wellFormed = 0;
6059 ctxt->disableSAX = 1;
6060 return(NULL);
6061 }
6062
6063 /*
6064 * read the value
6065 */
6066 SKIP_BLANKS;
6067 if (RAW == '=') {
6068 NEXT;
6069 SKIP_BLANKS;
6070 val = xmlParseAttValue(ctxt);
6071 ctxt->instate = XML_PARSER_CONTENT;
6072 } else {
6073 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6075 ctxt->sax->error(ctxt->userData,
6076 "Specification mandate value for attribute %s\n", name);
6077 ctxt->wellFormed = 0;
6078 ctxt->disableSAX = 1;
6079 xmlFree(name);
6080 return(NULL);
6081 }
6082
6083 /*
6084 * Check that xml:lang conforms to the specification
6085 * No more registered as an error, just generate a warning now
6086 * since this was deprecated in XML second edition
6087 */
6088 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6089 if (!xmlCheckLanguageID(val)) {
6090 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6091 ctxt->sax->warning(ctxt->userData,
6092 "Malformed value for xml:lang : %s\n", val);
6093 }
6094 }
6095
6096 /*
6097 * Check that xml:space conforms to the specification
6098 */
6099 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6100 if (xmlStrEqual(val, BAD_CAST "default"))
6101 *(ctxt->space) = 0;
6102 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6103 *(ctxt->space) = 1;
6104 else {
6105 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6107 ctxt->sax->error(ctxt->userData,
6108"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6109 val);
6110 ctxt->wellFormed = 0;
6111 ctxt->disableSAX = 1;
6112 }
6113 }
6114
6115 *value = val;
6116 return(name);
6117}
6118
6119/**
6120 * xmlParseStartTag:
6121 * @ctxt: an XML parser context
6122 *
6123 * parse a start of tag either for rule element or
6124 * EmptyElement. In both case we don't parse the tag closing chars.
6125 *
6126 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6127 *
6128 * [ WFC: Unique Att Spec ]
6129 * No attribute name may appear more than once in the same start-tag or
6130 * empty-element tag.
6131 *
6132 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6133 *
6134 * [ WFC: Unique Att Spec ]
6135 * No attribute name may appear more than once in the same start-tag or
6136 * empty-element tag.
6137 *
6138 * With namespace:
6139 *
6140 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6141 *
6142 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6143 *
6144 * Returns the element name parsed
6145 */
6146
6147xmlChar *
6148xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6149 xmlChar *name;
6150 xmlChar *attname;
6151 xmlChar *attvalue;
6152 const xmlChar **atts = NULL;
6153 int nbatts = 0;
6154 int maxatts = 0;
6155 int i;
6156
6157 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006158 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006159
6160 name = xmlParseName(ctxt);
6161 if (name == NULL) {
6162 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6164 ctxt->sax->error(ctxt->userData,
6165 "xmlParseStartTag: invalid element name\n");
6166 ctxt->wellFormed = 0;
6167 ctxt->disableSAX = 1;
6168 return(NULL);
6169 }
6170
6171 /*
6172 * Now parse the attributes, it ends up with the ending
6173 *
6174 * (S Attribute)* S?
6175 */
6176 SKIP_BLANKS;
6177 GROW;
6178
Daniel Veillard21a0f912001-02-25 19:54:14 +00006179 while ((RAW != '>') &&
6180 ((RAW != '/') || (NXT(1) != '>')) &&
6181 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006182 const xmlChar *q = CUR_PTR;
6183 int cons = ctxt->input->consumed;
6184
6185 attname = xmlParseAttribute(ctxt, &attvalue);
6186 if ((attname != NULL) && (attvalue != NULL)) {
6187 /*
6188 * [ WFC: Unique Att Spec ]
6189 * No attribute name may appear more than once in the same
6190 * start-tag or empty-element tag.
6191 */
6192 for (i = 0; i < nbatts;i += 2) {
6193 if (xmlStrEqual(atts[i], attname)) {
6194 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6196 ctxt->sax->error(ctxt->userData,
6197 "Attribute %s redefined\n",
6198 attname);
6199 ctxt->wellFormed = 0;
6200 ctxt->disableSAX = 1;
6201 xmlFree(attname);
6202 xmlFree(attvalue);
6203 goto failed;
6204 }
6205 }
6206
6207 /*
6208 * Add the pair to atts
6209 */
6210 if (atts == NULL) {
6211 maxatts = 10;
6212 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6213 if (atts == NULL) {
6214 xmlGenericError(xmlGenericErrorContext,
6215 "malloc of %ld byte failed\n",
6216 maxatts * (long)sizeof(xmlChar *));
6217 return(NULL);
6218 }
6219 } else if (nbatts + 4 > maxatts) {
6220 maxatts *= 2;
6221 atts = (const xmlChar **) xmlRealloc((void *) atts,
6222 maxatts * sizeof(xmlChar *));
6223 if (atts == NULL) {
6224 xmlGenericError(xmlGenericErrorContext,
6225 "realloc of %ld byte failed\n",
6226 maxatts * (long)sizeof(xmlChar *));
6227 return(NULL);
6228 }
6229 }
6230 atts[nbatts++] = attname;
6231 atts[nbatts++] = attvalue;
6232 atts[nbatts] = NULL;
6233 atts[nbatts + 1] = NULL;
6234 } else {
6235 if (attname != NULL)
6236 xmlFree(attname);
6237 if (attvalue != NULL)
6238 xmlFree(attvalue);
6239 }
6240
6241failed:
6242
6243 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6244 break;
6245 if (!IS_BLANK(RAW)) {
6246 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6248 ctxt->sax->error(ctxt->userData,
6249 "attributes construct error\n");
6250 ctxt->wellFormed = 0;
6251 ctxt->disableSAX = 1;
6252 }
6253 SKIP_BLANKS;
6254 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6255 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6257 ctxt->sax->error(ctxt->userData,
6258 "xmlParseStartTag: problem parsing attributes\n");
6259 ctxt->wellFormed = 0;
6260 ctxt->disableSAX = 1;
6261 break;
6262 }
6263 GROW;
6264 }
6265
6266 /*
6267 * SAX: Start of Element !
6268 */
6269 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6270 (!ctxt->disableSAX))
6271 ctxt->sax->startElement(ctxt->userData, name, atts);
6272
6273 if (atts != NULL) {
6274 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6275 xmlFree((void *) atts);
6276 }
6277 return(name);
6278}
6279
6280/**
6281 * xmlParseEndTag:
6282 * @ctxt: an XML parser context
6283 *
6284 * parse an end of tag
6285 *
6286 * [42] ETag ::= '</' Name S? '>'
6287 *
6288 * With namespace
6289 *
6290 * [NS 9] ETag ::= '</' QName S? '>'
6291 */
6292
6293void
6294xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6295 xmlChar *name;
6296 xmlChar *oldname;
6297
6298 GROW;
6299 if ((RAW != '<') || (NXT(1) != '/')) {
6300 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6302 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6303 ctxt->wellFormed = 0;
6304 ctxt->disableSAX = 1;
6305 return;
6306 }
6307 SKIP(2);
6308
6309 name = xmlParseName(ctxt);
6310
6311 /*
6312 * We should definitely be at the ending "S? '>'" part
6313 */
6314 GROW;
6315 SKIP_BLANKS;
6316 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6317 ctxt->errNo = XML_ERR_GT_REQUIRED;
6318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6319 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6320 ctxt->wellFormed = 0;
6321 ctxt->disableSAX = 1;
6322 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006323 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006324
6325 /*
6326 * [ WFC: Element Type Match ]
6327 * The Name in an element's end-tag must match the element type in the
6328 * start-tag.
6329 *
6330 */
6331 if ((name == NULL) || (ctxt->name == NULL) ||
6332 (!xmlStrEqual(name, ctxt->name))) {
6333 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6335 if ((name != NULL) && (ctxt->name != NULL)) {
6336 ctxt->sax->error(ctxt->userData,
6337 "Opening and ending tag mismatch: %s and %s\n",
6338 ctxt->name, name);
6339 } else if (ctxt->name != NULL) {
6340 ctxt->sax->error(ctxt->userData,
6341 "Ending tag eror for: %s\n", ctxt->name);
6342 } else {
6343 ctxt->sax->error(ctxt->userData,
6344 "Ending tag error: internal error ???\n");
6345 }
6346
6347 }
6348 ctxt->wellFormed = 0;
6349 ctxt->disableSAX = 1;
6350 }
6351
6352 /*
6353 * SAX: End of Tag
6354 */
6355 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6356 (!ctxt->disableSAX))
6357 ctxt->sax->endElement(ctxt->userData, name);
6358
6359 if (name != NULL)
6360 xmlFree(name);
6361 oldname = namePop(ctxt);
6362 spacePop(ctxt);
6363 if (oldname != NULL) {
6364#ifdef DEBUG_STACK
6365 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6366#endif
6367 xmlFree(oldname);
6368 }
6369 return;
6370}
6371
6372/**
6373 * xmlParseCDSect:
6374 * @ctxt: an XML parser context
6375 *
6376 * Parse escaped pure raw content.
6377 *
6378 * [18] CDSect ::= CDStart CData CDEnd
6379 *
6380 * [19] CDStart ::= '<![CDATA['
6381 *
6382 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6383 *
6384 * [21] CDEnd ::= ']]>'
6385 */
6386void
6387xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6388 xmlChar *buf = NULL;
6389 int len = 0;
6390 int size = XML_PARSER_BUFFER_SIZE;
6391 int r, rl;
6392 int s, sl;
6393 int cur, l;
6394 int count = 0;
6395
6396 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6397 (NXT(2) == '[') && (NXT(3) == 'C') &&
6398 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6399 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6400 (NXT(8) == '[')) {
6401 SKIP(9);
6402 } else
6403 return;
6404
6405 ctxt->instate = XML_PARSER_CDATA_SECTION;
6406 r = CUR_CHAR(rl);
6407 if (!IS_CHAR(r)) {
6408 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6410 ctxt->sax->error(ctxt->userData,
6411 "CData section not finished\n");
6412 ctxt->wellFormed = 0;
6413 ctxt->disableSAX = 1;
6414 ctxt->instate = XML_PARSER_CONTENT;
6415 return;
6416 }
6417 NEXTL(rl);
6418 s = CUR_CHAR(sl);
6419 if (!IS_CHAR(s)) {
6420 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6422 ctxt->sax->error(ctxt->userData,
6423 "CData section not finished\n");
6424 ctxt->wellFormed = 0;
6425 ctxt->disableSAX = 1;
6426 ctxt->instate = XML_PARSER_CONTENT;
6427 return;
6428 }
6429 NEXTL(sl);
6430 cur = CUR_CHAR(l);
6431 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6432 if (buf == NULL) {
6433 xmlGenericError(xmlGenericErrorContext,
6434 "malloc of %d byte failed\n", size);
6435 return;
6436 }
6437 while (IS_CHAR(cur) &&
6438 ((r != ']') || (s != ']') || (cur != '>'))) {
6439 if (len + 5 >= size) {
6440 size *= 2;
6441 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6442 if (buf == NULL) {
6443 xmlGenericError(xmlGenericErrorContext,
6444 "realloc of %d byte failed\n", size);
6445 return;
6446 }
6447 }
6448 COPY_BUF(rl,buf,len,r);
6449 r = s;
6450 rl = sl;
6451 s = cur;
6452 sl = l;
6453 count++;
6454 if (count > 50) {
6455 GROW;
6456 count = 0;
6457 }
6458 NEXTL(l);
6459 cur = CUR_CHAR(l);
6460 }
6461 buf[len] = 0;
6462 ctxt->instate = XML_PARSER_CONTENT;
6463 if (cur != '>') {
6464 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6466 ctxt->sax->error(ctxt->userData,
6467 "CData section not finished\n%.50s\n", buf);
6468 ctxt->wellFormed = 0;
6469 ctxt->disableSAX = 1;
6470 xmlFree(buf);
6471 return;
6472 }
6473 NEXTL(l);
6474
6475 /*
6476 * Ok the buffer is to be consumed as cdata.
6477 */
6478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6479 if (ctxt->sax->cdataBlock != NULL)
6480 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006481 else if (ctxt->sax->characters != NULL)
6482 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006483 }
6484 xmlFree(buf);
6485}
6486
6487/**
6488 * xmlParseContent:
6489 * @ctxt: an XML parser context
6490 *
6491 * Parse a content:
6492 *
6493 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6494 */
6495
6496void
6497xmlParseContent(xmlParserCtxtPtr ctxt) {
6498 GROW;
6499 while (((RAW != 0) || (ctxt->token != 0)) &&
6500 ((RAW != '<') || (NXT(1) != '/'))) {
6501 const xmlChar *test = CUR_PTR;
6502 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006503 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006504 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006505
6506 /*
6507 * Handle possible processed charrefs.
6508 */
6509 if (ctxt->token != 0) {
6510 xmlParseCharData(ctxt, 0);
6511 }
6512 /*
6513 * First case : a Processing Instruction.
6514 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006515 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006516 xmlParsePI(ctxt);
6517 }
6518
6519 /*
6520 * Second case : a CDSection
6521 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006522 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006523 (NXT(2) == '[') && (NXT(3) == 'C') &&
6524 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6525 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6526 (NXT(8) == '[')) {
6527 xmlParseCDSect(ctxt);
6528 }
6529
6530 /*
6531 * Third case : a comment
6532 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006533 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006534 (NXT(2) == '-') && (NXT(3) == '-')) {
6535 xmlParseComment(ctxt);
6536 ctxt->instate = XML_PARSER_CONTENT;
6537 }
6538
6539 /*
6540 * Fourth case : a sub-element.
6541 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006542 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006543 xmlParseElement(ctxt);
6544 }
6545
6546 /*
6547 * Fifth case : a reference. If if has not been resolved,
6548 * parsing returns it's Name, create the node
6549 */
6550
Daniel Veillard21a0f912001-02-25 19:54:14 +00006551 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006552 xmlParseReference(ctxt);
6553 }
6554
6555 /*
6556 * Last case, text. Note that References are handled directly.
6557 */
6558 else {
6559 xmlParseCharData(ctxt, 0);
6560 }
6561
6562 GROW;
6563 /*
6564 * Pop-up of finished entities.
6565 */
6566 while ((RAW == 0) && (ctxt->inputNr > 1))
6567 xmlPopInput(ctxt);
6568 SHRINK;
6569
6570 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6571 (tok == ctxt->token)) {
6572 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6574 ctxt->sax->error(ctxt->userData,
6575 "detected an error in element content\n");
6576 ctxt->wellFormed = 0;
6577 ctxt->disableSAX = 1;
6578 ctxt->instate = XML_PARSER_EOF;
6579 break;
6580 }
6581 }
6582}
6583
6584/**
6585 * xmlParseElement:
6586 * @ctxt: an XML parser context
6587 *
6588 * parse an XML element, this is highly recursive
6589 *
6590 * [39] element ::= EmptyElemTag | STag content ETag
6591 *
6592 * [ WFC: Element Type Match ]
6593 * The Name in an element's end-tag must match the element type in the
6594 * start-tag.
6595 *
6596 * [ VC: Element Valid ]
6597 * An element is valid if there is a declaration matching elementdecl
6598 * where the Name matches the element type and one of the following holds:
6599 * - The declaration matches EMPTY and the element has no content.
6600 * - The declaration matches children and the sequence of child elements
6601 * belongs to the language generated by the regular expression in the
6602 * content model, with optional white space (characters matching the
6603 * nonterminal S) between each pair of child elements.
6604 * - The declaration matches Mixed and the content consists of character
6605 * data and child elements whose types match names in the content model.
6606 * - The declaration matches ANY, and the types of any child elements have
6607 * been declared.
6608 */
6609
6610void
6611xmlParseElement(xmlParserCtxtPtr ctxt) {
6612 const xmlChar *openTag = CUR_PTR;
6613 xmlChar *name;
6614 xmlChar *oldname;
6615 xmlParserNodeInfo node_info;
6616 xmlNodePtr ret;
6617
6618 /* Capture start position */
6619 if (ctxt->record_info) {
6620 node_info.begin_pos = ctxt->input->consumed +
6621 (CUR_PTR - ctxt->input->base);
6622 node_info.begin_line = ctxt->input->line;
6623 }
6624
6625 if (ctxt->spaceNr == 0)
6626 spacePush(ctxt, -1);
6627 else
6628 spacePush(ctxt, *ctxt->space);
6629
6630 name = xmlParseStartTag(ctxt);
6631 if (name == NULL) {
6632 spacePop(ctxt);
6633 return;
6634 }
6635 namePush(ctxt, name);
6636 ret = ctxt->node;
6637
6638 /*
6639 * [ VC: Root Element Type ]
6640 * The Name in the document type declaration must match the element
6641 * type of the root element.
6642 */
6643 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6644 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6645 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6646
6647 /*
6648 * Check for an Empty Element.
6649 */
6650 if ((RAW == '/') && (NXT(1) == '>')) {
6651 SKIP(2);
6652 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6653 (!ctxt->disableSAX))
6654 ctxt->sax->endElement(ctxt->userData, name);
6655 oldname = namePop(ctxt);
6656 spacePop(ctxt);
6657 if (oldname != NULL) {
6658#ifdef DEBUG_STACK
6659 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6660#endif
6661 xmlFree(oldname);
6662 }
6663 if ( ret != NULL && ctxt->record_info ) {
6664 node_info.end_pos = ctxt->input->consumed +
6665 (CUR_PTR - ctxt->input->base);
6666 node_info.end_line = ctxt->input->line;
6667 node_info.node = ret;
6668 xmlParserAddNodeInfo(ctxt, &node_info);
6669 }
6670 return;
6671 }
6672 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006673 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006674 } else {
6675 ctxt->errNo = XML_ERR_GT_REQUIRED;
6676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6677 ctxt->sax->error(ctxt->userData,
6678 "Couldn't find end of Start Tag\n%.30s\n",
6679 openTag);
6680 ctxt->wellFormed = 0;
6681 ctxt->disableSAX = 1;
6682
6683 /*
6684 * end of parsing of this node.
6685 */
6686 nodePop(ctxt);
6687 oldname = namePop(ctxt);
6688 spacePop(ctxt);
6689 if (oldname != NULL) {
6690#ifdef DEBUG_STACK
6691 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6692#endif
6693 xmlFree(oldname);
6694 }
6695
6696 /*
6697 * Capture end position and add node
6698 */
6699 if ( ret != NULL && ctxt->record_info ) {
6700 node_info.end_pos = ctxt->input->consumed +
6701 (CUR_PTR - ctxt->input->base);
6702 node_info.end_line = ctxt->input->line;
6703 node_info.node = ret;
6704 xmlParserAddNodeInfo(ctxt, &node_info);
6705 }
6706 return;
6707 }
6708
6709 /*
6710 * Parse the content of the element:
6711 */
6712 xmlParseContent(ctxt);
6713 if (!IS_CHAR(RAW)) {
6714 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6716 ctxt->sax->error(ctxt->userData,
6717 "Premature end of data in tag %.30s\n", openTag);
6718 ctxt->wellFormed = 0;
6719 ctxt->disableSAX = 1;
6720
6721 /*
6722 * end of parsing of this node.
6723 */
6724 nodePop(ctxt);
6725 oldname = namePop(ctxt);
6726 spacePop(ctxt);
6727 if (oldname != NULL) {
6728#ifdef DEBUG_STACK
6729 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6730#endif
6731 xmlFree(oldname);
6732 }
6733 return;
6734 }
6735
6736 /*
6737 * parse the end of tag: '</' should be here.
6738 */
6739 xmlParseEndTag(ctxt);
6740
6741 /*
6742 * Capture end position and add node
6743 */
6744 if ( ret != NULL && ctxt->record_info ) {
6745 node_info.end_pos = ctxt->input->consumed +
6746 (CUR_PTR - ctxt->input->base);
6747 node_info.end_line = ctxt->input->line;
6748 node_info.node = ret;
6749 xmlParserAddNodeInfo(ctxt, &node_info);
6750 }
6751}
6752
6753/**
6754 * xmlParseVersionNum:
6755 * @ctxt: an XML parser context
6756 *
6757 * parse the XML version value.
6758 *
6759 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6760 *
6761 * Returns the string giving the XML version number, or NULL
6762 */
6763xmlChar *
6764xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6765 xmlChar *buf = NULL;
6766 int len = 0;
6767 int size = 10;
6768 xmlChar cur;
6769
6770 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6771 if (buf == NULL) {
6772 xmlGenericError(xmlGenericErrorContext,
6773 "malloc of %d byte failed\n", size);
6774 return(NULL);
6775 }
6776 cur = CUR;
6777 while (((cur >= 'a') && (cur <= 'z')) ||
6778 ((cur >= 'A') && (cur <= 'Z')) ||
6779 ((cur >= '0') && (cur <= '9')) ||
6780 (cur == '_') || (cur == '.') ||
6781 (cur == ':') || (cur == '-')) {
6782 if (len + 1 >= size) {
6783 size *= 2;
6784 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6785 if (buf == NULL) {
6786 xmlGenericError(xmlGenericErrorContext,
6787 "realloc of %d byte failed\n", size);
6788 return(NULL);
6789 }
6790 }
6791 buf[len++] = cur;
6792 NEXT;
6793 cur=CUR;
6794 }
6795 buf[len] = 0;
6796 return(buf);
6797}
6798
6799/**
6800 * xmlParseVersionInfo:
6801 * @ctxt: an XML parser context
6802 *
6803 * parse the XML version.
6804 *
6805 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6806 *
6807 * [25] Eq ::= S? '=' S?
6808 *
6809 * Returns the version string, e.g. "1.0"
6810 */
6811
6812xmlChar *
6813xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6814 xmlChar *version = NULL;
6815 const xmlChar *q;
6816
6817 if ((RAW == 'v') && (NXT(1) == 'e') &&
6818 (NXT(2) == 'r') && (NXT(3) == 's') &&
6819 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6820 (NXT(6) == 'n')) {
6821 SKIP(7);
6822 SKIP_BLANKS;
6823 if (RAW != '=') {
6824 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6826 ctxt->sax->error(ctxt->userData,
6827 "xmlParseVersionInfo : expected '='\n");
6828 ctxt->wellFormed = 0;
6829 ctxt->disableSAX = 1;
6830 return(NULL);
6831 }
6832 NEXT;
6833 SKIP_BLANKS;
6834 if (RAW == '"') {
6835 NEXT;
6836 q = CUR_PTR;
6837 version = xmlParseVersionNum(ctxt);
6838 if (RAW != '"') {
6839 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6841 ctxt->sax->error(ctxt->userData,
6842 "String not closed\n%.50s\n", q);
6843 ctxt->wellFormed = 0;
6844 ctxt->disableSAX = 1;
6845 } else
6846 NEXT;
6847 } else if (RAW == '\''){
6848 NEXT;
6849 q = CUR_PTR;
6850 version = xmlParseVersionNum(ctxt);
6851 if (RAW != '\'') {
6852 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6854 ctxt->sax->error(ctxt->userData,
6855 "String not closed\n%.50s\n", q);
6856 ctxt->wellFormed = 0;
6857 ctxt->disableSAX = 1;
6858 } else
6859 NEXT;
6860 } else {
6861 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6863 ctxt->sax->error(ctxt->userData,
6864 "xmlParseVersionInfo : expected ' or \"\n");
6865 ctxt->wellFormed = 0;
6866 ctxt->disableSAX = 1;
6867 }
6868 }
6869 return(version);
6870}
6871
6872/**
6873 * xmlParseEncName:
6874 * @ctxt: an XML parser context
6875 *
6876 * parse the XML encoding name
6877 *
6878 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6879 *
6880 * Returns the encoding name value or NULL
6881 */
6882xmlChar *
6883xmlParseEncName(xmlParserCtxtPtr ctxt) {
6884 xmlChar *buf = NULL;
6885 int len = 0;
6886 int size = 10;
6887 xmlChar cur;
6888
6889 cur = CUR;
6890 if (((cur >= 'a') && (cur <= 'z')) ||
6891 ((cur >= 'A') && (cur <= 'Z'))) {
6892 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6893 if (buf == NULL) {
6894 xmlGenericError(xmlGenericErrorContext,
6895 "malloc of %d byte failed\n", size);
6896 return(NULL);
6897 }
6898
6899 buf[len++] = cur;
6900 NEXT;
6901 cur = CUR;
6902 while (((cur >= 'a') && (cur <= 'z')) ||
6903 ((cur >= 'A') && (cur <= 'Z')) ||
6904 ((cur >= '0') && (cur <= '9')) ||
6905 (cur == '.') || (cur == '_') ||
6906 (cur == '-')) {
6907 if (len + 1 >= size) {
6908 size *= 2;
6909 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6910 if (buf == NULL) {
6911 xmlGenericError(xmlGenericErrorContext,
6912 "realloc of %d byte failed\n", size);
6913 return(NULL);
6914 }
6915 }
6916 buf[len++] = cur;
6917 NEXT;
6918 cur = CUR;
6919 if (cur == 0) {
6920 SHRINK;
6921 GROW;
6922 cur = CUR;
6923 }
6924 }
6925 buf[len] = 0;
6926 } else {
6927 ctxt->errNo = XML_ERR_ENCODING_NAME;
6928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6929 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6930 ctxt->wellFormed = 0;
6931 ctxt->disableSAX = 1;
6932 }
6933 return(buf);
6934}
6935
6936/**
6937 * xmlParseEncodingDecl:
6938 * @ctxt: an XML parser context
6939 *
6940 * parse the XML encoding declaration
6941 *
6942 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6943 *
6944 * this setups the conversion filters.
6945 *
6946 * Returns the encoding value or NULL
6947 */
6948
6949xmlChar *
6950xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6951 xmlChar *encoding = NULL;
6952 const xmlChar *q;
6953
6954 SKIP_BLANKS;
6955 if ((RAW == 'e') && (NXT(1) == 'n') &&
6956 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6957 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6958 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6959 SKIP(8);
6960 SKIP_BLANKS;
6961 if (RAW != '=') {
6962 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6964 ctxt->sax->error(ctxt->userData,
6965 "xmlParseEncodingDecl : expected '='\n");
6966 ctxt->wellFormed = 0;
6967 ctxt->disableSAX = 1;
6968 return(NULL);
6969 }
6970 NEXT;
6971 SKIP_BLANKS;
6972 if (RAW == '"') {
6973 NEXT;
6974 q = CUR_PTR;
6975 encoding = xmlParseEncName(ctxt);
6976 if (RAW != '"') {
6977 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6979 ctxt->sax->error(ctxt->userData,
6980 "String not closed\n%.50s\n", q);
6981 ctxt->wellFormed = 0;
6982 ctxt->disableSAX = 1;
6983 } else
6984 NEXT;
6985 } else if (RAW == '\''){
6986 NEXT;
6987 q = CUR_PTR;
6988 encoding = xmlParseEncName(ctxt);
6989 if (RAW != '\'') {
6990 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6992 ctxt->sax->error(ctxt->userData,
6993 "String not closed\n%.50s\n", q);
6994 ctxt->wellFormed = 0;
6995 ctxt->disableSAX = 1;
6996 } else
6997 NEXT;
6998 } else if (RAW == '"'){
6999 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7001 ctxt->sax->error(ctxt->userData,
7002 "xmlParseEncodingDecl : expected ' or \"\n");
7003 ctxt->wellFormed = 0;
7004 ctxt->disableSAX = 1;
7005 }
7006 if (encoding != NULL) {
7007 xmlCharEncoding enc;
7008 xmlCharEncodingHandlerPtr handler;
7009
7010 if (ctxt->input->encoding != NULL)
7011 xmlFree((xmlChar *) ctxt->input->encoding);
7012 ctxt->input->encoding = encoding;
7013
7014 enc = xmlParseCharEncoding((const char *) encoding);
7015 /*
7016 * registered set of known encodings
7017 */
7018 if (enc != XML_CHAR_ENCODING_ERROR) {
7019 xmlSwitchEncoding(ctxt, enc);
7020 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7021 xmlFree(encoding);
7022 return(NULL);
7023 }
7024 } else {
7025 /*
7026 * fallback for unknown encodings
7027 */
7028 handler = xmlFindCharEncodingHandler((const char *) encoding);
7029 if (handler != NULL) {
7030 xmlSwitchToEncoding(ctxt, handler);
7031 } else {
7032 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7034 ctxt->sax->error(ctxt->userData,
7035 "Unsupported encoding %s\n", encoding);
7036 return(NULL);
7037 }
7038 }
7039 }
7040 }
7041 return(encoding);
7042}
7043
7044/**
7045 * xmlParseSDDecl:
7046 * @ctxt: an XML parser context
7047 *
7048 * parse the XML standalone declaration
7049 *
7050 * [32] SDDecl ::= S 'standalone' Eq
7051 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7052 *
7053 * [ VC: Standalone Document Declaration ]
7054 * TODO The standalone document declaration must have the value "no"
7055 * if any external markup declarations contain declarations of:
7056 * - attributes with default values, if elements to which these
7057 * attributes apply appear in the document without specifications
7058 * of values for these attributes, or
7059 * - entities (other than amp, lt, gt, apos, quot), if references
7060 * to those entities appear in the document, or
7061 * - attributes with values subject to normalization, where the
7062 * attribute appears in the document with a value which will change
7063 * as a result of normalization, or
7064 * - element types with element content, if white space occurs directly
7065 * within any instance of those types.
7066 *
7067 * Returns 1 if standalone, 0 otherwise
7068 */
7069
7070int
7071xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7072 int standalone = -1;
7073
7074 SKIP_BLANKS;
7075 if ((RAW == 's') && (NXT(1) == 't') &&
7076 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7077 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7078 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7079 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7080 SKIP(10);
7081 SKIP_BLANKS;
7082 if (RAW != '=') {
7083 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7085 ctxt->sax->error(ctxt->userData,
7086 "XML standalone declaration : expected '='\n");
7087 ctxt->wellFormed = 0;
7088 ctxt->disableSAX = 1;
7089 return(standalone);
7090 }
7091 NEXT;
7092 SKIP_BLANKS;
7093 if (RAW == '\''){
7094 NEXT;
7095 if ((RAW == 'n') && (NXT(1) == 'o')) {
7096 standalone = 0;
7097 SKIP(2);
7098 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7099 (NXT(2) == 's')) {
7100 standalone = 1;
7101 SKIP(3);
7102 } else {
7103 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7105 ctxt->sax->error(ctxt->userData,
7106 "standalone accepts only 'yes' or 'no'\n");
7107 ctxt->wellFormed = 0;
7108 ctxt->disableSAX = 1;
7109 }
7110 if (RAW != '\'') {
7111 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7113 ctxt->sax->error(ctxt->userData, "String not closed\n");
7114 ctxt->wellFormed = 0;
7115 ctxt->disableSAX = 1;
7116 } else
7117 NEXT;
7118 } else if (RAW == '"'){
7119 NEXT;
7120 if ((RAW == 'n') && (NXT(1) == 'o')) {
7121 standalone = 0;
7122 SKIP(2);
7123 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7124 (NXT(2) == 's')) {
7125 standalone = 1;
7126 SKIP(3);
7127 } else {
7128 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7130 ctxt->sax->error(ctxt->userData,
7131 "standalone accepts only 'yes' or 'no'\n");
7132 ctxt->wellFormed = 0;
7133 ctxt->disableSAX = 1;
7134 }
7135 if (RAW != '"') {
7136 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7138 ctxt->sax->error(ctxt->userData, "String not closed\n");
7139 ctxt->wellFormed = 0;
7140 ctxt->disableSAX = 1;
7141 } else
7142 NEXT;
7143 } else {
7144 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7146 ctxt->sax->error(ctxt->userData,
7147 "Standalone value not found\n");
7148 ctxt->wellFormed = 0;
7149 ctxt->disableSAX = 1;
7150 }
7151 }
7152 return(standalone);
7153}
7154
7155/**
7156 * xmlParseXMLDecl:
7157 * @ctxt: an XML parser context
7158 *
7159 * parse an XML declaration header
7160 *
7161 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7162 */
7163
7164void
7165xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7166 xmlChar *version;
7167
7168 /*
7169 * We know that '<?xml' is here.
7170 */
7171 SKIP(5);
7172
7173 if (!IS_BLANK(RAW)) {
7174 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7176 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7177 ctxt->wellFormed = 0;
7178 ctxt->disableSAX = 1;
7179 }
7180 SKIP_BLANKS;
7181
7182 /*
7183 * We should have the VersionInfo here.
7184 */
7185 version = xmlParseVersionInfo(ctxt);
7186 if (version == NULL)
7187 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7188 ctxt->version = xmlStrdup(version);
7189 xmlFree(version);
7190
7191 /*
7192 * We may have the encoding declaration
7193 */
7194 if (!IS_BLANK(RAW)) {
7195 if ((RAW == '?') && (NXT(1) == '>')) {
7196 SKIP(2);
7197 return;
7198 }
7199 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7201 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7202 ctxt->wellFormed = 0;
7203 ctxt->disableSAX = 1;
7204 }
7205 xmlParseEncodingDecl(ctxt);
7206 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7207 /*
7208 * The XML REC instructs us to stop parsing right here
7209 */
7210 return;
7211 }
7212
7213 /*
7214 * We may have the standalone status.
7215 */
7216 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7217 if ((RAW == '?') && (NXT(1) == '>')) {
7218 SKIP(2);
7219 return;
7220 }
7221 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7223 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7224 ctxt->wellFormed = 0;
7225 ctxt->disableSAX = 1;
7226 }
7227 SKIP_BLANKS;
7228 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7229
7230 SKIP_BLANKS;
7231 if ((RAW == '?') && (NXT(1) == '>')) {
7232 SKIP(2);
7233 } else if (RAW == '>') {
7234 /* Deprecated old WD ... */
7235 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7237 ctxt->sax->error(ctxt->userData,
7238 "XML declaration must end-up with '?>'\n");
7239 ctxt->wellFormed = 0;
7240 ctxt->disableSAX = 1;
7241 NEXT;
7242 } else {
7243 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7245 ctxt->sax->error(ctxt->userData,
7246 "parsing XML declaration: '?>' expected\n");
7247 ctxt->wellFormed = 0;
7248 ctxt->disableSAX = 1;
7249 MOVETO_ENDTAG(CUR_PTR);
7250 NEXT;
7251 }
7252}
7253
7254/**
7255 * xmlParseMisc:
7256 * @ctxt: an XML parser context
7257 *
7258 * parse an XML Misc* optionnal field.
7259 *
7260 * [27] Misc ::= Comment | PI | S
7261 */
7262
7263void
7264xmlParseMisc(xmlParserCtxtPtr ctxt) {
7265 while (((RAW == '<') && (NXT(1) == '?')) ||
7266 ((RAW == '<') && (NXT(1) == '!') &&
7267 (NXT(2) == '-') && (NXT(3) == '-')) ||
7268 IS_BLANK(CUR)) {
7269 if ((RAW == '<') && (NXT(1) == '?')) {
7270 xmlParsePI(ctxt);
7271 } else if (IS_BLANK(CUR)) {
7272 NEXT;
7273 } else
7274 xmlParseComment(ctxt);
7275 }
7276}
7277
7278/**
7279 * xmlParseDocument:
7280 * @ctxt: an XML parser context
7281 *
7282 * parse an XML document (and build a tree if using the standard SAX
7283 * interface).
7284 *
7285 * [1] document ::= prolog element Misc*
7286 *
7287 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7288 *
7289 * Returns 0, -1 in case of error. the parser context is augmented
7290 * as a result of the parsing.
7291 */
7292
7293int
7294xmlParseDocument(xmlParserCtxtPtr ctxt) {
7295 xmlChar start[4];
7296 xmlCharEncoding enc;
7297
7298 xmlInitParser();
7299
7300 GROW;
7301
7302 /*
7303 * SAX: beginning of the document processing.
7304 */
7305 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7306 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7307
Daniel Veillard50f34372001-08-03 12:06:36 +00007308 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007309 /*
7310 * Get the 4 first bytes and decode the charset
7311 * if enc != XML_CHAR_ENCODING_NONE
7312 * plug some encoding conversion routines.
7313 */
7314 start[0] = RAW;
7315 start[1] = NXT(1);
7316 start[2] = NXT(2);
7317 start[3] = NXT(3);
7318 enc = xmlDetectCharEncoding(start, 4);
7319 if (enc != XML_CHAR_ENCODING_NONE) {
7320 xmlSwitchEncoding(ctxt, enc);
7321 }
Owen Taylor3473f882001-02-23 17:55:21 +00007322 }
7323
7324
7325 if (CUR == 0) {
7326 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7328 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7329 ctxt->wellFormed = 0;
7330 ctxt->disableSAX = 1;
7331 }
7332
7333 /*
7334 * Check for the XMLDecl in the Prolog.
7335 */
7336 GROW;
7337 if ((RAW == '<') && (NXT(1) == '?') &&
7338 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7339 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7340
7341 /*
7342 * Note that we will switch encoding on the fly.
7343 */
7344 xmlParseXMLDecl(ctxt);
7345 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7346 /*
7347 * The XML REC instructs us to stop parsing right here
7348 */
7349 return(-1);
7350 }
7351 ctxt->standalone = ctxt->input->standalone;
7352 SKIP_BLANKS;
7353 } else {
7354 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7355 }
7356 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7357 ctxt->sax->startDocument(ctxt->userData);
7358
7359 /*
7360 * The Misc part of the Prolog
7361 */
7362 GROW;
7363 xmlParseMisc(ctxt);
7364
7365 /*
7366 * Then possibly doc type declaration(s) and more Misc
7367 * (doctypedecl Misc*)?
7368 */
7369 GROW;
7370 if ((RAW == '<') && (NXT(1) == '!') &&
7371 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7372 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7373 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7374 (NXT(8) == 'E')) {
7375
7376 ctxt->inSubset = 1;
7377 xmlParseDocTypeDecl(ctxt);
7378 if (RAW == '[') {
7379 ctxt->instate = XML_PARSER_DTD;
7380 xmlParseInternalSubset(ctxt);
7381 }
7382
7383 /*
7384 * Create and update the external subset.
7385 */
7386 ctxt->inSubset = 2;
7387 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7388 (!ctxt->disableSAX))
7389 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7390 ctxt->extSubSystem, ctxt->extSubURI);
7391 ctxt->inSubset = 0;
7392
7393
7394 ctxt->instate = XML_PARSER_PROLOG;
7395 xmlParseMisc(ctxt);
7396 }
7397
7398 /*
7399 * Time to start parsing the tree itself
7400 */
7401 GROW;
7402 if (RAW != '<') {
7403 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7405 ctxt->sax->error(ctxt->userData,
7406 "Start tag expected, '<' not found\n");
7407 ctxt->wellFormed = 0;
7408 ctxt->disableSAX = 1;
7409 ctxt->instate = XML_PARSER_EOF;
7410 } else {
7411 ctxt->instate = XML_PARSER_CONTENT;
7412 xmlParseElement(ctxt);
7413 ctxt->instate = XML_PARSER_EPILOG;
7414
7415
7416 /*
7417 * The Misc part at the end
7418 */
7419 xmlParseMisc(ctxt);
7420
7421 if (RAW != 0) {
7422 ctxt->errNo = XML_ERR_DOCUMENT_END;
7423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7424 ctxt->sax->error(ctxt->userData,
7425 "Extra content at the end of the document\n");
7426 ctxt->wellFormed = 0;
7427 ctxt->disableSAX = 1;
7428 }
7429 ctxt->instate = XML_PARSER_EOF;
7430 }
7431
7432 /*
7433 * SAX: end of the document processing.
7434 */
7435 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7436 (!ctxt->disableSAX))
7437 ctxt->sax->endDocument(ctxt->userData);
7438
7439 if (! ctxt->wellFormed) return(-1);
7440 return(0);
7441}
7442
7443/**
7444 * xmlParseExtParsedEnt:
7445 * @ctxt: an XML parser context
7446 *
7447 * parse a genreral parsed entity
7448 * An external general parsed entity is well-formed if it matches the
7449 * production labeled extParsedEnt.
7450 *
7451 * [78] extParsedEnt ::= TextDecl? content
7452 *
7453 * Returns 0, -1 in case of error. the parser context is augmented
7454 * as a result of the parsing.
7455 */
7456
7457int
7458xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7459 xmlChar start[4];
7460 xmlCharEncoding enc;
7461
7462 xmlDefaultSAXHandlerInit();
7463
7464 GROW;
7465
7466 /*
7467 * SAX: beginning of the document processing.
7468 */
7469 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7470 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7471
7472 /*
7473 * Get the 4 first bytes and decode the charset
7474 * if enc != XML_CHAR_ENCODING_NONE
7475 * plug some encoding conversion routines.
7476 */
7477 start[0] = RAW;
7478 start[1] = NXT(1);
7479 start[2] = NXT(2);
7480 start[3] = NXT(3);
7481 enc = xmlDetectCharEncoding(start, 4);
7482 if (enc != XML_CHAR_ENCODING_NONE) {
7483 xmlSwitchEncoding(ctxt, enc);
7484 }
7485
7486
7487 if (CUR == 0) {
7488 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7490 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7491 ctxt->wellFormed = 0;
7492 ctxt->disableSAX = 1;
7493 }
7494
7495 /*
7496 * Check for the XMLDecl in the Prolog.
7497 */
7498 GROW;
7499 if ((RAW == '<') && (NXT(1) == '?') &&
7500 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7501 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7502
7503 /*
7504 * Note that we will switch encoding on the fly.
7505 */
7506 xmlParseXMLDecl(ctxt);
7507 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7508 /*
7509 * The XML REC instructs us to stop parsing right here
7510 */
7511 return(-1);
7512 }
7513 SKIP_BLANKS;
7514 } else {
7515 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7516 }
7517 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7518 ctxt->sax->startDocument(ctxt->userData);
7519
7520 /*
7521 * Doing validity checking on chunk doesn't make sense
7522 */
7523 ctxt->instate = XML_PARSER_CONTENT;
7524 ctxt->validate = 0;
7525 ctxt->loadsubset = 0;
7526 ctxt->depth = 0;
7527
7528 xmlParseContent(ctxt);
7529
7530 if ((RAW == '<') && (NXT(1) == '/')) {
7531 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7533 ctxt->sax->error(ctxt->userData,
7534 "chunk is not well balanced\n");
7535 ctxt->wellFormed = 0;
7536 ctxt->disableSAX = 1;
7537 } else if (RAW != 0) {
7538 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7540 ctxt->sax->error(ctxt->userData,
7541 "extra content at the end of well balanced chunk\n");
7542 ctxt->wellFormed = 0;
7543 ctxt->disableSAX = 1;
7544 }
7545
7546 /*
7547 * SAX: end of the document processing.
7548 */
7549 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7550 (!ctxt->disableSAX))
7551 ctxt->sax->endDocument(ctxt->userData);
7552
7553 if (! ctxt->wellFormed) return(-1);
7554 return(0);
7555}
7556
/************************************************************************
 *                                                                      *
 *              Progressive parsing interfaces                          *
 *                                                                      *
 ************************************************************************/
7562
7563/**
7564 * xmlParseLookupSequence:
7565 * @ctxt: an XML parser context
7566 * @first: the first char to lookup
7567 * @next: the next char to lookup or zero
7568 * @third: the next char to lookup or zero
7569 *
7570 * Try to find if a sequence (first, next, third) or just (first next) or
7571 * (first) is available in the input stream.
7572 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7573 * to avoid rescanning sequences of bytes, it DOES change the state of the
7574 * parser, do not use liberally.
7575 *
7576 * Returns the index to the current parsing point if the full sequence
7577 * is available, -1 otherwise.
7578 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007579static int
Owen Taylor3473f882001-02-23 17:55:21 +00007580xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7581 xmlChar next, xmlChar third) {
7582 int base, len;
7583 xmlParserInputPtr in;
7584 const xmlChar *buf;
7585
7586 in = ctxt->input;
7587 if (in == NULL) return(-1);
7588 base = in->cur - in->base;
7589 if (base < 0) return(-1);
7590 if (ctxt->checkIndex > base)
7591 base = ctxt->checkIndex;
7592 if (in->buf == NULL) {
7593 buf = in->base;
7594 len = in->length;
7595 } else {
7596 buf = in->buf->buffer->content;
7597 len = in->buf->buffer->use;
7598 }
7599 /* take into account the sequence length */
7600 if (third) len -= 2;
7601 else if (next) len --;
7602 for (;base < len;base++) {
7603 if (buf[base] == first) {
7604 if (third != 0) {
7605 if ((buf[base + 1] != next) ||
7606 (buf[base + 2] != third)) continue;
7607 } else if (next != 0) {
7608 if (buf[base + 1] != next) continue;
7609 }
7610 ctxt->checkIndex = 0;
7611#ifdef DEBUG_PUSH
7612 if (next == 0)
7613 xmlGenericError(xmlGenericErrorContext,
7614 "PP: lookup '%c' found at %d\n",
7615 first, base);
7616 else if (third == 0)
7617 xmlGenericError(xmlGenericErrorContext,
7618 "PP: lookup '%c%c' found at %d\n",
7619 first, next, base);
7620 else
7621 xmlGenericError(xmlGenericErrorContext,
7622 "PP: lookup '%c%c%c' found at %d\n",
7623 first, next, third, base);
7624#endif
7625 return(base - (in->cur - in->base));
7626 }
7627 }
7628 ctxt->checkIndex = base;
7629#ifdef DEBUG_PUSH
7630 if (next == 0)
7631 xmlGenericError(xmlGenericErrorContext,
7632 "PP: lookup '%c' failed\n", first);
7633 else if (third == 0)
7634 xmlGenericError(xmlGenericErrorContext,
7635 "PP: lookup '%c%c' failed\n", first, next);
7636 else
7637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: lookup '%c%c%c' failed\n", first, next, third);
7639#endif
7640 return(-1);
7641}
7642
7643/**
7644 * xmlParseTryOrFinish:
7645 * @ctxt: an XML parser context
7646 * @terminate: last chunk indicator
7647 *
7648 * Try to progress on parsing
7649 *
7650 * Returns zero if no parsing was possible
7651 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007652static int
Owen Taylor3473f882001-02-23 17:55:21 +00007653xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7654 int ret = 0;
7655 int avail;
7656 xmlChar cur, next;
7657
7658#ifdef DEBUG_PUSH
7659 switch (ctxt->instate) {
7660 case XML_PARSER_EOF:
7661 xmlGenericError(xmlGenericErrorContext,
7662 "PP: try EOF\n"); break;
7663 case XML_PARSER_START:
7664 xmlGenericError(xmlGenericErrorContext,
7665 "PP: try START\n"); break;
7666 case XML_PARSER_MISC:
7667 xmlGenericError(xmlGenericErrorContext,
7668 "PP: try MISC\n");break;
7669 case XML_PARSER_COMMENT:
7670 xmlGenericError(xmlGenericErrorContext,
7671 "PP: try COMMENT\n");break;
7672 case XML_PARSER_PROLOG:
7673 xmlGenericError(xmlGenericErrorContext,
7674 "PP: try PROLOG\n");break;
7675 case XML_PARSER_START_TAG:
7676 xmlGenericError(xmlGenericErrorContext,
7677 "PP: try START_TAG\n");break;
7678 case XML_PARSER_CONTENT:
7679 xmlGenericError(xmlGenericErrorContext,
7680 "PP: try CONTENT\n");break;
7681 case XML_PARSER_CDATA_SECTION:
7682 xmlGenericError(xmlGenericErrorContext,
7683 "PP: try CDATA_SECTION\n");break;
7684 case XML_PARSER_END_TAG:
7685 xmlGenericError(xmlGenericErrorContext,
7686 "PP: try END_TAG\n");break;
7687 case XML_PARSER_ENTITY_DECL:
7688 xmlGenericError(xmlGenericErrorContext,
7689 "PP: try ENTITY_DECL\n");break;
7690 case XML_PARSER_ENTITY_VALUE:
7691 xmlGenericError(xmlGenericErrorContext,
7692 "PP: try ENTITY_VALUE\n");break;
7693 case XML_PARSER_ATTRIBUTE_VALUE:
7694 xmlGenericError(xmlGenericErrorContext,
7695 "PP: try ATTRIBUTE_VALUE\n");break;
7696 case XML_PARSER_DTD:
7697 xmlGenericError(xmlGenericErrorContext,
7698 "PP: try DTD\n");break;
7699 case XML_PARSER_EPILOG:
7700 xmlGenericError(xmlGenericErrorContext,
7701 "PP: try EPILOG\n");break;
7702 case XML_PARSER_PI:
7703 xmlGenericError(xmlGenericErrorContext,
7704 "PP: try PI\n");break;
7705 case XML_PARSER_IGNORE:
7706 xmlGenericError(xmlGenericErrorContext,
7707 "PP: try IGNORE\n");break;
7708 }
7709#endif
7710
7711 while (1) {
7712 /*
7713 * Pop-up of finished entities.
7714 */
7715 while ((RAW == 0) && (ctxt->inputNr > 1))
7716 xmlPopInput(ctxt);
7717
7718 if (ctxt->input ==NULL) break;
7719 if (ctxt->input->buf == NULL)
7720 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7721 else
7722 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7723 if (avail < 1)
7724 goto done;
7725 switch (ctxt->instate) {
7726 case XML_PARSER_EOF:
7727 /*
7728 * Document parsing is done !
7729 */
7730 goto done;
7731 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007732 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7733 xmlChar start[4];
7734 xmlCharEncoding enc;
7735
7736 /*
7737 * Very first chars read from the document flow.
7738 */
7739 if (avail < 4)
7740 goto done;
7741
7742 /*
7743 * Get the 4 first bytes and decode the charset
7744 * if enc != XML_CHAR_ENCODING_NONE
7745 * plug some encoding conversion routines.
7746 */
7747 start[0] = RAW;
7748 start[1] = NXT(1);
7749 start[2] = NXT(2);
7750 start[3] = NXT(3);
7751 enc = xmlDetectCharEncoding(start, 4);
7752 if (enc != XML_CHAR_ENCODING_NONE) {
7753 xmlSwitchEncoding(ctxt, enc);
7754 }
7755 break;
7756 }
Owen Taylor3473f882001-02-23 17:55:21 +00007757
7758 cur = ctxt->input->cur[0];
7759 next = ctxt->input->cur[1];
7760 if (cur == 0) {
7761 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7762 ctxt->sax->setDocumentLocator(ctxt->userData,
7763 &xmlDefaultSAXLocator);
7764 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7766 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7767 ctxt->wellFormed = 0;
7768 ctxt->disableSAX = 1;
7769 ctxt->instate = XML_PARSER_EOF;
7770#ifdef DEBUG_PUSH
7771 xmlGenericError(xmlGenericErrorContext,
7772 "PP: entering EOF\n");
7773#endif
7774 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7775 ctxt->sax->endDocument(ctxt->userData);
7776 goto done;
7777 }
7778 if ((cur == '<') && (next == '?')) {
7779 /* PI or XML decl */
7780 if (avail < 5) return(ret);
7781 if ((!terminate) &&
7782 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7783 return(ret);
7784 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7785 ctxt->sax->setDocumentLocator(ctxt->userData,
7786 &xmlDefaultSAXLocator);
7787 if ((ctxt->input->cur[2] == 'x') &&
7788 (ctxt->input->cur[3] == 'm') &&
7789 (ctxt->input->cur[4] == 'l') &&
7790 (IS_BLANK(ctxt->input->cur[5]))) {
7791 ret += 5;
7792#ifdef DEBUG_PUSH
7793 xmlGenericError(xmlGenericErrorContext,
7794 "PP: Parsing XML Decl\n");
7795#endif
7796 xmlParseXMLDecl(ctxt);
7797 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7798 /*
7799 * The XML REC instructs us to stop parsing right
7800 * here
7801 */
7802 ctxt->instate = XML_PARSER_EOF;
7803 return(0);
7804 }
7805 ctxt->standalone = ctxt->input->standalone;
7806 if ((ctxt->encoding == NULL) &&
7807 (ctxt->input->encoding != NULL))
7808 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7809 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7810 (!ctxt->disableSAX))
7811 ctxt->sax->startDocument(ctxt->userData);
7812 ctxt->instate = XML_PARSER_MISC;
7813#ifdef DEBUG_PUSH
7814 xmlGenericError(xmlGenericErrorContext,
7815 "PP: entering MISC\n");
7816#endif
7817 } else {
7818 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7819 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7820 (!ctxt->disableSAX))
7821 ctxt->sax->startDocument(ctxt->userData);
7822 ctxt->instate = XML_PARSER_MISC;
7823#ifdef DEBUG_PUSH
7824 xmlGenericError(xmlGenericErrorContext,
7825 "PP: entering MISC\n");
7826#endif
7827 }
7828 } else {
7829 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7830 ctxt->sax->setDocumentLocator(ctxt->userData,
7831 &xmlDefaultSAXLocator);
7832 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7833 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7834 (!ctxt->disableSAX))
7835 ctxt->sax->startDocument(ctxt->userData);
7836 ctxt->instate = XML_PARSER_MISC;
7837#ifdef DEBUG_PUSH
7838 xmlGenericError(xmlGenericErrorContext,
7839 "PP: entering MISC\n");
7840#endif
7841 }
7842 break;
7843 case XML_PARSER_MISC:
7844 SKIP_BLANKS;
7845 if (ctxt->input->buf == NULL)
7846 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7847 else
7848 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7849 if (avail < 2)
7850 goto done;
7851 cur = ctxt->input->cur[0];
7852 next = ctxt->input->cur[1];
7853 if ((cur == '<') && (next == '?')) {
7854 if ((!terminate) &&
7855 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7856 goto done;
7857#ifdef DEBUG_PUSH
7858 xmlGenericError(xmlGenericErrorContext,
7859 "PP: Parsing PI\n");
7860#endif
7861 xmlParsePI(ctxt);
7862 } else if ((cur == '<') && (next == '!') &&
7863 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7864 if ((!terminate) &&
7865 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7866 goto done;
7867#ifdef DEBUG_PUSH
7868 xmlGenericError(xmlGenericErrorContext,
7869 "PP: Parsing Comment\n");
7870#endif
7871 xmlParseComment(ctxt);
7872 ctxt->instate = XML_PARSER_MISC;
7873 } else if ((cur == '<') && (next == '!') &&
7874 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7875 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7876 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7877 (ctxt->input->cur[8] == 'E')) {
7878 if ((!terminate) &&
7879 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7880 goto done;
7881#ifdef DEBUG_PUSH
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: Parsing internal subset\n");
7884#endif
7885 ctxt->inSubset = 1;
7886 xmlParseDocTypeDecl(ctxt);
7887 if (RAW == '[') {
7888 ctxt->instate = XML_PARSER_DTD;
7889#ifdef DEBUG_PUSH
7890 xmlGenericError(xmlGenericErrorContext,
7891 "PP: entering DTD\n");
7892#endif
7893 } else {
7894 /*
7895 * Create and update the external subset.
7896 */
7897 ctxt->inSubset = 2;
7898 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7899 (ctxt->sax->externalSubset != NULL))
7900 ctxt->sax->externalSubset(ctxt->userData,
7901 ctxt->intSubName, ctxt->extSubSystem,
7902 ctxt->extSubURI);
7903 ctxt->inSubset = 0;
7904 ctxt->instate = XML_PARSER_PROLOG;
7905#ifdef DEBUG_PUSH
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: entering PROLOG\n");
7908#endif
7909 }
7910 } else if ((cur == '<') && (next == '!') &&
7911 (avail < 9)) {
7912 goto done;
7913 } else {
7914 ctxt->instate = XML_PARSER_START_TAG;
7915#ifdef DEBUG_PUSH
7916 xmlGenericError(xmlGenericErrorContext,
7917 "PP: entering START_TAG\n");
7918#endif
7919 }
7920 break;
7921 case XML_PARSER_IGNORE:
7922 xmlGenericError(xmlGenericErrorContext,
7923 "PP: internal error, state == IGNORE");
7924 ctxt->instate = XML_PARSER_DTD;
7925#ifdef DEBUG_PUSH
7926 xmlGenericError(xmlGenericErrorContext,
7927 "PP: entering DTD\n");
7928#endif
7929 break;
7930 case XML_PARSER_PROLOG:
7931 SKIP_BLANKS;
7932 if (ctxt->input->buf == NULL)
7933 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7934 else
7935 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7936 if (avail < 2)
7937 goto done;
7938 cur = ctxt->input->cur[0];
7939 next = ctxt->input->cur[1];
7940 if ((cur == '<') && (next == '?')) {
7941 if ((!terminate) &&
7942 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7943 goto done;
7944#ifdef DEBUG_PUSH
7945 xmlGenericError(xmlGenericErrorContext,
7946 "PP: Parsing PI\n");
7947#endif
7948 xmlParsePI(ctxt);
7949 } else if ((cur == '<') && (next == '!') &&
7950 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7951 if ((!terminate) &&
7952 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7953 goto done;
7954#ifdef DEBUG_PUSH
7955 xmlGenericError(xmlGenericErrorContext,
7956 "PP: Parsing Comment\n");
7957#endif
7958 xmlParseComment(ctxt);
7959 ctxt->instate = XML_PARSER_PROLOG;
7960 } else if ((cur == '<') && (next == '!') &&
7961 (avail < 4)) {
7962 goto done;
7963 } else {
7964 ctxt->instate = XML_PARSER_START_TAG;
7965#ifdef DEBUG_PUSH
7966 xmlGenericError(xmlGenericErrorContext,
7967 "PP: entering START_TAG\n");
7968#endif
7969 }
7970 break;
7971 case XML_PARSER_EPILOG:
7972 SKIP_BLANKS;
7973 if (ctxt->input->buf == NULL)
7974 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7975 else
7976 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7977 if (avail < 2)
7978 goto done;
7979 cur = ctxt->input->cur[0];
7980 next = ctxt->input->cur[1];
7981 if ((cur == '<') && (next == '?')) {
7982 if ((!terminate) &&
7983 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7984 goto done;
7985#ifdef DEBUG_PUSH
7986 xmlGenericError(xmlGenericErrorContext,
7987 "PP: Parsing PI\n");
7988#endif
7989 xmlParsePI(ctxt);
7990 ctxt->instate = XML_PARSER_EPILOG;
7991 } else if ((cur == '<') && (next == '!') &&
7992 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7993 if ((!terminate) &&
7994 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7995 goto done;
7996#ifdef DEBUG_PUSH
7997 xmlGenericError(xmlGenericErrorContext,
7998 "PP: Parsing Comment\n");
7999#endif
8000 xmlParseComment(ctxt);
8001 ctxt->instate = XML_PARSER_EPILOG;
8002 } else if ((cur == '<') && (next == '!') &&
8003 (avail < 4)) {
8004 goto done;
8005 } else {
8006 ctxt->errNo = XML_ERR_DOCUMENT_END;
8007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8008 ctxt->sax->error(ctxt->userData,
8009 "Extra content at the end of the document\n");
8010 ctxt->wellFormed = 0;
8011 ctxt->disableSAX = 1;
8012 ctxt->instate = XML_PARSER_EOF;
8013#ifdef DEBUG_PUSH
8014 xmlGenericError(xmlGenericErrorContext,
8015 "PP: entering EOF\n");
8016#endif
8017 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8018 (!ctxt->disableSAX))
8019 ctxt->sax->endDocument(ctxt->userData);
8020 goto done;
8021 }
8022 break;
8023 case XML_PARSER_START_TAG: {
8024 xmlChar *name, *oldname;
8025
8026 if ((avail < 2) && (ctxt->inputNr == 1))
8027 goto done;
8028 cur = ctxt->input->cur[0];
8029 if (cur != '<') {
8030 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8032 ctxt->sax->error(ctxt->userData,
8033 "Start tag expect, '<' not found\n");
8034 ctxt->wellFormed = 0;
8035 ctxt->disableSAX = 1;
8036 ctxt->instate = XML_PARSER_EOF;
8037#ifdef DEBUG_PUSH
8038 xmlGenericError(xmlGenericErrorContext,
8039 "PP: entering EOF\n");
8040#endif
8041 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8042 (!ctxt->disableSAX))
8043 ctxt->sax->endDocument(ctxt->userData);
8044 goto done;
8045 }
8046 if ((!terminate) &&
8047 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8048 goto done;
8049 if (ctxt->spaceNr == 0)
8050 spacePush(ctxt, -1);
8051 else
8052 spacePush(ctxt, *ctxt->space);
8053 name = xmlParseStartTag(ctxt);
8054 if (name == NULL) {
8055 spacePop(ctxt);
8056 ctxt->instate = XML_PARSER_EOF;
8057#ifdef DEBUG_PUSH
8058 xmlGenericError(xmlGenericErrorContext,
8059 "PP: entering EOF\n");
8060#endif
8061 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8062 (!ctxt->disableSAX))
8063 ctxt->sax->endDocument(ctxt->userData);
8064 goto done;
8065 }
8066 namePush(ctxt, xmlStrdup(name));
8067
8068 /*
8069 * [ VC: Root Element Type ]
8070 * The Name in the document type declaration must match
8071 * the element type of the root element.
8072 */
8073 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8074 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8075 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8076
8077 /*
8078 * Check for an Empty Element.
8079 */
8080 if ((RAW == '/') && (NXT(1) == '>')) {
8081 SKIP(2);
8082 if ((ctxt->sax != NULL) &&
8083 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8084 ctxt->sax->endElement(ctxt->userData, name);
8085 xmlFree(name);
8086 oldname = namePop(ctxt);
8087 spacePop(ctxt);
8088 if (oldname != NULL) {
8089#ifdef DEBUG_STACK
8090 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8091#endif
8092 xmlFree(oldname);
8093 }
8094 if (ctxt->name == NULL) {
8095 ctxt->instate = XML_PARSER_EPILOG;
8096#ifdef DEBUG_PUSH
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: entering EPILOG\n");
8099#endif
8100 } else {
8101 ctxt->instate = XML_PARSER_CONTENT;
8102#ifdef DEBUG_PUSH
8103 xmlGenericError(xmlGenericErrorContext,
8104 "PP: entering CONTENT\n");
8105#endif
8106 }
8107 break;
8108 }
8109 if (RAW == '>') {
8110 NEXT;
8111 } else {
8112 ctxt->errNo = XML_ERR_GT_REQUIRED;
8113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8114 ctxt->sax->error(ctxt->userData,
8115 "Couldn't find end of Start Tag %s\n",
8116 name);
8117 ctxt->wellFormed = 0;
8118 ctxt->disableSAX = 1;
8119
8120 /*
8121 * end of parsing of this node.
8122 */
8123 nodePop(ctxt);
8124 oldname = namePop(ctxt);
8125 spacePop(ctxt);
8126 if (oldname != NULL) {
8127#ifdef DEBUG_STACK
8128 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8129#endif
8130 xmlFree(oldname);
8131 }
8132 }
8133 xmlFree(name);
8134 ctxt->instate = XML_PARSER_CONTENT;
8135#ifdef DEBUG_PUSH
8136 xmlGenericError(xmlGenericErrorContext,
8137 "PP: entering CONTENT\n");
8138#endif
8139 break;
8140 }
8141 case XML_PARSER_CONTENT: {
8142 const xmlChar *test;
8143 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008144 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008145
8146 /*
8147 * Handle preparsed entities and charRef
8148 */
8149 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008150 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008151
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008152 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008153 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8154 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008155 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008156 ctxt->token = 0;
8157 }
8158 if ((avail < 2) && (ctxt->inputNr == 1))
8159 goto done;
8160 cur = ctxt->input->cur[0];
8161 next = ctxt->input->cur[1];
8162
8163 test = CUR_PTR;
8164 cons = ctxt->input->consumed;
8165 tok = ctxt->token;
8166 if ((cur == '<') && (next == '?')) {
8167 if ((!terminate) &&
8168 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8169 goto done;
8170#ifdef DEBUG_PUSH
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: Parsing PI\n");
8173#endif
8174 xmlParsePI(ctxt);
8175 } else if ((cur == '<') && (next == '!') &&
8176 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8177 if ((!terminate) &&
8178 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8179 goto done;
8180#ifdef DEBUG_PUSH
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: Parsing Comment\n");
8183#endif
8184 xmlParseComment(ctxt);
8185 ctxt->instate = XML_PARSER_CONTENT;
8186 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8187 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8188 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8189 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8190 (ctxt->input->cur[8] == '[')) {
8191 SKIP(9);
8192 ctxt->instate = XML_PARSER_CDATA_SECTION;
8193#ifdef DEBUG_PUSH
8194 xmlGenericError(xmlGenericErrorContext,
8195 "PP: entering CDATA_SECTION\n");
8196#endif
8197 break;
8198 } else if ((cur == '<') && (next == '!') &&
8199 (avail < 9)) {
8200 goto done;
8201 } else if ((cur == '<') && (next == '/')) {
8202 ctxt->instate = XML_PARSER_END_TAG;
8203#ifdef DEBUG_PUSH
8204 xmlGenericError(xmlGenericErrorContext,
8205 "PP: entering END_TAG\n");
8206#endif
8207 break;
8208 } else if (cur == '<') {
8209 ctxt->instate = XML_PARSER_START_TAG;
8210#ifdef DEBUG_PUSH
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: entering START_TAG\n");
8213#endif
8214 break;
8215 } else if (cur == '&') {
8216 if ((!terminate) &&
8217 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8218 goto done;
8219#ifdef DEBUG_PUSH
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: Parsing Reference\n");
8222#endif
8223 xmlParseReference(ctxt);
8224 } else {
8225 /* TODO Avoid the extra copy, handle directly !!! */
8226 /*
8227 * Goal of the following test is:
8228 * - minimize calls to the SAX 'character' callback
8229 * when they are mergeable
8230 * - handle an problem for isBlank when we only parse
8231 * a sequence of blank chars and the next one is
8232 * not available to check against '<' presence.
8233 * - tries to homogenize the differences in SAX
8234 * callbacks beween the push and pull versions
8235 * of the parser.
8236 */
8237 if ((ctxt->inputNr == 1) &&
8238 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8239 if ((!terminate) &&
8240 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8241 goto done;
8242 }
8243 ctxt->checkIndex = 0;
8244#ifdef DEBUG_PUSH
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: Parsing char data\n");
8247#endif
8248 xmlParseCharData(ctxt, 0);
8249 }
8250 /*
8251 * Pop-up of finished entities.
8252 */
8253 while ((RAW == 0) && (ctxt->inputNr > 1))
8254 xmlPopInput(ctxt);
8255 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8256 (tok == ctxt->token)) {
8257 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8259 ctxt->sax->error(ctxt->userData,
8260 "detected an error in element content\n");
8261 ctxt->wellFormed = 0;
8262 ctxt->disableSAX = 1;
8263 ctxt->instate = XML_PARSER_EOF;
8264 break;
8265 }
8266 break;
8267 }
8268 case XML_PARSER_CDATA_SECTION: {
8269 /*
8270 * The Push mode need to have the SAX callback for
8271 * cdataBlock merge back contiguous callbacks.
8272 */
8273 int base;
8274
8275 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8276 if (base < 0) {
8277 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8278 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8279 if (ctxt->sax->cdataBlock != NULL)
8280 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8281 XML_PARSER_BIG_BUFFER_SIZE);
8282 }
8283 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8284 ctxt->checkIndex = 0;
8285 }
8286 goto done;
8287 } else {
8288 if ((ctxt->sax != NULL) && (base > 0) &&
8289 (!ctxt->disableSAX)) {
8290 if (ctxt->sax->cdataBlock != NULL)
8291 ctxt->sax->cdataBlock(ctxt->userData,
8292 ctxt->input->cur, base);
8293 }
8294 SKIP(base + 3);
8295 ctxt->checkIndex = 0;
8296 ctxt->instate = XML_PARSER_CONTENT;
8297#ifdef DEBUG_PUSH
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: entering CONTENT\n");
8300#endif
8301 }
8302 break;
8303 }
8304 case XML_PARSER_END_TAG:
8305 if (avail < 2)
8306 goto done;
8307 if ((!terminate) &&
8308 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8309 goto done;
8310 xmlParseEndTag(ctxt);
8311 if (ctxt->name == NULL) {
8312 ctxt->instate = XML_PARSER_EPILOG;
8313#ifdef DEBUG_PUSH
8314 xmlGenericError(xmlGenericErrorContext,
8315 "PP: entering EPILOG\n");
8316#endif
8317 } else {
8318 ctxt->instate = XML_PARSER_CONTENT;
8319#ifdef DEBUG_PUSH
8320 xmlGenericError(xmlGenericErrorContext,
8321 "PP: entering CONTENT\n");
8322#endif
8323 }
8324 break;
8325 case XML_PARSER_DTD: {
8326 /*
8327 * Sorry but progressive parsing of the internal subset
8328 * is not expected to be supported. We first check that
8329 * the full content of the internal subset is available and
8330 * the parsing is launched only at that point.
8331 * Internal subset ends up with "']' S? '>'" in an unescaped
8332 * section and not in a ']]>' sequence which are conditional
8333 * sections (whoever argued to keep that crap in XML deserve
8334 * a place in hell !).
8335 */
8336 int base, i;
8337 xmlChar *buf;
8338 xmlChar quote = 0;
8339
8340 base = ctxt->input->cur - ctxt->input->base;
8341 if (base < 0) return(0);
8342 if (ctxt->checkIndex > base)
8343 base = ctxt->checkIndex;
8344 buf = ctxt->input->buf->buffer->content;
8345 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8346 base++) {
8347 if (quote != 0) {
8348 if (buf[base] == quote)
8349 quote = 0;
8350 continue;
8351 }
8352 if (buf[base] == '"') {
8353 quote = '"';
8354 continue;
8355 }
8356 if (buf[base] == '\'') {
8357 quote = '\'';
8358 continue;
8359 }
8360 if (buf[base] == ']') {
8361 if ((unsigned int) base +1 >=
8362 ctxt->input->buf->buffer->use)
8363 break;
8364 if (buf[base + 1] == ']') {
8365 /* conditional crap, skip both ']' ! */
8366 base++;
8367 continue;
8368 }
8369 for (i = 0;
8370 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8371 i++) {
8372 if (buf[base + i] == '>')
8373 goto found_end_int_subset;
8374 }
8375 break;
8376 }
8377 }
8378 /*
8379 * We didn't found the end of the Internal subset
8380 */
8381 if (quote == 0)
8382 ctxt->checkIndex = base;
8383#ifdef DEBUG_PUSH
8384 if (next == 0)
8385 xmlGenericError(xmlGenericErrorContext,
8386 "PP: lookup of int subset end filed\n");
8387#endif
8388 goto done;
8389
8390found_end_int_subset:
8391 xmlParseInternalSubset(ctxt);
8392 ctxt->inSubset = 2;
8393 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8394 (ctxt->sax->externalSubset != NULL))
8395 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8396 ctxt->extSubSystem, ctxt->extSubURI);
8397 ctxt->inSubset = 0;
8398 ctxt->instate = XML_PARSER_PROLOG;
8399 ctxt->checkIndex = 0;
8400#ifdef DEBUG_PUSH
8401 xmlGenericError(xmlGenericErrorContext,
8402 "PP: entering PROLOG\n");
8403#endif
8404 break;
8405 }
8406 case XML_PARSER_COMMENT:
8407 xmlGenericError(xmlGenericErrorContext,
8408 "PP: internal error, state == COMMENT\n");
8409 ctxt->instate = XML_PARSER_CONTENT;
8410#ifdef DEBUG_PUSH
8411 xmlGenericError(xmlGenericErrorContext,
8412 "PP: entering CONTENT\n");
8413#endif
8414 break;
8415 case XML_PARSER_PI:
8416 xmlGenericError(xmlGenericErrorContext,
8417 "PP: internal error, state == PI\n");
8418 ctxt->instate = XML_PARSER_CONTENT;
8419#ifdef DEBUG_PUSH
8420 xmlGenericError(xmlGenericErrorContext,
8421 "PP: entering CONTENT\n");
8422#endif
8423 break;
8424 case XML_PARSER_ENTITY_DECL:
8425 xmlGenericError(xmlGenericErrorContext,
8426 "PP: internal error, state == ENTITY_DECL\n");
8427 ctxt->instate = XML_PARSER_DTD;
8428#ifdef DEBUG_PUSH
8429 xmlGenericError(xmlGenericErrorContext,
8430 "PP: entering DTD\n");
8431#endif
8432 break;
8433 case XML_PARSER_ENTITY_VALUE:
8434 xmlGenericError(xmlGenericErrorContext,
8435 "PP: internal error, state == ENTITY_VALUE\n");
8436 ctxt->instate = XML_PARSER_CONTENT;
8437#ifdef DEBUG_PUSH
8438 xmlGenericError(xmlGenericErrorContext,
8439 "PP: entering DTD\n");
8440#endif
8441 break;
8442 case XML_PARSER_ATTRIBUTE_VALUE:
8443 xmlGenericError(xmlGenericErrorContext,
8444 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8445 ctxt->instate = XML_PARSER_START_TAG;
8446#ifdef DEBUG_PUSH
8447 xmlGenericError(xmlGenericErrorContext,
8448 "PP: entering START_TAG\n");
8449#endif
8450 break;
8451 case XML_PARSER_SYSTEM_LITERAL:
8452 xmlGenericError(xmlGenericErrorContext,
8453 "PP: internal error, state == SYSTEM_LITERAL\n");
8454 ctxt->instate = XML_PARSER_START_TAG;
8455#ifdef DEBUG_PUSH
8456 xmlGenericError(xmlGenericErrorContext,
8457 "PP: entering START_TAG\n");
8458#endif
8459 break;
8460 }
8461 }
8462done:
8463#ifdef DEBUG_PUSH
8464 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8465#endif
8466 return(ret);
8467}
8468
8469/**
Owen Taylor3473f882001-02-23 17:55:21 +00008470 * xmlParseChunk:
8471 * @ctxt: an XML parser context
8472 * @chunk: an char array
8473 * @size: the size in byte of the chunk
8474 * @terminate: last chunk indicator
8475 *
8476 * Parse a Chunk of memory
8477 *
8478 * Returns zero if no error, the xmlParserErrors otherwise.
8479 */
8480int
8481xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8482 int terminate) {
8483 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8484 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8485 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8486 int cur = ctxt->input->cur - ctxt->input->base;
8487
8488 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8489 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8490 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008491 ctxt->input->end =
8492 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008493#ifdef DEBUG_PUSH
8494 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8495#endif
8496
8497 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8498 xmlParseTryOrFinish(ctxt, terminate);
8499 } else if (ctxt->instate != XML_PARSER_EOF) {
8500 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8501 xmlParserInputBufferPtr in = ctxt->input->buf;
8502 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8503 (in->raw != NULL)) {
8504 int nbchars;
8505
8506 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8507 if (nbchars < 0) {
8508 xmlGenericError(xmlGenericErrorContext,
8509 "xmlParseChunk: encoder error\n");
8510 return(XML_ERR_INVALID_ENCODING);
8511 }
8512 }
8513 }
8514 }
8515 xmlParseTryOrFinish(ctxt, terminate);
8516 if (terminate) {
8517 /*
8518 * Check for termination
8519 */
8520 if ((ctxt->instate != XML_PARSER_EOF) &&
8521 (ctxt->instate != XML_PARSER_EPILOG)) {
8522 ctxt->errNo = XML_ERR_DOCUMENT_END;
8523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8524 ctxt->sax->error(ctxt->userData,
8525 "Extra content at the end of the document\n");
8526 ctxt->wellFormed = 0;
8527 ctxt->disableSAX = 1;
8528 }
8529 if (ctxt->instate != XML_PARSER_EOF) {
8530 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8531 (!ctxt->disableSAX))
8532 ctxt->sax->endDocument(ctxt->userData);
8533 }
8534 ctxt->instate = XML_PARSER_EOF;
8535 }
8536 return((xmlParserErrors) ctxt->errNo);
8537}
8538
8539/************************************************************************
8540 * *
8541 * I/O front end functions to the parser *
8542 * *
8543 ************************************************************************/
8544
8545/**
8546 * xmlStopParser:
8547 * @ctxt: an XML parser context
8548 *
8549 * Blocks further parser processing
8550 */
8551void
8552xmlStopParser(xmlParserCtxtPtr ctxt) {
8553 ctxt->instate = XML_PARSER_EOF;
8554 if (ctxt->input != NULL)
8555 ctxt->input->cur = BAD_CAST"";
8556}
8557
8558/**
8559 * xmlCreatePushParserCtxt:
8560 * @sax: a SAX handler
8561 * @user_data: The user data returned on SAX callbacks
8562 * @chunk: a pointer to an array of chars
8563 * @size: number of chars in the array
8564 * @filename: an optional file name or URI
8565 *
8566 * Create a parser context for using the XML parser in push mode
8567 * To allow content encoding detection, @size should be >= 4
8568 * The value of @filename is used for fetching external entities
8569 * and error/warning reports.
8570 *
8571 * Returns the new parser context or NULL
8572 */
8573xmlParserCtxtPtr
8574xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8575 const char *chunk, int size, const char *filename) {
8576 xmlParserCtxtPtr ctxt;
8577 xmlParserInputPtr inputStream;
8578 xmlParserInputBufferPtr buf;
8579 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8580
8581 /*
8582 * plug some encoding conversion routines
8583 */
8584 if ((chunk != NULL) && (size >= 4))
8585 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8586
8587 buf = xmlAllocParserInputBuffer(enc);
8588 if (buf == NULL) return(NULL);
8589
8590 ctxt = xmlNewParserCtxt();
8591 if (ctxt == NULL) {
8592 xmlFree(buf);
8593 return(NULL);
8594 }
8595 if (sax != NULL) {
8596 if (ctxt->sax != &xmlDefaultSAXHandler)
8597 xmlFree(ctxt->sax);
8598 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8599 if (ctxt->sax == NULL) {
8600 xmlFree(buf);
8601 xmlFree(ctxt);
8602 return(NULL);
8603 }
8604 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8605 if (user_data != NULL)
8606 ctxt->userData = user_data;
8607 }
8608 if (filename == NULL) {
8609 ctxt->directory = NULL;
8610 } else {
8611 ctxt->directory = xmlParserGetDirectory(filename);
8612 }
8613
8614 inputStream = xmlNewInputStream(ctxt);
8615 if (inputStream == NULL) {
8616 xmlFreeParserCtxt(ctxt);
8617 return(NULL);
8618 }
8619
8620 if (filename == NULL)
8621 inputStream->filename = NULL;
8622 else
8623 inputStream->filename = xmlMemStrdup(filename);
8624 inputStream->buf = buf;
8625 inputStream->base = inputStream->buf->buffer->content;
8626 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008627 inputStream->end =
8628 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008629
8630 inputPush(ctxt, inputStream);
8631
8632 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8633 (ctxt->input->buf != NULL)) {
8634 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8635#ifdef DEBUG_PUSH
8636 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8637#endif
8638 }
8639
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008640 if (enc != XML_CHAR_ENCODING_NONE) {
8641 xmlSwitchEncoding(ctxt, enc);
8642 }
8643
Owen Taylor3473f882001-02-23 17:55:21 +00008644 return(ctxt);
8645}
8646
8647/**
8648 * xmlCreateIOParserCtxt:
8649 * @sax: a SAX handler
8650 * @user_data: The user data returned on SAX callbacks
8651 * @ioread: an I/O read function
8652 * @ioclose: an I/O close function
8653 * @ioctx: an I/O handler
8654 * @enc: the charset encoding if known
8655 *
8656 * Create a parser context for using the XML parser with an existing
8657 * I/O stream
8658 *
8659 * Returns the new parser context or NULL
8660 */
8661xmlParserCtxtPtr
8662xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8663 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8664 void *ioctx, xmlCharEncoding enc) {
8665 xmlParserCtxtPtr ctxt;
8666 xmlParserInputPtr inputStream;
8667 xmlParserInputBufferPtr buf;
8668
8669 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8670 if (buf == NULL) return(NULL);
8671
8672 ctxt = xmlNewParserCtxt();
8673 if (ctxt == NULL) {
8674 xmlFree(buf);
8675 return(NULL);
8676 }
8677 if (sax != NULL) {
8678 if (ctxt->sax != &xmlDefaultSAXHandler)
8679 xmlFree(ctxt->sax);
8680 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8681 if (ctxt->sax == NULL) {
8682 xmlFree(buf);
8683 xmlFree(ctxt);
8684 return(NULL);
8685 }
8686 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8687 if (user_data != NULL)
8688 ctxt->userData = user_data;
8689 }
8690
8691 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8692 if (inputStream == NULL) {
8693 xmlFreeParserCtxt(ctxt);
8694 return(NULL);
8695 }
8696 inputPush(ctxt, inputStream);
8697
8698 return(ctxt);
8699}
8700
8701/************************************************************************
8702 * *
8703 * Front ends when parsing a Dtd *
8704 * *
8705 ************************************************************************/
8706
8707/**
8708 * xmlIOParseDTD:
8709 * @sax: the SAX handler block or NULL
8710 * @input: an Input Buffer
8711 * @enc: the charset encoding if known
8712 *
8713 * Load and parse a DTD
8714 *
8715 * Returns the resulting xmlDtdPtr or NULL in case of error.
8716 * @input will be freed at parsing end.
8717 */
8718
8719xmlDtdPtr
8720xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8721 xmlCharEncoding enc) {
8722 xmlDtdPtr ret = NULL;
8723 xmlParserCtxtPtr ctxt;
8724 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008725 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008726
8727 if (input == NULL)
8728 return(NULL);
8729
8730 ctxt = xmlNewParserCtxt();
8731 if (ctxt == NULL) {
8732 return(NULL);
8733 }
8734
8735 /*
8736 * Set-up the SAX context
8737 */
8738 if (sax != NULL) {
8739 if (ctxt->sax != NULL)
8740 xmlFree(ctxt->sax);
8741 ctxt->sax = sax;
8742 ctxt->userData = NULL;
8743 }
8744
8745 /*
8746 * generate a parser input from the I/O handler
8747 */
8748
8749 pinput = xmlNewIOInputStream(ctxt, input, enc);
8750 if (pinput == NULL) {
8751 if (sax != NULL) ctxt->sax = NULL;
8752 xmlFreeParserCtxt(ctxt);
8753 return(NULL);
8754 }
8755
8756 /*
8757 * plug some encoding conversion routines here.
8758 */
8759 xmlPushInput(ctxt, pinput);
8760
8761 pinput->filename = NULL;
8762 pinput->line = 1;
8763 pinput->col = 1;
8764 pinput->base = ctxt->input->cur;
8765 pinput->cur = ctxt->input->cur;
8766 pinput->free = NULL;
8767
8768 /*
8769 * let's parse that entity knowing it's an external subset.
8770 */
8771 ctxt->inSubset = 2;
8772 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8773 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8774 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008775
8776 if (enc == XML_CHAR_ENCODING_NONE) {
8777 /*
8778 * Get the 4 first bytes and decode the charset
8779 * if enc != XML_CHAR_ENCODING_NONE
8780 * plug some encoding conversion routines.
8781 */
8782 start[0] = RAW;
8783 start[1] = NXT(1);
8784 start[2] = NXT(2);
8785 start[3] = NXT(3);
8786 enc = xmlDetectCharEncoding(start, 4);
8787 if (enc != XML_CHAR_ENCODING_NONE) {
8788 xmlSwitchEncoding(ctxt, enc);
8789 }
8790 }
8791
Owen Taylor3473f882001-02-23 17:55:21 +00008792 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8793
8794 if (ctxt->myDoc != NULL) {
8795 if (ctxt->wellFormed) {
8796 ret = ctxt->myDoc->extSubset;
8797 ctxt->myDoc->extSubset = NULL;
8798 } else {
8799 ret = NULL;
8800 }
8801 xmlFreeDoc(ctxt->myDoc);
8802 ctxt->myDoc = NULL;
8803 }
8804 if (sax != NULL) ctxt->sax = NULL;
8805 xmlFreeParserCtxt(ctxt);
8806
8807 return(ret);
8808}
8809
8810/**
8811 * xmlSAXParseDTD:
8812 * @sax: the SAX handler block
8813 * @ExternalID: a NAME* containing the External ID of the DTD
8814 * @SystemID: a NAME* containing the URL to the DTD
8815 *
8816 * Load and parse an external subset.
8817 *
8818 * Returns the resulting xmlDtdPtr or NULL in case of error.
8819 */
8820
8821xmlDtdPtr
8822xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8823 const xmlChar *SystemID) {
8824 xmlDtdPtr ret = NULL;
8825 xmlParserCtxtPtr ctxt;
8826 xmlParserInputPtr input = NULL;
8827 xmlCharEncoding enc;
8828
8829 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8830
8831 ctxt = xmlNewParserCtxt();
8832 if (ctxt == NULL) {
8833 return(NULL);
8834 }
8835
8836 /*
8837 * Set-up the SAX context
8838 */
8839 if (sax != NULL) {
8840 if (ctxt->sax != NULL)
8841 xmlFree(ctxt->sax);
8842 ctxt->sax = sax;
8843 ctxt->userData = NULL;
8844 }
8845
8846 /*
8847 * Ask the Entity resolver to load the damn thing
8848 */
8849
8850 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8851 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8852 if (input == NULL) {
8853 if (sax != NULL) ctxt->sax = NULL;
8854 xmlFreeParserCtxt(ctxt);
8855 return(NULL);
8856 }
8857
8858 /*
8859 * plug some encoding conversion routines here.
8860 */
8861 xmlPushInput(ctxt, input);
8862 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8863 xmlSwitchEncoding(ctxt, enc);
8864
8865 if (input->filename == NULL)
8866 input->filename = (char *) xmlStrdup(SystemID);
8867 input->line = 1;
8868 input->col = 1;
8869 input->base = ctxt->input->cur;
8870 input->cur = ctxt->input->cur;
8871 input->free = NULL;
8872
8873 /*
8874 * let's parse that entity knowing it's an external subset.
8875 */
8876 ctxt->inSubset = 2;
8877 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8878 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8879 ExternalID, SystemID);
8880 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8881
8882 if (ctxt->myDoc != NULL) {
8883 if (ctxt->wellFormed) {
8884 ret = ctxt->myDoc->extSubset;
8885 ctxt->myDoc->extSubset = NULL;
8886 } else {
8887 ret = NULL;
8888 }
8889 xmlFreeDoc(ctxt->myDoc);
8890 ctxt->myDoc = NULL;
8891 }
8892 if (sax != NULL) ctxt->sax = NULL;
8893 xmlFreeParserCtxt(ctxt);
8894
8895 return(ret);
8896}
8897
8898/**
8899 * xmlParseDTD:
8900 * @ExternalID: a NAME* containing the External ID of the DTD
8901 * @SystemID: a NAME* containing the URL to the DTD
8902 *
8903 * Load and parse an external subset.
8904 *
8905 * Returns the resulting xmlDtdPtr or NULL in case of error.
8906 */
8907
8908xmlDtdPtr
8909xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8910 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8911}
8912
8913/************************************************************************
8914 * *
8915 * Front ends when parsing an Entity *
8916 * *
8917 ************************************************************************/
8918
8919/**
Owen Taylor3473f882001-02-23 17:55:21 +00008920 * xmlParseCtxtExternalEntity:
8921 * @ctx: the existing parsing context
8922 * @URL: the URL for the entity to load
8923 * @ID: the System ID for the entity to load
8924 * @list: the return value for the set of parsed nodes
8925 *
8926 * Parse an external general entity within an existing parsing context
8927 * An external general parsed entity is well-formed if it matches the
8928 * production labeled extParsedEnt.
8929 *
8930 * [78] extParsedEnt ::= TextDecl? content
8931 *
8932 * Returns 0 if the entity is well formed, -1 in case of args problem and
8933 * the parser error code otherwise
8934 */
8935
8936int
8937xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8938 const xmlChar *ID, xmlNodePtr *list) {
8939 xmlParserCtxtPtr ctxt;
8940 xmlDocPtr newDoc;
8941 xmlSAXHandlerPtr oldsax = NULL;
8942 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008943 xmlChar start[4];
8944 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008945
8946 if (ctx->depth > 40) {
8947 return(XML_ERR_ENTITY_LOOP);
8948 }
8949
8950 if (list != NULL)
8951 *list = NULL;
8952 if ((URL == NULL) && (ID == NULL))
8953 return(-1);
8954 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8955 return(-1);
8956
8957
8958 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8959 if (ctxt == NULL) return(-1);
8960 ctxt->userData = ctxt;
8961 oldsax = ctxt->sax;
8962 ctxt->sax = ctx->sax;
8963 newDoc = xmlNewDoc(BAD_CAST "1.0");
8964 if (newDoc == NULL) {
8965 xmlFreeParserCtxt(ctxt);
8966 return(-1);
8967 }
8968 if (ctx->myDoc != NULL) {
8969 newDoc->intSubset = ctx->myDoc->intSubset;
8970 newDoc->extSubset = ctx->myDoc->extSubset;
8971 }
8972 if (ctx->myDoc->URL != NULL) {
8973 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8974 }
8975 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8976 if (newDoc->children == NULL) {
8977 ctxt->sax = oldsax;
8978 xmlFreeParserCtxt(ctxt);
8979 newDoc->intSubset = NULL;
8980 newDoc->extSubset = NULL;
8981 xmlFreeDoc(newDoc);
8982 return(-1);
8983 }
8984 nodePush(ctxt, newDoc->children);
8985 if (ctx->myDoc == NULL) {
8986 ctxt->myDoc = newDoc;
8987 } else {
8988 ctxt->myDoc = ctx->myDoc;
8989 newDoc->children->doc = ctx->myDoc;
8990 }
8991
Daniel Veillard87a764e2001-06-20 17:41:10 +00008992 /*
8993 * Get the 4 first bytes and decode the charset
8994 * if enc != XML_CHAR_ENCODING_NONE
8995 * plug some encoding conversion routines.
8996 */
8997 GROW
8998 start[0] = RAW;
8999 start[1] = NXT(1);
9000 start[2] = NXT(2);
9001 start[3] = NXT(3);
9002 enc = xmlDetectCharEncoding(start, 4);
9003 if (enc != XML_CHAR_ENCODING_NONE) {
9004 xmlSwitchEncoding(ctxt, enc);
9005 }
9006
Owen Taylor3473f882001-02-23 17:55:21 +00009007 /*
9008 * Parse a possible text declaration first
9009 */
Owen Taylor3473f882001-02-23 17:55:21 +00009010 if ((RAW == '<') && (NXT(1) == '?') &&
9011 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9012 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9013 xmlParseTextDecl(ctxt);
9014 }
9015
9016 /*
9017 * Doing validity checking on chunk doesn't make sense
9018 */
9019 ctxt->instate = XML_PARSER_CONTENT;
9020 ctxt->validate = ctx->validate;
9021 ctxt->loadsubset = ctx->loadsubset;
9022 ctxt->depth = ctx->depth + 1;
9023 ctxt->replaceEntities = ctx->replaceEntities;
9024 if (ctxt->validate) {
9025 ctxt->vctxt.error = ctx->vctxt.error;
9026 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009027 } else {
9028 ctxt->vctxt.error = NULL;
9029 ctxt->vctxt.warning = NULL;
9030 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009031 ctxt->vctxt.nodeTab = NULL;
9032 ctxt->vctxt.nodeNr = 0;
9033 ctxt->vctxt.nodeMax = 0;
9034 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009035
9036 xmlParseContent(ctxt);
9037
9038 if ((RAW == '<') && (NXT(1) == '/')) {
9039 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9041 ctxt->sax->error(ctxt->userData,
9042 "chunk is not well balanced\n");
9043 ctxt->wellFormed = 0;
9044 ctxt->disableSAX = 1;
9045 } else if (RAW != 0) {
9046 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9048 ctxt->sax->error(ctxt->userData,
9049 "extra content at the end of well balanced chunk\n");
9050 ctxt->wellFormed = 0;
9051 ctxt->disableSAX = 1;
9052 }
9053 if (ctxt->node != newDoc->children) {
9054 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9056 ctxt->sax->error(ctxt->userData,
9057 "chunk is not well balanced\n");
9058 ctxt->wellFormed = 0;
9059 ctxt->disableSAX = 1;
9060 }
9061
9062 if (!ctxt->wellFormed) {
9063 if (ctxt->errNo == 0)
9064 ret = 1;
9065 else
9066 ret = ctxt->errNo;
9067 } else {
9068 if (list != NULL) {
9069 xmlNodePtr cur;
9070
9071 /*
9072 * Return the newly created nodeset after unlinking it from
9073 * they pseudo parent.
9074 */
9075 cur = newDoc->children->children;
9076 *list = cur;
9077 while (cur != NULL) {
9078 cur->parent = NULL;
9079 cur = cur->next;
9080 }
9081 newDoc->children->children = NULL;
9082 }
9083 ret = 0;
9084 }
9085 ctxt->sax = oldsax;
9086 xmlFreeParserCtxt(ctxt);
9087 newDoc->intSubset = NULL;
9088 newDoc->extSubset = NULL;
9089 xmlFreeDoc(newDoc);
9090
9091 return(ret);
9092}
9093
9094/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009095 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009096 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009097 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009098 * @sax: the SAX handler bloc (possibly NULL)
9099 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9100 * @depth: Used for loop detection, use 0
9101 * @URL: the URL for the entity to load
9102 * @ID: the System ID for the entity to load
9103 * @list: the return value for the set of parsed nodes
9104 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009105 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009106 *
9107 * Returns 0 if the entity is well formed, -1 in case of args problem and
9108 * the parser error code otherwise
9109 */
9110
Daniel Veillard257d9102001-05-08 10:41:44 +00009111static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009112xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9113 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009114 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009115 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009116 xmlParserCtxtPtr ctxt;
9117 xmlDocPtr newDoc;
9118 xmlSAXHandlerPtr oldsax = NULL;
9119 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009120 xmlChar start[4];
9121 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009122
9123 if (depth > 40) {
9124 return(XML_ERR_ENTITY_LOOP);
9125 }
9126
9127
9128
9129 if (list != NULL)
9130 *list = NULL;
9131 if ((URL == NULL) && (ID == NULL))
9132 return(-1);
9133 if (doc == NULL) /* @@ relax but check for dereferences */
9134 return(-1);
9135
9136
9137 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9138 if (ctxt == NULL) return(-1);
9139 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009140 if (oldctxt != NULL) {
9141 ctxt->_private = oldctxt->_private;
9142 ctxt->loadsubset = oldctxt->loadsubset;
9143 ctxt->validate = oldctxt->validate;
9144 ctxt->external = oldctxt->external;
9145 } else {
9146 /*
9147 * Doing validity checking on chunk without context
9148 * doesn't make sense
9149 */
9150 ctxt->_private = NULL;
9151 ctxt->validate = 0;
9152 ctxt->external = 2;
9153 ctxt->loadsubset = 0;
9154 }
Owen Taylor3473f882001-02-23 17:55:21 +00009155 if (sax != NULL) {
9156 oldsax = ctxt->sax;
9157 ctxt->sax = sax;
9158 if (user_data != NULL)
9159 ctxt->userData = user_data;
9160 }
9161 newDoc = xmlNewDoc(BAD_CAST "1.0");
9162 if (newDoc == NULL) {
9163 xmlFreeParserCtxt(ctxt);
9164 return(-1);
9165 }
9166 if (doc != NULL) {
9167 newDoc->intSubset = doc->intSubset;
9168 newDoc->extSubset = doc->extSubset;
9169 }
9170 if (doc->URL != NULL) {
9171 newDoc->URL = xmlStrdup(doc->URL);
9172 }
9173 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9174 if (newDoc->children == NULL) {
9175 if (sax != NULL)
9176 ctxt->sax = oldsax;
9177 xmlFreeParserCtxt(ctxt);
9178 newDoc->intSubset = NULL;
9179 newDoc->extSubset = NULL;
9180 xmlFreeDoc(newDoc);
9181 return(-1);
9182 }
9183 nodePush(ctxt, newDoc->children);
9184 if (doc == NULL) {
9185 ctxt->myDoc = newDoc;
9186 } else {
9187 ctxt->myDoc = doc;
9188 newDoc->children->doc = doc;
9189 }
9190
Daniel Veillard87a764e2001-06-20 17:41:10 +00009191 /*
9192 * Get the 4 first bytes and decode the charset
9193 * if enc != XML_CHAR_ENCODING_NONE
9194 * plug some encoding conversion routines.
9195 */
9196 GROW;
9197 start[0] = RAW;
9198 start[1] = NXT(1);
9199 start[2] = NXT(2);
9200 start[3] = NXT(3);
9201 enc = xmlDetectCharEncoding(start, 4);
9202 if (enc != XML_CHAR_ENCODING_NONE) {
9203 xmlSwitchEncoding(ctxt, enc);
9204 }
9205
Owen Taylor3473f882001-02-23 17:55:21 +00009206 /*
9207 * Parse a possible text declaration first
9208 */
Owen Taylor3473f882001-02-23 17:55:21 +00009209 if ((RAW == '<') && (NXT(1) == '?') &&
9210 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9211 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9212 xmlParseTextDecl(ctxt);
9213 }
9214
Owen Taylor3473f882001-02-23 17:55:21 +00009215 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009216 ctxt->depth = depth;
9217
9218 xmlParseContent(ctxt);
9219
9220 if ((RAW == '<') && (NXT(1) == '/')) {
9221 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9223 ctxt->sax->error(ctxt->userData,
9224 "chunk is not well balanced\n");
9225 ctxt->wellFormed = 0;
9226 ctxt->disableSAX = 1;
9227 } else if (RAW != 0) {
9228 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9230 ctxt->sax->error(ctxt->userData,
9231 "extra content at the end of well balanced chunk\n");
9232 ctxt->wellFormed = 0;
9233 ctxt->disableSAX = 1;
9234 }
9235 if (ctxt->node != newDoc->children) {
9236 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9238 ctxt->sax->error(ctxt->userData,
9239 "chunk is not well balanced\n");
9240 ctxt->wellFormed = 0;
9241 ctxt->disableSAX = 1;
9242 }
9243
9244 if (!ctxt->wellFormed) {
9245 if (ctxt->errNo == 0)
9246 ret = 1;
9247 else
9248 ret = ctxt->errNo;
9249 } else {
9250 if (list != NULL) {
9251 xmlNodePtr cur;
9252
9253 /*
9254 * Return the newly created nodeset after unlinking it from
9255 * they pseudo parent.
9256 */
9257 cur = newDoc->children->children;
9258 *list = cur;
9259 while (cur != NULL) {
9260 cur->parent = NULL;
9261 cur = cur->next;
9262 }
9263 newDoc->children->children = NULL;
9264 }
9265 ret = 0;
9266 }
9267 if (sax != NULL)
9268 ctxt->sax = oldsax;
9269 xmlFreeParserCtxt(ctxt);
9270 newDoc->intSubset = NULL;
9271 newDoc->extSubset = NULL;
9272 xmlFreeDoc(newDoc);
9273
9274 return(ret);
9275}
9276
9277/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009278 * xmlParseExternalEntity:
9279 * @doc: the document the chunk pertains to
9280 * @sax: the SAX handler bloc (possibly NULL)
9281 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9282 * @depth: Used for loop detection, use 0
9283 * @URL: the URL for the entity to load
9284 * @ID: the System ID for the entity to load
9285 * @list: the return value for the set of parsed nodes
9286 *
9287 * Parse an external general entity
9288 * An external general parsed entity is well-formed if it matches the
9289 * production labeled extParsedEnt.
9290 *
9291 * [78] extParsedEnt ::= TextDecl? content
9292 *
9293 * Returns 0 if the entity is well formed, -1 in case of args problem and
9294 * the parser error code otherwise
9295 */
9296
9297int
9298xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9299 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009300 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9301 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009302}
9303
9304/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009305 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009306 * @doc: the document the chunk pertains to
9307 * @sax: the SAX handler bloc (possibly NULL)
9308 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9309 * @depth: Used for loop detection, use 0
9310 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9311 * @list: the return value for the set of parsed nodes
9312 *
9313 * Parse a well-balanced chunk of an XML document
9314 * called by the parser
9315 * The allowed sequence for the Well Balanced Chunk is the one defined by
9316 * the content production in the XML grammar:
9317 *
9318 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9319 *
9320 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9321 * the parser error code otherwise
9322 */
9323
9324int
9325xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9326 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9327 xmlParserCtxtPtr ctxt;
9328 xmlDocPtr newDoc;
9329 xmlSAXHandlerPtr oldsax = NULL;
9330 int size;
9331 int ret = 0;
9332
9333 if (depth > 40) {
9334 return(XML_ERR_ENTITY_LOOP);
9335 }
9336
9337
9338 if (list != NULL)
9339 *list = NULL;
9340 if (string == NULL)
9341 return(-1);
9342
9343 size = xmlStrlen(string);
9344
9345 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9346 if (ctxt == NULL) return(-1);
9347 ctxt->userData = ctxt;
9348 if (sax != NULL) {
9349 oldsax = ctxt->sax;
9350 ctxt->sax = sax;
9351 if (user_data != NULL)
9352 ctxt->userData = user_data;
9353 }
9354 newDoc = xmlNewDoc(BAD_CAST "1.0");
9355 if (newDoc == NULL) {
9356 xmlFreeParserCtxt(ctxt);
9357 return(-1);
9358 }
9359 if (doc != NULL) {
9360 newDoc->intSubset = doc->intSubset;
9361 newDoc->extSubset = doc->extSubset;
9362 }
9363 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9364 if (newDoc->children == NULL) {
9365 if (sax != NULL)
9366 ctxt->sax = oldsax;
9367 xmlFreeParserCtxt(ctxt);
9368 newDoc->intSubset = NULL;
9369 newDoc->extSubset = NULL;
9370 xmlFreeDoc(newDoc);
9371 return(-1);
9372 }
9373 nodePush(ctxt, newDoc->children);
9374 if (doc == NULL) {
9375 ctxt->myDoc = newDoc;
9376 } else {
9377 ctxt->myDoc = doc;
9378 newDoc->children->doc = doc;
9379 }
9380 ctxt->instate = XML_PARSER_CONTENT;
9381 ctxt->depth = depth;
9382
9383 /*
9384 * Doing validity checking on chunk doesn't make sense
9385 */
9386 ctxt->validate = 0;
9387 ctxt->loadsubset = 0;
9388
9389 xmlParseContent(ctxt);
9390
9391 if ((RAW == '<') && (NXT(1) == '/')) {
9392 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9394 ctxt->sax->error(ctxt->userData,
9395 "chunk is not well balanced\n");
9396 ctxt->wellFormed = 0;
9397 ctxt->disableSAX = 1;
9398 } else if (RAW != 0) {
9399 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9401 ctxt->sax->error(ctxt->userData,
9402 "extra content at the end of well balanced chunk\n");
9403 ctxt->wellFormed = 0;
9404 ctxt->disableSAX = 1;
9405 }
9406 if (ctxt->node != newDoc->children) {
9407 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9409 ctxt->sax->error(ctxt->userData,
9410 "chunk is not well balanced\n");
9411 ctxt->wellFormed = 0;
9412 ctxt->disableSAX = 1;
9413 }
9414
9415 if (!ctxt->wellFormed) {
9416 if (ctxt->errNo == 0)
9417 ret = 1;
9418 else
9419 ret = ctxt->errNo;
9420 } else {
9421 if (list != NULL) {
9422 xmlNodePtr cur;
9423
9424 /*
9425 * Return the newly created nodeset after unlinking it from
9426 * they pseudo parent.
9427 */
9428 cur = newDoc->children->children;
9429 *list = cur;
9430 while (cur != NULL) {
9431 cur->parent = NULL;
9432 cur = cur->next;
9433 }
9434 newDoc->children->children = NULL;
9435 }
9436 ret = 0;
9437 }
9438 if (sax != NULL)
9439 ctxt->sax = oldsax;
9440 xmlFreeParserCtxt(ctxt);
9441 newDoc->intSubset = NULL;
9442 newDoc->extSubset = NULL;
9443 xmlFreeDoc(newDoc);
9444
9445 return(ret);
9446}
9447
9448/**
9449 * xmlSAXParseEntity:
9450 * @sax: the SAX handler block
9451 * @filename: the filename
9452 *
9453 * parse an XML external entity out of context and build a tree.
9454 * It use the given SAX function block to handle the parsing callback.
9455 * If sax is NULL, fallback to the default DOM tree building routines.
9456 *
9457 * [78] extParsedEnt ::= TextDecl? content
9458 *
9459 * This correspond to a "Well Balanced" chunk
9460 *
9461 * Returns the resulting document tree
9462 */
9463
9464xmlDocPtr
9465xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9466 xmlDocPtr ret;
9467 xmlParserCtxtPtr ctxt;
9468 char *directory = NULL;
9469
9470 ctxt = xmlCreateFileParserCtxt(filename);
9471 if (ctxt == NULL) {
9472 return(NULL);
9473 }
9474 if (sax != NULL) {
9475 if (ctxt->sax != NULL)
9476 xmlFree(ctxt->sax);
9477 ctxt->sax = sax;
9478 ctxt->userData = NULL;
9479 }
9480
9481 if ((ctxt->directory == NULL) && (directory == NULL))
9482 directory = xmlParserGetDirectory(filename);
9483
9484 xmlParseExtParsedEnt(ctxt);
9485
9486 if (ctxt->wellFormed)
9487 ret = ctxt->myDoc;
9488 else {
9489 ret = NULL;
9490 xmlFreeDoc(ctxt->myDoc);
9491 ctxt->myDoc = NULL;
9492 }
9493 if (sax != NULL)
9494 ctxt->sax = NULL;
9495 xmlFreeParserCtxt(ctxt);
9496
9497 return(ret);
9498}
9499
9500/**
9501 * xmlParseEntity:
9502 * @filename: the filename
9503 *
9504 * parse an XML external entity out of context and build a tree.
9505 *
9506 * [78] extParsedEnt ::= TextDecl? content
9507 *
9508 * This correspond to a "Well Balanced" chunk
9509 *
9510 * Returns the resulting document tree
9511 */
9512
9513xmlDocPtr
9514xmlParseEntity(const char *filename) {
9515 return(xmlSAXParseEntity(NULL, filename));
9516}
9517
9518/**
9519 * xmlCreateEntityParserCtxt:
9520 * @URL: the entity URL
9521 * @ID: the entity PUBLIC ID
9522 * @base: a posible base for the target URI
9523 *
9524 * Create a parser context for an external entity
9525 * Automatic support for ZLIB/Compress compressed document is provided
9526 * by default if found at compile-time.
9527 *
9528 * Returns the new parser context or NULL
9529 */
9530xmlParserCtxtPtr
9531xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9532 const xmlChar *base) {
9533 xmlParserCtxtPtr ctxt;
9534 xmlParserInputPtr inputStream;
9535 char *directory = NULL;
9536 xmlChar *uri;
9537
9538 ctxt = xmlNewParserCtxt();
9539 if (ctxt == NULL) {
9540 return(NULL);
9541 }
9542
9543 uri = xmlBuildURI(URL, base);
9544
9545 if (uri == NULL) {
9546 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9547 if (inputStream == NULL) {
9548 xmlFreeParserCtxt(ctxt);
9549 return(NULL);
9550 }
9551
9552 inputPush(ctxt, inputStream);
9553
9554 if ((ctxt->directory == NULL) && (directory == NULL))
9555 directory = xmlParserGetDirectory((char *)URL);
9556 if ((ctxt->directory == NULL) && (directory != NULL))
9557 ctxt->directory = directory;
9558 } else {
9559 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9560 if (inputStream == NULL) {
9561 xmlFree(uri);
9562 xmlFreeParserCtxt(ctxt);
9563 return(NULL);
9564 }
9565
9566 inputPush(ctxt, inputStream);
9567
9568 if ((ctxt->directory == NULL) && (directory == NULL))
9569 directory = xmlParserGetDirectory((char *)uri);
9570 if ((ctxt->directory == NULL) && (directory != NULL))
9571 ctxt->directory = directory;
9572 xmlFree(uri);
9573 }
9574
9575 return(ctxt);
9576}
9577
9578/************************************************************************
9579 * *
9580 * Front ends when parsing from a file *
9581 * *
9582 ************************************************************************/
9583
9584/**
9585 * xmlCreateFileParserCtxt:
9586 * @filename: the filename
9587 *
9588 * Create a parser context for a file content.
9589 * Automatic support for ZLIB/Compress compressed document is provided
9590 * by default if found at compile-time.
9591 *
9592 * Returns the new parser context or NULL
9593 */
9594xmlParserCtxtPtr
9595xmlCreateFileParserCtxt(const char *filename)
9596{
9597 xmlParserCtxtPtr ctxt;
9598 xmlParserInputPtr inputStream;
9599 xmlParserInputBufferPtr buf;
9600 char *directory = NULL;
9601
9602 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9603 if (buf == NULL) {
9604 return(NULL);
9605 }
9606
9607 ctxt = xmlNewParserCtxt();
9608 if (ctxt == NULL) {
9609 if (xmlDefaultSAXHandler.error != NULL) {
9610 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9611 }
9612 return(NULL);
9613 }
9614
9615 inputStream = xmlNewInputStream(ctxt);
9616 if (inputStream == NULL) {
9617 xmlFreeParserCtxt(ctxt);
9618 return(NULL);
9619 }
9620
9621 inputStream->filename = xmlMemStrdup(filename);
9622 inputStream->buf = buf;
9623 inputStream->base = inputStream->buf->buffer->content;
9624 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009625 inputStream->end =
9626 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009627
9628 inputPush(ctxt, inputStream);
9629 if ((ctxt->directory == NULL) && (directory == NULL))
9630 directory = xmlParserGetDirectory(filename);
9631 if ((ctxt->directory == NULL) && (directory != NULL))
9632 ctxt->directory = directory;
9633
9634 return(ctxt);
9635}
9636
9637/**
9638 * xmlSAXParseFile:
9639 * @sax: the SAX handler block
9640 * @filename: the filename
9641 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9642 * documents
9643 *
9644 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9645 * compressed document is provided by default if found at compile-time.
9646 * It use the given SAX function block to handle the parsing callback.
9647 * If sax is NULL, fallback to the default DOM tree building routines.
9648 *
9649 * Returns the resulting document tree
9650 */
9651
9652xmlDocPtr
9653xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9654 int recovery) {
9655 xmlDocPtr ret;
9656 xmlParserCtxtPtr ctxt;
9657 char *directory = NULL;
9658
9659 ctxt = xmlCreateFileParserCtxt(filename);
9660 if (ctxt == NULL) {
9661 return(NULL);
9662 }
9663 if (sax != NULL) {
9664 if (ctxt->sax != NULL)
9665 xmlFree(ctxt->sax);
9666 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009667 }
9668
9669 if ((ctxt->directory == NULL) && (directory == NULL))
9670 directory = xmlParserGetDirectory(filename);
9671 if ((ctxt->directory == NULL) && (directory != NULL))
9672 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9673
9674 xmlParseDocument(ctxt);
9675
9676 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9677 else {
9678 ret = NULL;
9679 xmlFreeDoc(ctxt->myDoc);
9680 ctxt->myDoc = NULL;
9681 }
9682 if (sax != NULL)
9683 ctxt->sax = NULL;
9684 xmlFreeParserCtxt(ctxt);
9685
9686 return(ret);
9687}
9688
9689/**
9690 * xmlRecoverDoc:
9691 * @cur: a pointer to an array of xmlChar
9692 *
9693 * parse an XML in-memory document and build a tree.
9694 * In the case the document is not Well Formed, a tree is built anyway
9695 *
9696 * Returns the resulting document tree
9697 */
9698
9699xmlDocPtr
9700xmlRecoverDoc(xmlChar *cur) {
9701 return(xmlSAXParseDoc(NULL, cur, 1));
9702}
9703
9704/**
9705 * xmlParseFile:
9706 * @filename: the filename
9707 *
9708 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9709 * compressed document is provided by default if found at compile-time.
9710 *
9711 * Returns the resulting document tree
9712 */
9713
9714xmlDocPtr
9715xmlParseFile(const char *filename) {
9716 return(xmlSAXParseFile(NULL, filename, 0));
9717}
9718
9719/**
9720 * xmlRecoverFile:
9721 * @filename: the filename
9722 *
9723 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9724 * compressed document is provided by default if found at compile-time.
9725 * In the case the document is not Well Formed, a tree is built anyway
9726 *
9727 * Returns the resulting document tree
9728 */
9729
9730xmlDocPtr
9731xmlRecoverFile(const char *filename) {
9732 return(xmlSAXParseFile(NULL, filename, 1));
9733}
9734
9735
9736/**
9737 * xmlSetupParserForBuffer:
9738 * @ctxt: an XML parser context
9739 * @buffer: a xmlChar * buffer
9740 * @filename: a file name
9741 *
9742 * Setup the parser context to parse a new buffer; Clears any prior
9743 * contents from the parser context. The buffer parameter must not be
9744 * NULL, but the filename parameter can be
9745 */
9746void
9747xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9748 const char* filename)
9749{
9750 xmlParserInputPtr input;
9751
9752 input = xmlNewInputStream(ctxt);
9753 if (input == NULL) {
9754 perror("malloc");
9755 xmlFree(ctxt);
9756 return;
9757 }
9758
9759 xmlClearParserCtxt(ctxt);
9760 if (filename != NULL)
9761 input->filename = xmlMemStrdup(filename);
9762 input->base = buffer;
9763 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009764 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009765 inputPush(ctxt, input);
9766}
9767
9768/**
9769 * xmlSAXUserParseFile:
9770 * @sax: a SAX handler
9771 * @user_data: The user data returned on SAX callbacks
9772 * @filename: a file name
9773 *
9774 * parse an XML file and call the given SAX handler routines.
9775 * Automatic support for ZLIB/Compress compressed document is provided
9776 *
9777 * Returns 0 in case of success or a error number otherwise
9778 */
9779int
9780xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9781 const char *filename) {
9782 int ret = 0;
9783 xmlParserCtxtPtr ctxt;
9784
9785 ctxt = xmlCreateFileParserCtxt(filename);
9786 if (ctxt == NULL) return -1;
9787 if (ctxt->sax != &xmlDefaultSAXHandler)
9788 xmlFree(ctxt->sax);
9789 ctxt->sax = sax;
9790 if (user_data != NULL)
9791 ctxt->userData = user_data;
9792
9793 xmlParseDocument(ctxt);
9794
9795 if (ctxt->wellFormed)
9796 ret = 0;
9797 else {
9798 if (ctxt->errNo != 0)
9799 ret = ctxt->errNo;
9800 else
9801 ret = -1;
9802 }
9803 if (sax != NULL)
9804 ctxt->sax = NULL;
9805 xmlFreeParserCtxt(ctxt);
9806
9807 return ret;
9808}
9809
9810/************************************************************************
9811 * *
9812 * Front ends when parsing from memory *
9813 * *
9814 ************************************************************************/
9815
9816/**
9817 * xmlCreateMemoryParserCtxt:
9818 * @buffer: a pointer to a char array
9819 * @size: the size of the array
9820 *
9821 * Create a parser context for an XML in-memory document.
9822 *
9823 * Returns the new parser context or NULL
9824 */
9825xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009826xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009827 xmlParserCtxtPtr ctxt;
9828 xmlParserInputPtr input;
9829 xmlParserInputBufferPtr buf;
9830
9831 if (buffer == NULL)
9832 return(NULL);
9833 if (size <= 0)
9834 return(NULL);
9835
9836 ctxt = xmlNewParserCtxt();
9837 if (ctxt == NULL)
9838 return(NULL);
9839
9840 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9841 if (buf == NULL) return(NULL);
9842
9843 input = xmlNewInputStream(ctxt);
9844 if (input == NULL) {
9845 xmlFreeParserCtxt(ctxt);
9846 return(NULL);
9847 }
9848
9849 input->filename = NULL;
9850 input->buf = buf;
9851 input->base = input->buf->buffer->content;
9852 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009853 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009854
9855 inputPush(ctxt, input);
9856 return(ctxt);
9857}
9858
9859/**
9860 * xmlSAXParseMemory:
9861 * @sax: the SAX handler block
9862 * @buffer: an pointer to a char array
9863 * @size: the size of the array
9864 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9865 * documents
9866 *
9867 * parse an XML in-memory block and use the given SAX function block
9868 * to handle the parsing callback. If sax is NULL, fallback to the default
9869 * DOM tree building routines.
9870 *
9871 * Returns the resulting document tree
9872 */
9873xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009874xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9875 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009876 xmlDocPtr ret;
9877 xmlParserCtxtPtr ctxt;
9878
9879 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9880 if (ctxt == NULL) return(NULL);
9881 if (sax != NULL) {
9882 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009883 }
9884
9885 xmlParseDocument(ctxt);
9886
9887 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9888 else {
9889 ret = NULL;
9890 xmlFreeDoc(ctxt->myDoc);
9891 ctxt->myDoc = NULL;
9892 }
9893 if (sax != NULL)
9894 ctxt->sax = NULL;
9895 xmlFreeParserCtxt(ctxt);
9896
9897 return(ret);
9898}
9899
9900/**
9901 * xmlParseMemory:
9902 * @buffer: an pointer to a char array
9903 * @size: the size of the array
9904 *
9905 * parse an XML in-memory block and build a tree.
9906 *
9907 * Returns the resulting document tree
9908 */
9909
Daniel Veillard50822cb2001-07-26 20:05:51 +00009910xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009911 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9912}
9913
9914/**
9915 * xmlRecoverMemory:
9916 * @buffer: an pointer to a char array
9917 * @size: the size of the array
9918 *
9919 * parse an XML in-memory block and build a tree.
9920 * In the case the document is not Well Formed, a tree is built anyway
9921 *
9922 * Returns the resulting document tree
9923 */
9924
Daniel Veillard50822cb2001-07-26 20:05:51 +00009925xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009926 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9927}
9928
9929/**
9930 * xmlSAXUserParseMemory:
9931 * @sax: a SAX handler
9932 * @user_data: The user data returned on SAX callbacks
9933 * @buffer: an in-memory XML document input
9934 * @size: the length of the XML document in bytes
9935 *
9936 * A better SAX parsing routine.
9937 * parse an XML in-memory buffer and call the given SAX handler routines.
9938 *
9939 * Returns 0 in case of success or a error number otherwise
9940 */
9941int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009942 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009943 int ret = 0;
9944 xmlParserCtxtPtr ctxt;
9945 xmlSAXHandlerPtr oldsax = NULL;
9946
9947 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9948 if (ctxt == NULL) return -1;
9949 if (sax != NULL) {
9950 oldsax = ctxt->sax;
9951 ctxt->sax = sax;
9952 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009953 if (user_data != NULL)
9954 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009955
9956 xmlParseDocument(ctxt);
9957
9958 if (ctxt->wellFormed)
9959 ret = 0;
9960 else {
9961 if (ctxt->errNo != 0)
9962 ret = ctxt->errNo;
9963 else
9964 ret = -1;
9965 }
9966 if (sax != NULL) {
9967 ctxt->sax = oldsax;
9968 }
9969 xmlFreeParserCtxt(ctxt);
9970
9971 return ret;
9972}
9973
9974/**
9975 * xmlCreateDocParserCtxt:
9976 * @cur: a pointer to an array of xmlChar
9977 *
9978 * Creates a parser context for an XML in-memory document.
9979 *
9980 * Returns the new parser context or NULL
9981 */
9982xmlParserCtxtPtr
9983xmlCreateDocParserCtxt(xmlChar *cur) {
9984 int len;
9985
9986 if (cur == NULL)
9987 return(NULL);
9988 len = xmlStrlen(cur);
9989 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9990}
9991
9992/**
9993 * xmlSAXParseDoc:
9994 * @sax: the SAX handler block
9995 * @cur: a pointer to an array of xmlChar
9996 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9997 * documents
9998 *
9999 * parse an XML in-memory document and build a tree.
10000 * It use the given SAX function block to handle the parsing callback.
10001 * If sax is NULL, fallback to the default DOM tree building routines.
10002 *
10003 * Returns the resulting document tree
10004 */
10005
10006xmlDocPtr
10007xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10008 xmlDocPtr ret;
10009 xmlParserCtxtPtr ctxt;
10010
10011 if (cur == NULL) return(NULL);
10012
10013
10014 ctxt = xmlCreateDocParserCtxt(cur);
10015 if (ctxt == NULL) return(NULL);
10016 if (sax != NULL) {
10017 ctxt->sax = sax;
10018 ctxt->userData = NULL;
10019 }
10020
10021 xmlParseDocument(ctxt);
10022 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10023 else {
10024 ret = NULL;
10025 xmlFreeDoc(ctxt->myDoc);
10026 ctxt->myDoc = NULL;
10027 }
10028 if (sax != NULL)
10029 ctxt->sax = NULL;
10030 xmlFreeParserCtxt(ctxt);
10031
10032 return(ret);
10033}
10034
10035/**
10036 * xmlParseDoc:
10037 * @cur: a pointer to an array of xmlChar
10038 *
10039 * parse an XML in-memory document and build a tree.
10040 *
10041 * Returns the resulting document tree
10042 */
10043
10044xmlDocPtr
10045xmlParseDoc(xmlChar *cur) {
10046 return(xmlSAXParseDoc(NULL, cur, 0));
10047}
10048
10049
10050/************************************************************************
10051 * *
10052 * Miscellaneous *
10053 * *
10054 ************************************************************************/
10055
10056#ifdef LIBXML_XPATH_ENABLED
10057#include <libxml/xpath.h>
10058#endif
10059
/*
 * Non-zero once xmlInitParser() has run its one-time setup; set back to
 * zero by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
10061
10062/**
10063 * xmlInitParser:
10064 *
10065 * Initialization function for the XML parser.
10066 * This is not reentrant. Call once before processing in case of
10067 * use in multithreaded programs.
10068 */
10069
10070void
10071xmlInitParser(void) {
10072 if (xmlParserInitialized) return;
10073
10074 xmlInitCharEncodingHandlers();
10075 xmlInitializePredefinedEntities();
10076 xmlDefaultSAXHandlerInit();
10077 xmlRegisterDefaultInputCallbacks();
10078 xmlRegisterDefaultOutputCallbacks();
10079#ifdef LIBXML_HTML_ENABLED
10080 htmlInitAutoClose();
10081 htmlDefaultSAXHandlerInit();
10082#endif
10083#ifdef LIBXML_XPATH_ENABLED
10084 xmlXPathInit();
10085#endif
10086 xmlParserInitialized = 1;
10087}
10088
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void) {
    /* Reset the flag so that a later xmlInitParser() runs its setup again. */
    xmlParserInitialized = 0;
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
10104