blob: 5e11c3ee03e9eec6e8e037e14fa6a8b3709a5815 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillard5d96fff2001-08-31 14:55:30 +000025 * from the SAX callbacks or as standalones functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * Processing-instruction targets starting with "xml" that the W3C
 * specifications allow. The list is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
102/************************************************************************
103 * *
104 * Parser stacks related functions and macros *
105 * *
106 ************************************************************************/
107
108xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
109 const xmlChar ** str);
110
/*
 * Generic generator for the stacks stored in the parser context.
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions working on
 * the array ctxt->name##Tab (capacity ctxt->name##Max, current depth
 * ctxt->name##Nr) and on ctxt->name, which mirrors the top of the stack:
 *
 *   name##Push(ctxt, value)
 *       Stores @value on top of the stack, doubling the array first when
 *       it is full. Returns the index used for @value. Returns 0 when the
 *       array cannot be grown; that is indistinguishable from a push at
 *       index 0 and is kept for compatibility with existing callers. On
 *       that failure the stack is left exactly as it was.
 *
 *   name##Pop(ctxt)
 *       Removes and returns the top value, or 0 when the stack is empty.
 *       ctxt->name is reset to the new top, or NULL if none is left.
 *
 * The table is grown through temporaries: a failed realloc must neither
 * lose the existing table nor record a capacity that was never allocated.
 */
#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        int newMax = ctxt->name##Max * 2;                               \
        type *newTab = (type *) xmlRealloc(ctxt->name##Tab,             \
                     newMax * sizeof(ctxt->name##Tab[0]));              \
        if (newTab == NULL) {                                           \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = newTab;                                       \
        ctxt->name##Max = newMax;                                       \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
143
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000144/**
145 * inputPop:
146 * @ctxt: an XML parser context
147 *
148 * Pops the top parser input from the input stack
149 *
150 * Returns the input just removed
151 */
152/**
153 * inputPush:
154 * @ctxt: an XML parser context
155 * @input: the parser input
156 *
157 * Pushes a new parser input on top of the input stack
158 */
159/**
160 * namePop:
161 * @ctxt: an XML parser context
162 *
163 * Pops the top element name from the name stack
164 *
165 * Returns the name just removed
166 */
167/**
168 * namePush:
169 * @ctxt: an XML parser context
170 * @name: the element name
171 *
172 * Pushes a new element name on top of the name stack
173 */
174/**
175 * nodePop:
176 * @ctxt: an XML parser context
177 *
178 * Pops the top element node from the node stack
179 *
180 * Returns the node just removed
181 */
182/**
183 * nodePush:
184 * @ctxt: an XML parser context
185 * @node: the element node
186 *
187 * Pushes a new element node on top of the node stack
188 */
Owen Taylor3473f882001-02-23 17:55:21 +0000189/*
190 * Those macros actually generate the functions
191 */
192PUSH_AND_POP(extern, xmlParserInputPtr, input)
193PUSH_AND_POP(extern, xmlNodePtr, node)
194PUSH_AND_POP(extern, xmlChar*, name)
195
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000196static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000197 if (ctxt->spaceNr >= ctxt->spaceMax) {
198 ctxt->spaceMax *= 2;
199 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
200 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
201 if (ctxt->spaceTab == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "realloc failed !\n");
204 return(0);
205 }
206 }
207 ctxt->spaceTab[ctxt->spaceNr] = val;
208 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
209 return(ctxt->spaceNr++);
210}
211
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000212static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000213 int ret;
214 if (ctxt->spaceNr <= 0) return(0);
215 ctxt->spaceNr--;
216 if (ctxt->spaceNr > 0)
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
218 else
219 ctxt->space = NULL;
220 ret = ctxt->spaceTab[ctxt->spaceNr];
221 ctxt->spaceTab[ctxt->spaceNr] = -1;
222 return(ret);
223}
224
225/*
226 * Macros for accessing the content. Those should be used only by the parser,
227 * and not exported.
228 *
229 * Dirty macros, i.e. one often need to make assumption on the context to
230 * use them
231 *
232 * CUR_PTR return the current pointer to the xmlChar to be parsed.
233 * To be used with extreme caution since operations consuming
234 * characters may move the input buffer to a different location !
235 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
236 * This should be used internally by the parser
237 * only to compare to ASCII values otherwise it would break when
238 * running with UTF-8 encoding.
239 * RAW same as CUR but in the input buffer, bypass any token
240 * extraction that may have been done
241 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
242 * to compare on ASCII based substring.
243 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
244 * strings within the parser.
245 *
246 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
247 *
248 * NEXT Skip to the next character, this does the proper decoding
249 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
250 * NEXTL(l) Skip l xmlChars in the input buffer
251 * CUR_CHAR(l) returns the current unicode character (int), set l
252 * to the number of xmlChars used for the encoding [0-5].
253 * CUR_SCHAR same but operate on a string instead of the context
254 * COPY_BUF copy the current unicode char to the target buffer, increment
255 * the index
256 * GROW, SHRINK handling of input buffers
257 */
258
/* Current input byte, or -1 while a token is pending in ctxt->token. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* The pending token when there is one, otherwise the current input byte. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* Input byte located (val) positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Pointer to the current position in the input buffer. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the input by (val) bytes. A '%' found at the new position is
 * handed to xmlParserHandlePEReference(), and the input is popped when it
 * is at a 0 byte and cannot be refilled.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * Discard already consumed input once more than INPUT_CHUNK bytes lie
 * behind the current position.
 * Kept as a bare if-block: turning it into do { } while (0) would break
 * any call site written without a trailing semicolon.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

/*
 * Refill the input when fewer than INPUT_CHUNK bytes remain ahead of the
 * current position. Kept as a bare if-block for the same reason as SHRINK.
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one byte, refilling the input when a 0 is reached. */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance by (l) bytes, updating line/column from the byte being left
 * and clearing any pending token. The argument is parenthesized so that
 * an expression can be passed safely.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* (l) must be an lvalue: it receives the length reported by the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character (v) to buffer (b) at index (i), advancing (i): a plain
 * byte store when (l) is 1, xmlCopyCharMultiByte() otherwise.
 * This is an unbraced if/else: only use it as a full statement inside
 * braces.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000311
312/**
313 * xmlSkipBlankChars:
314 * @ctxt: the XML parser context
315 *
316 * skip all blanks character found at that point in the input streams.
317 * It pops up finished entities in the process if allowable at that point.
318 *
319 * Returns the number of space chars skipped
320 */
321
322int
323xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000324 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000325
Daniel Veillard02141ea2001-04-30 11:46:40 +0000326 if (ctxt->token != 0) {
327 if (!IS_BLANK(ctxt->token))
328 return(0);
329 ctxt->token = 0;
330 res++;
331 }
Owen Taylor3473f882001-02-23 17:55:21 +0000332 /*
333 * It's Okay to use CUR/NEXT here since all the blanks are on
334 * the ASCII range.
335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
337 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000338 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000340 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000341 cur = ctxt->input->cur;
342 while (IS_BLANK(*cur)) {
343 if (*cur == '\n') {
344 ctxt->input->line++; ctxt->input->col = 1;
345 }
346 cur++;
347 res++;
348 if (*cur == 0) {
349 ctxt->input->cur = cur;
350 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
351 cur = ctxt->input->cur;
352 }
353 }
354 ctxt->input->cur = cur;
355 } else {
356 int cur;
357 do {
358 cur = CUR;
359 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
360 NEXT;
361 cur = CUR;
362 res++;
363 }
364 while ((cur == 0) && (ctxt->inputNr > 1) &&
365 (ctxt->instate != XML_PARSER_COMMENT)) {
366 xmlPopInput(ctxt);
367 cur = CUR;
368 }
369 /*
370 * Need to handle support of entities branching here
371 */
372 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
373 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
374 }
Owen Taylor3473f882001-02-23 17:55:21 +0000375 return(res);
376}
377
378/************************************************************************
379 * *
380 * Commodity functions to handle entities *
381 * *
382 ************************************************************************/
383
384/**
385 * xmlPopInput:
386 * @ctxt: an XML parser context
387 *
388 * xmlPopInput: the current input pointed by ctxt->input came to an end
389 * pop it and return the next char.
390 *
391 * Returns the current xmlChar in the parser context
392 */
393xmlChar
394xmlPopInput(xmlParserCtxtPtr ctxt) {
395 if (ctxt->inputNr == 1) return(0); /* End of main Input */
396 if (xmlParserDebugEntities)
397 xmlGenericError(xmlGenericErrorContext,
398 "Popping input %d\n", ctxt->inputNr);
399 xmlFreeInputStream(inputPop(ctxt));
400 if ((*ctxt->input->cur == 0) &&
401 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
402 return(xmlPopInput(ctxt));
403 return(CUR);
404}
405
406/**
407 * xmlPushInput:
408 * @ctxt: an XML parser context
409 * @input: an XML parser input fragment (entity, XML fragment ...).
410 *
411 * xmlPushInput: switch to a new input stream which is stacked on top
412 * of the previous one(s).
413 */
414void
415xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
416 if (input == NULL) return;
417
418 if (xmlParserDebugEntities) {
419 if ((ctxt->input != NULL) && (ctxt->input->filename))
420 xmlGenericError(xmlGenericErrorContext,
421 "%s(%d): ", ctxt->input->filename,
422 ctxt->input->line);
423 xmlGenericError(xmlGenericErrorContext,
424 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
425 }
426 inputPush(ctxt, input);
427 GROW;
428}
429
430/**
431 * xmlParseCharRef:
432 * @ctxt: an XML parser context
433 *
434 * parse Reference declarations
435 *
436 * [66] CharRef ::= '&#' [0-9]+ ';' |
437 * '&#x' [0-9a-fA-F]+ ';'
438 *
439 * [ WFC: Legal Character ]
440 * Characters referred to using character references must match the
441 * production for Char.
442 *
443 * Returns the value parsed (as an int), 0 in case of error
444 */
445int
446xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000447 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000448 int count = 0;
449
450 if (ctxt->token != 0) {
451 val = ctxt->token;
452 ctxt->token = 0;
453 return(val);
454 }
455 /*
456 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
457 */
458 if ((RAW == '&') && (NXT(1) == '#') &&
459 (NXT(2) == 'x')) {
460 SKIP(3);
461 GROW;
462 while (RAW != ';') { /* loop blocked by count */
463 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
464 val = val * 16 + (CUR - '0');
465 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
466 val = val * 16 + (CUR - 'a') + 10;
467 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
468 val = val * 16 + (CUR - 'A') + 10;
469 else {
470 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
472 ctxt->sax->error(ctxt->userData,
473 "xmlParseCharRef: invalid hexadecimal value\n");
474 ctxt->wellFormed = 0;
475 ctxt->disableSAX = 1;
476 val = 0;
477 break;
478 }
479 NEXT;
480 count++;
481 }
482 if (RAW == ';') {
483 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
484 ctxt->nbChars ++;
485 ctxt->input->cur++;
486 }
487 } else if ((RAW == '&') && (NXT(1) == '#')) {
488 SKIP(2);
489 GROW;
490 while (RAW != ';') { /* loop blocked by count */
491 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
492 val = val * 10 + (CUR - '0');
493 else {
494 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
496 ctxt->sax->error(ctxt->userData,
497 "xmlParseCharRef: invalid decimal value\n");
498 ctxt->wellFormed = 0;
499 ctxt->disableSAX = 1;
500 val = 0;
501 break;
502 }
503 NEXT;
504 count++;
505 }
506 if (RAW == ';') {
507 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
508 ctxt->nbChars ++;
509 ctxt->input->cur++;
510 }
511 } else {
512 ctxt->errNo = XML_ERR_INVALID_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid value\n");
516 ctxt->wellFormed = 0;
517 ctxt->disableSAX = 1;
518 }
519
520 /*
521 * [ WFC: Legal Character ]
522 * Characters referred to using character references must match the
523 * production for Char.
524 */
525 if (IS_CHAR(val)) {
526 return(val);
527 } else {
528 ctxt->errNo = XML_ERR_INVALID_CHAR;
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
531 val);
532 ctxt->wellFormed = 0;
533 ctxt->disableSAX = 1;
534 }
535 return(0);
536}
537
538/**
539 * xmlParseStringCharRef:
540 * @ctxt: an XML parser context
541 * @str: a pointer to an index in the string
542 *
543 * parse Reference declarations, variant parsing from a string rather
544 * than an an input flow.
545 *
546 * [66] CharRef ::= '&#' [0-9]+ ';' |
547 * '&#x' [0-9a-fA-F]+ ';'
548 *
549 * [ WFC: Legal Character ]
550 * Characters referred to using character references must match the
551 * production for Char.
552 *
553 * Returns the value parsed (as an int), 0 in case of error, str will be
554 * updated to the current value of the index
555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000556static int
Owen Taylor3473f882001-02-23 17:55:21 +0000557xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
558 const xmlChar *ptr;
559 xmlChar cur;
560 int val = 0;
561
562 if ((str == NULL) || (*str == NULL)) return(0);
563 ptr = *str;
564 cur = *ptr;
565 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
566 ptr += 3;
567 cur = *ptr;
568 while (cur != ';') { /* Non input consuming loop */
569 if ((cur >= '0') && (cur <= '9'))
570 val = val * 16 + (cur - '0');
571 else if ((cur >= 'a') && (cur <= 'f'))
572 val = val * 16 + (cur - 'a') + 10;
573 else if ((cur >= 'A') && (cur <= 'F'))
574 val = val * 16 + (cur - 'A') + 10;
575 else {
576 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
578 ctxt->sax->error(ctxt->userData,
579 "xmlParseStringCharRef: invalid hexadecimal value\n");
580 ctxt->wellFormed = 0;
581 ctxt->disableSAX = 1;
582 val = 0;
583 break;
584 }
585 ptr++;
586 cur = *ptr;
587 }
588 if (cur == ';')
589 ptr++;
590 } else if ((cur == '&') && (ptr[1] == '#')){
591 ptr += 2;
592 cur = *ptr;
593 while (cur != ';') { /* Non input consuming loops */
594 if ((cur >= '0') && (cur <= '9'))
595 val = val * 10 + (cur - '0');
596 else {
597 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid decimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else {
612 ctxt->errNo = XML_ERR_INVALID_CHARREF;
613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
614 ctxt->sax->error(ctxt->userData,
615 "xmlParseCharRef: invalid value\n");
616 ctxt->wellFormed = 0;
617 ctxt->disableSAX = 1;
618 return(0);
619 }
620 *str = ptr;
621
622 /*
623 * [ WFC: Legal Character ]
624 * Characters referred to using character references must match the
625 * production for Char.
626 */
627 if (IS_CHAR(val)) {
628 return(val);
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHAR;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
633 "CharRef: invalid xmlChar value %d\n", val);
634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 }
637 return(0);
638}
639
640/**
641 * xmlParserHandlePEReference:
642 * @ctxt: the parser context
643 *
644 * [69] PEReference ::= '%' Name ';'
645 *
646 * [ WFC: No Recursion ]
647 * A parsed entity must not contain a recursive
648 * reference to itself, either directly or indirectly.
649 *
650 * [ WFC: Entity Declared ]
651 * In a document without any DTD, a document with only an internal DTD
652 * subset which contains no parameter entity references, or a document
653 * with "standalone='yes'", ... ... The declaration of a parameter
654 * entity must precede any reference to it...
655 *
656 * [ VC: Entity Declared ]
657 * In a document with an external subset or external parameter entities
658 * with "standalone='no'", ... ... The declaration of a parameter entity
659 * must precede any reference to it...
660 *
661 * [ WFC: In DTD ]
662 * Parameter-entity references may only appear in the DTD.
663 * NOTE: misleading but this is handled.
664 *
665 * A PEReference may have been detected in the current input stream
666 * the handling is done accordingly to
667 * http://www.w3.org/TR/REC-xml#entproc
668 * i.e.
669 * - Included in literal in entity values
670 * - Included as Paraemeter Entity reference within DTDs
671 */
672void
673xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
674 xmlChar *name;
675 xmlEntityPtr entity = NULL;
676 xmlParserInputPtr input;
677
678 if (ctxt->token != 0) {
679 return;
680 }
681 if (RAW != '%') return;
682 switch(ctxt->instate) {
683 case XML_PARSER_CDATA_SECTION:
684 return;
685 case XML_PARSER_COMMENT:
686 return;
687 case XML_PARSER_START_TAG:
688 return;
689 case XML_PARSER_END_TAG:
690 return;
691 case XML_PARSER_EOF:
692 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
694 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
695 ctxt->wellFormed = 0;
696 ctxt->disableSAX = 1;
697 return;
698 case XML_PARSER_PROLOG:
699 case XML_PARSER_START:
700 case XML_PARSER_MISC:
701 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
703 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
704 ctxt->wellFormed = 0;
705 ctxt->disableSAX = 1;
706 return;
707 case XML_PARSER_ENTITY_DECL:
708 case XML_PARSER_CONTENT:
709 case XML_PARSER_ATTRIBUTE_VALUE:
710 case XML_PARSER_PI:
711 case XML_PARSER_SYSTEM_LITERAL:
712 /* we just ignore it there */
713 return;
714 case XML_PARSER_EPILOG:
715 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
717 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
718 ctxt->wellFormed = 0;
719 ctxt->disableSAX = 1;
720 return;
721 case XML_PARSER_ENTITY_VALUE:
722 /*
723 * NOTE: in the case of entity values, we don't do the
724 * substitution here since we need the literal
725 * entity value to be able to save the internal
726 * subset of the document.
727 * This will be handled by xmlStringDecodeEntities
728 */
729 return;
730 case XML_PARSER_DTD:
731 /*
732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
733 * In the internal DTD subset, parameter-entity references
734 * can occur only where markup declarations can occur, not
735 * within markup declarations.
736 * In that case this is handled in xmlParseMarkupDecl
737 */
738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
739 return;
740 break;
741 case XML_PARSER_IGNORE:
742 return;
743 }
744
745 NEXT;
746 name = xmlParseName(ctxt);
747 if (xmlParserDebugEntities)
748 xmlGenericError(xmlGenericErrorContext,
749 "PE Reference: %s\n", name);
750 if (name == NULL) {
751 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
753 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
754 ctxt->wellFormed = 0;
755 ctxt->disableSAX = 1;
756 } else {
757 if (RAW == ';') {
758 NEXT;
759 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
760 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
761 if (entity == NULL) {
762
763 /*
764 * [ WFC: Entity Declared ]
765 * In a document without any DTD, a document with only an
766 * internal DTD subset which contains no parameter entity
767 * references, or a document with "standalone='yes'", ...
768 * ... The declaration of a parameter entity must precede
769 * any reference to it...
770 */
771 if ((ctxt->standalone == 1) ||
772 ((ctxt->hasExternalSubset == 0) &&
773 (ctxt->hasPErefs == 0))) {
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
775 ctxt->sax->error(ctxt->userData,
776 "PEReference: %%%s; not found\n", name);
777 ctxt->wellFormed = 0;
778 ctxt->disableSAX = 1;
779 } else {
780 /*
781 * [ VC: Entity Declared ]
782 * In a document with an external subset or external
783 * parameter entities with "standalone='no'", ...
784 * ... The declaration of a parameter entity must precede
785 * any reference to it...
786 */
787 if ((!ctxt->disableSAX) &&
788 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
789 ctxt->vctxt.error(ctxt->vctxt.userData,
790 "PEReference: %%%s; not found\n", name);
791 } else if ((!ctxt->disableSAX) &&
792 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
793 ctxt->sax->warning(ctxt->userData,
794 "PEReference: %%%s; not found\n", name);
795 ctxt->valid = 0;
796 }
797 } else {
798 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
799 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000800 xmlChar start[4];
801 xmlCharEncoding enc;
802
Owen Taylor3473f882001-02-23 17:55:21 +0000803 /*
804 * handle the extra spaces added before and after
805 * c.f. http://www.w3.org/TR/REC-xml#as-PE
806 * this is done independantly.
807 */
808 input = xmlNewEntityInputStream(ctxt, entity);
809 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000810
811 /*
812 * Get the 4 first bytes and decode the charset
813 * if enc != XML_CHAR_ENCODING_NONE
814 * plug some encoding conversion routines.
815 */
816 GROW
817 start[0] = RAW;
818 start[1] = NXT(1);
819 start[2] = NXT(2);
820 start[3] = NXT(3);
821 enc = xmlDetectCharEncoding(start, 4);
822 if (enc != XML_CHAR_ENCODING_NONE) {
823 xmlSwitchEncoding(ctxt, enc);
824 }
825
Owen Taylor3473f882001-02-23 17:55:21 +0000826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
827 (RAW == '<') && (NXT(1) == '?') &&
828 (NXT(2) == 'x') && (NXT(3) == 'm') &&
829 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
830 xmlParseTextDecl(ctxt);
831 }
832 if (ctxt->token == 0)
833 ctxt->token = ' ';
834 } else {
835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
836 ctxt->sax->error(ctxt->userData,
837 "xmlHandlePEReference: %s is not a parameter entity\n",
838 name);
839 ctxt->wellFormed = 0;
840 ctxt->disableSAX = 1;
841 }
842 }
843 } else {
844 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "xmlHandlePEReference: expecting ';'\n");
848 ctxt->wellFormed = 0;
849 ctxt->disableSAX = 1;
850 }
851 xmlFree(name);
852 }
853}
854
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It expects
 * a companion integer variable named <buffer>_size in scope and must be
 * used inside a function returning a pointer: on allocation failure it
 * reports the error, releases the old buffer (which realloc leaves
 * valid, and which would otherwise be leaked) and returns NULL from the
 * enclosing function.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
867
868/**
869 * xmlStringDecodeEntities:
870 * @ctxt: the parser context
871 * @str: the input string
872 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
873 * @end: an end marker xmlChar, 0 if none
874 * @end2: an end marker xmlChar, 0 if none
875 * @end3: an end marker xmlChar, 0 if none
876 *
877 * Takes a entity string content and process to do the adequate subtitutions.
878 *
879 * [67] Reference ::= EntityRef | CharRef
880 *
881 * [69] PEReference ::= '%' Name ';'
882 *
883 * Returns A newly allocated string with the substitution done. The caller
884 * must deallocate it !
885 */
886xmlChar *
887xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
888 xmlChar end, xmlChar end2, xmlChar end3) {
889 xmlChar *buffer = NULL;
890 int buffer_size = 0;
891
892 xmlChar *current = NULL;
893 xmlEntityPtr ent;
894 int c,l;
895 int nbchars = 0;
896
897 if (str == NULL)
898 return(NULL);
899
900 if (ctxt->depth > 40) {
901 ctxt->errNo = XML_ERR_ENTITY_LOOP;
902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
903 ctxt->sax->error(ctxt->userData,
904 "Detected entity reference loop\n");
905 ctxt->wellFormed = 0;
906 ctxt->disableSAX = 1;
907 return(NULL);
908 }
909
910 /*
911 * allocate a translation buffer.
912 */
913 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
914 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
915 if (buffer == NULL) {
916 perror("xmlDecodeEntities: malloc failed");
917 return(NULL);
918 }
919
920 /*
921 * Ok loop until we reach one of the ending char or a size limit.
922 * we are operating on already parsed values.
923 */
924 c = CUR_SCHAR(str, l);
925 while ((c != 0) && (c != end) && /* non input consuming loop */
926 (c != end2) && (c != end3)) {
927
928 if (c == 0) break;
929 if ((c == '&') && (str[1] == '#')) {
930 int val = xmlParseStringCharRef(ctxt, &str);
931 if (val != 0) {
932 COPY_BUF(0,buffer,nbchars,val);
933 }
934 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
935 if (xmlParserDebugEntities)
936 xmlGenericError(xmlGenericErrorContext,
937 "String decoding Entity Reference: %.30s\n",
938 str);
939 ent = xmlParseStringEntityRef(ctxt, &str);
940 if ((ent != NULL) &&
941 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
942 if (ent->content != NULL) {
943 COPY_BUF(0,buffer,nbchars,ent->content[0]);
944 } else {
945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
946 ctxt->sax->error(ctxt->userData,
947 "internal error entity has no content\n");
948 }
949 } else if ((ent != NULL) && (ent->content != NULL)) {
950 xmlChar *rep;
951
952 ctxt->depth++;
953 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
954 0, 0, 0);
955 ctxt->depth--;
956 if (rep != NULL) {
957 current = rep;
958 while (*current != 0) { /* non input consuming loop */
959 buffer[nbchars++] = *current++;
960 if (nbchars >
961 buffer_size - XML_PARSER_BUFFER_SIZE) {
962 growBuffer(buffer);
963 }
964 }
965 xmlFree(rep);
966 }
967 } else if (ent != NULL) {
968 int i = xmlStrlen(ent->name);
969 const xmlChar *cur = ent->name;
970
971 buffer[nbchars++] = '&';
972 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
973 growBuffer(buffer);
974 }
975 for (;i > 0;i--)
976 buffer[nbchars++] = *cur++;
977 buffer[nbchars++] = ';';
978 }
979 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
980 if (xmlParserDebugEntities)
981 xmlGenericError(xmlGenericErrorContext,
982 "String decoding PE Reference: %.30s\n", str);
983 ent = xmlParseStringPEReference(ctxt, &str);
984 if (ent != NULL) {
985 xmlChar *rep;
986
987 ctxt->depth++;
988 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
989 0, 0, 0);
990 ctxt->depth--;
991 if (rep != NULL) {
992 current = rep;
993 while (*current != 0) { /* non input consuming loop */
994 buffer[nbchars++] = *current++;
995 if (nbchars >
996 buffer_size - XML_PARSER_BUFFER_SIZE) {
997 growBuffer(buffer);
998 }
999 }
1000 xmlFree(rep);
1001 }
1002 }
1003 } else {
1004 COPY_BUF(l,buffer,nbchars,c);
1005 str += l;
1006 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1007 growBuffer(buffer);
1008 }
1009 }
1010 c = CUR_SCHAR(str, l);
1011 }
1012 buffer[nbchars++] = 0;
1013 return(buffer);
1014}
1015
1016
1017/************************************************************************
1018 * *
1019 * Commodity functions to handle xmlChars *
1020 * *
1021 ************************************************************************/
1022
1023/**
1024 * xmlStrndup:
1025 * @cur: the input xmlChar *
1026 * @len: the len of @cur
1027 *
1028 * a strndup for array of xmlChar's
1029 *
1030 * Returns a new xmlChar * or NULL
1031 */
1032xmlChar *
1033xmlStrndup(const xmlChar *cur, int len) {
1034 xmlChar *ret;
1035
1036 if ((cur == NULL) || (len < 0)) return(NULL);
1037 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1038 if (ret == NULL) {
1039 xmlGenericError(xmlGenericErrorContext,
1040 "malloc of %ld byte failed\n",
1041 (len + 1) * (long)sizeof(xmlChar));
1042 return(NULL);
1043 }
1044 memcpy(ret, cur, len * sizeof(xmlChar));
1045 ret[len] = 0;
1046 return(ret);
1047}
1048
1049/**
1050 * xmlStrdup:
1051 * @cur: the input xmlChar *
1052 *
1053 * a strdup for array of xmlChar's. Since they are supposed to be
1054 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1055 * a termination mark of '0'.
1056 *
1057 * Returns a new xmlChar * or NULL
1058 */
1059xmlChar *
1060xmlStrdup(const xmlChar *cur) {
1061 const xmlChar *p = cur;
1062
1063 if (cur == NULL) return(NULL);
1064 while (*p != 0) p++; /* non input consuming */
1065 return(xmlStrndup(cur, p - cur));
1066}
1067
1068/**
1069 * xmlCharStrndup:
1070 * @cur: the input char *
1071 * @len: the len of @cur
1072 *
1073 * a strndup for char's to xmlChar's
1074 *
1075 * Returns a new xmlChar * or NULL
1076 */
1077
1078xmlChar *
1079xmlCharStrndup(const char *cur, int len) {
1080 int i;
1081 xmlChar *ret;
1082
1083 if ((cur == NULL) || (len < 0)) return(NULL);
1084 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1085 if (ret == NULL) {
1086 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1087 (len + 1) * (long)sizeof(xmlChar));
1088 return(NULL);
1089 }
1090 for (i = 0;i < len;i++)
1091 ret[i] = (xmlChar) cur[i];
1092 ret[len] = 0;
1093 return(ret);
1094}
1095
1096/**
1097 * xmlCharStrdup:
1098 * @cur: the input char *
1099 * @len: the len of @cur
1100 *
1101 * a strdup for char's to xmlChar's
1102 *
1103 * Returns a new xmlChar * or NULL
1104 */
1105
1106xmlChar *
1107xmlCharStrdup(const char *cur) {
1108 const char *p = cur;
1109
1110 if (cur == NULL) return(NULL);
1111 while (*p != '\0') p++; /* non input consuming */
1112 return(xmlCharStrndup(cur, p - cur));
1113}
1114
1115/**
1116 * xmlStrcmp:
1117 * @str1: the first xmlChar *
1118 * @str2: the second xmlChar *
1119 *
1120 * a strcmp for xmlChar's
1121 *
1122 * Returns the integer result of the comparison
1123 */
1124
1125int
1126xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1127 register int tmp;
1128
1129 if (str1 == str2) return(0);
1130 if (str1 == NULL) return(-1);
1131 if (str2 == NULL) return(1);
1132 do {
1133 tmp = *str1++ - *str2;
1134 if (tmp != 0) return(tmp);
1135 } while (*str2++ != 0);
1136 return 0;
1137}
1138
1139/**
1140 * xmlStrEqual:
1141 * @str1: the first xmlChar *
1142 * @str2: the second xmlChar *
1143 *
1144 * Check if both string are equal of have same content
1145 * Should be a bit more readable and faster than xmlStrEqual()
1146 *
1147 * Returns 1 if they are equal, 0 if they are different
1148 */
1149
1150int
1151xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1152 if (str1 == str2) return(1);
1153 if (str1 == NULL) return(0);
1154 if (str2 == NULL) return(0);
1155 do {
1156 if (*str1++ != *str2) return(0);
1157 } while (*str2++);
1158 return(1);
1159}
1160
1161/**
1162 * xmlStrncmp:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 * @len: the max comparison length
1166 *
1167 * a strncmp for xmlChar's
1168 *
1169 * Returns the integer result of the comparison
1170 */
1171
1172int
1173xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174 register int tmp;
1175
1176 if (len <= 0) return(0);
1177 if (str1 == str2) return(0);
1178 if (str1 == NULL) return(-1);
1179 if (str2 == NULL) return(1);
1180 do {
1181 tmp = *str1++ - *str2;
1182 if (tmp != 0 || --len == 0) return(tmp);
1183 } while (*str2++ != 0);
1184 return 0;
1185}
1186
Daniel Veillardb44025c2001-10-11 22:55:55 +00001187static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001188 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1189 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1190 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1191 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1192 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1193 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1194 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1195 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1196 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1197 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1198 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1199 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1200 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1201 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1202 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1203 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1204 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1205 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1206 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1207 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1208 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1209 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1210 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1211 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1212 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1213 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1214 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1215 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1216 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1217 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1218 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1219 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1220};
1221
1222/**
1223 * xmlStrcasecmp:
1224 * @str1: the first xmlChar *
1225 * @str2: the second xmlChar *
1226 *
1227 * a strcasecmp for xmlChar's
1228 *
1229 * Returns the integer result of the comparison
1230 */
1231
1232int
1233xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1234 register int tmp;
1235
1236 if (str1 == str2) return(0);
1237 if (str1 == NULL) return(-1);
1238 if (str2 == NULL) return(1);
1239 do {
1240 tmp = casemap[*str1++] - casemap[*str2];
1241 if (tmp != 0) return(tmp);
1242 } while (*str2++ != 0);
1243 return 0;
1244}
1245
1246/**
1247 * xmlStrncasecmp:
1248 * @str1: the first xmlChar *
1249 * @str2: the second xmlChar *
1250 * @len: the max comparison length
1251 *
1252 * a strncasecmp for xmlChar's
1253 *
1254 * Returns the integer result of the comparison
1255 */
1256
1257int
1258xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1259 register int tmp;
1260
1261 if (len <= 0) return(0);
1262 if (str1 == str2) return(0);
1263 if (str1 == NULL) return(-1);
1264 if (str2 == NULL) return(1);
1265 do {
1266 tmp = casemap[*str1++] - casemap[*str2];
1267 if (tmp != 0 || --len == 0) return(tmp);
1268 } while (*str2++ != 0);
1269 return 0;
1270}
1271
1272/**
1273 * xmlStrchr:
1274 * @str: the xmlChar * array
1275 * @val: the xmlChar to search
1276 *
1277 * a strchr for xmlChar's
1278 *
1279 * Returns the xmlChar * for the first occurence or NULL.
1280 */
1281
1282const xmlChar *
1283xmlStrchr(const xmlChar *str, xmlChar val) {
1284 if (str == NULL) return(NULL);
1285 while (*str != 0) { /* non input consuming */
1286 if (*str == val) return((xmlChar *) str);
1287 str++;
1288 }
1289 return(NULL);
1290}
1291
1292/**
1293 * xmlStrstr:
1294 * @str: the xmlChar * array (haystack)
1295 * @val: the xmlChar to search (needle)
1296 *
1297 * a strstr for xmlChar's
1298 *
1299 * Returns the xmlChar * for the first occurence or NULL.
1300 */
1301
1302const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001303xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001304 int n;
1305
1306 if (str == NULL) return(NULL);
1307 if (val == NULL) return(NULL);
1308 n = xmlStrlen(val);
1309
1310 if (n == 0) return(str);
1311 while (*str != 0) { /* non input consuming */
1312 if (*str == *val) {
1313 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1314 }
1315 str++;
1316 }
1317 return(NULL);
1318}
1319
1320/**
1321 * xmlStrcasestr:
1322 * @str: the xmlChar * array (haystack)
1323 * @val: the xmlChar to search (needle)
1324 *
1325 * a case-ignoring strstr for xmlChar's
1326 *
1327 * Returns the xmlChar * for the first occurence or NULL.
1328 */
1329
1330const xmlChar *
1331xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1332 int n;
1333
1334 if (str == NULL) return(NULL);
1335 if (val == NULL) return(NULL);
1336 n = xmlStrlen(val);
1337
1338 if (n == 0) return(str);
1339 while (*str != 0) { /* non input consuming */
1340 if (casemap[*str] == casemap[*val])
1341 if (!xmlStrncasecmp(str, val, n)) return(str);
1342 str++;
1343 }
1344 return(NULL);
1345}
1346
1347/**
1348 * xmlStrsub:
1349 * @str: the xmlChar * array (haystack)
1350 * @start: the index of the first char (zero based)
1351 * @len: the length of the substring
1352 *
1353 * Extract a substring of a given string
1354 *
1355 * Returns the xmlChar * for the first occurence or NULL.
1356 */
1357
1358xmlChar *
1359xmlStrsub(const xmlChar *str, int start, int len) {
1360 int i;
1361
1362 if (str == NULL) return(NULL);
1363 if (start < 0) return(NULL);
1364 if (len < 0) return(NULL);
1365
1366 for (i = 0;i < start;i++) {
1367 if (*str == 0) return(NULL);
1368 str++;
1369 }
1370 if (*str == 0) return(NULL);
1371 return(xmlStrndup(str, len));
1372}
1373
1374/**
1375 * xmlStrlen:
1376 * @str: the xmlChar * array
1377 *
1378 * length of a xmlChar's string
1379 *
1380 * Returns the number of xmlChar contained in the ARRAY.
1381 */
1382
1383int
1384xmlStrlen(const xmlChar *str) {
1385 int len = 0;
1386
1387 if (str == NULL) return(0);
1388 while (*str != 0) { /* non input consuming */
1389 str++;
1390 len++;
1391 }
1392 return(len);
1393}
1394
1395/**
1396 * xmlStrncat:
1397 * @cur: the original xmlChar * array
1398 * @add: the xmlChar * array added
1399 * @len: the length of @add
1400 *
1401 * a strncat for array of xmlChar's, it will extend cur with the len
1402 * first bytes of @add.
1403 *
1404 * Returns a new xmlChar *, the original @cur is reallocated if needed
1405 * and should not be freed
1406 */
1407
1408xmlChar *
1409xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1410 int size;
1411 xmlChar *ret;
1412
1413 if ((add == NULL) || (len == 0))
1414 return(cur);
1415 if (cur == NULL)
1416 return(xmlStrndup(add, len));
1417
1418 size = xmlStrlen(cur);
1419 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1420 if (ret == NULL) {
1421 xmlGenericError(xmlGenericErrorContext,
1422 "xmlStrncat: realloc of %ld byte failed\n",
1423 (size + len + 1) * (long)sizeof(xmlChar));
1424 return(cur);
1425 }
1426 memcpy(&ret[size], add, len * sizeof(xmlChar));
1427 ret[size + len] = 0;
1428 return(ret);
1429}
1430
1431/**
1432 * xmlStrcat:
1433 * @cur: the original xmlChar * array
1434 * @add: the xmlChar * array added
1435 *
1436 * a strcat for array of xmlChar's. Since they are supposed to be
1437 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1438 * a termination mark of '0'.
1439 *
1440 * Returns a new xmlChar * containing the concatenated string.
1441 */
1442xmlChar *
1443xmlStrcat(xmlChar *cur, const xmlChar *add) {
1444 const xmlChar *p = add;
1445
1446 if (add == NULL) return(cur);
1447 if (cur == NULL)
1448 return(xmlStrdup(add));
1449
1450 while (*p != 0) p++; /* non input consuming */
1451 return(xmlStrncat(cur, add, p - add));
1452}
1453
1454/************************************************************************
1455 * *
1456 * Commodity functions, cleanup needed ? *
1457 * *
1458 ************************************************************************/
1459
1460/**
1461 * areBlanks:
1462 * @ctxt: an XML parser context
1463 * @str: a xmlChar *
1464 * @len: the size of @str
1465 *
1466 * Is this a sequence of blank chars that one can ignore ?
1467 *
1468 * Returns 1 if ignorable 0 otherwise.
1469 */
1470
1471static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1472 int i, ret;
1473 xmlNodePtr lastChild;
1474
Daniel Veillard05c13a22001-09-09 08:38:09 +00001475 /*
1476 * Don't spend time trying to differentiate them, the same callback is
1477 * used !
1478 */
1479 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001480 return(0);
1481
Owen Taylor3473f882001-02-23 17:55:21 +00001482 /*
1483 * Check for xml:space value.
1484 */
1485 if (*(ctxt->space) == 1)
1486 return(0);
1487
1488 /*
1489 * Check that the string is made of blanks
1490 */
1491 for (i = 0;i < len;i++)
1492 if (!(IS_BLANK(str[i]))) return(0);
1493
1494 /*
1495 * Look if the element is mixed content in the Dtd if available
1496 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001497 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001498 if (ctxt->myDoc != NULL) {
1499 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1500 if (ret == 0) return(1);
1501 if (ret == 1) return(0);
1502 }
1503
1504 /*
1505 * Otherwise, heuristic :-\
1506 */
Owen Taylor3473f882001-02-23 17:55:21 +00001507 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 if ((ctxt->node->children == NULL) &&
1509 (RAW == '<') && (NXT(1) == '/')) return(0);
1510
1511 lastChild = xmlGetLastChild(ctxt->node);
1512 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001513 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1514 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001515 } else if (xmlNodeIsText(lastChild))
1516 return(0);
1517 else if ((ctxt->node->children != NULL) &&
1518 (xmlNodeIsText(ctxt->node->children)))
1519 return(0);
1520 return(1);
1521}
1522
/*
 * Forward declarations for recursive behaviour.
 */
1526void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1527void xmlParseReference(xmlParserCtxtPtr ctxt);
1528
1529/************************************************************************
1530 * *
1531 * Extra stuff for namespace support *
1532 * Relates to http://www.w3.org/TR/WD-xml-names *
1533 * *
1534 ************************************************************************/
1535
1536/**
1537 * xmlSplitQName:
1538 * @ctxt: an XML parser context
1539 * @name: an XML parser context
1540 * @prefix: a xmlChar **
1541 *
1542 * parse an UTF8 encoded XML qualified name string
1543 *
1544 * [NS 5] QName ::= (Prefix ':')? LocalPart
1545 *
1546 * [NS 6] Prefix ::= NCName
1547 *
1548 * [NS 7] LocalPart ::= NCName
1549 *
1550 * Returns the local part, and prefix is updated
1551 * to get the Prefix if any.
1552 */
1553
1554xmlChar *
1555xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1556 xmlChar buf[XML_MAX_NAMELEN + 5];
1557 xmlChar *buffer = NULL;
1558 int len = 0;
1559 int max = XML_MAX_NAMELEN;
1560 xmlChar *ret = NULL;
1561 const xmlChar *cur = name;
1562 int c;
1563
1564 *prefix = NULL;
1565
1566#ifndef XML_XML_NAMESPACE
1567 /* xml: prefix is not really a namespace */
1568 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1569 (cur[2] == 'l') && (cur[3] == ':'))
1570 return(xmlStrdup(name));
1571#endif
1572
1573 /* nasty but valid */
1574 if (cur[0] == ':')
1575 return(xmlStrdup(name));
1576
1577 c = *cur++;
1578 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1579 buf[len++] = c;
1580 c = *cur++;
1581 }
1582 if (len >= max) {
1583 /*
1584 * Okay someone managed to make a huge name, so he's ready to pay
1585 * for the processing speed.
1586 */
1587 max = len * 2;
1588
1589 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1590 if (buffer == NULL) {
1591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1592 ctxt->sax->error(ctxt->userData,
1593 "xmlSplitQName: out of memory\n");
1594 return(NULL);
1595 }
1596 memcpy(buffer, buf, len);
1597 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1598 if (len + 10 > max) {
1599 max *= 2;
1600 buffer = (xmlChar *) xmlRealloc(buffer,
1601 max * sizeof(xmlChar));
1602 if (buffer == NULL) {
1603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1604 ctxt->sax->error(ctxt->userData,
1605 "xmlSplitQName: out of memory\n");
1606 return(NULL);
1607 }
1608 }
1609 buffer[len++] = c;
1610 c = *cur++;
1611 }
1612 buffer[len] = 0;
1613 }
1614
1615 if (buffer == NULL)
1616 ret = xmlStrndup(buf, len);
1617 else {
1618 ret = buffer;
1619 buffer = NULL;
1620 max = XML_MAX_NAMELEN;
1621 }
1622
1623
1624 if (c == ':') {
1625 c = *cur++;
1626 if (c == 0) return(ret);
1627 *prefix = ret;
1628 len = 0;
1629
1630 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1631 buf[len++] = c;
1632 c = *cur++;
1633 }
1634 if (len >= max) {
1635 /*
1636 * Okay someone managed to make a huge name, so he's ready to pay
1637 * for the processing speed.
1638 */
1639 max = len * 2;
1640
1641 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1642 if (buffer == NULL) {
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "xmlSplitQName: out of memory\n");
1646 return(NULL);
1647 }
1648 memcpy(buffer, buf, len);
1649 while (c != 0) { /* tested bigname2.xml */
1650 if (len + 10 > max) {
1651 max *= 2;
1652 buffer = (xmlChar *) xmlRealloc(buffer,
1653 max * sizeof(xmlChar));
1654 if (buffer == NULL) {
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "xmlSplitQName: out of memory\n");
1658 return(NULL);
1659 }
1660 }
1661 buffer[len++] = c;
1662 c = *cur++;
1663 }
1664 buffer[len] = 0;
1665 }
1666
1667 if (buffer == NULL)
1668 ret = xmlStrndup(buf, len);
1669 else {
1670 ret = buffer;
1671 }
1672 }
1673
1674 return(ret);
1675}
1676
1677/************************************************************************
1678 * *
1679 * The parser itself *
1680 * Relates to http://www.w3.org/TR/REC-xml *
1681 * *
1682 ************************************************************************/
1683
Daniel Veillard76d66f42001-05-16 21:05:17 +00001684static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001685/**
1686 * xmlParseName:
1687 * @ctxt: an XML parser context
1688 *
1689 * parse an XML name.
1690 *
1691 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1692 * CombiningChar | Extender
1693 *
1694 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1695 *
1696 * [6] Names ::= Name (S Name)*
1697 *
1698 * Returns the Name parsed or NULL
1699 */
1700
1701xmlChar *
1702xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001703 const xmlChar *in;
1704 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001705 int count = 0;
1706
1707 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001708
1709 /*
1710 * Accelerator for simple ASCII names
1711 */
1712 in = ctxt->input->cur;
1713 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1714 ((*in >= 0x41) && (*in <= 0x5A)) ||
1715 (*in == '_') || (*in == ':')) {
1716 in++;
1717 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1718 ((*in >= 0x41) && (*in <= 0x5A)) ||
1719 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001720 (*in == '_') || (*in == '-') ||
1721 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001722 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001723 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001724 count = in - ctxt->input->cur;
1725 ret = xmlStrndup(ctxt->input->cur, count);
1726 ctxt->input->cur = in;
1727 return(ret);
1728 }
1729 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001730 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001731}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001732
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001734xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1735 xmlChar buf[XML_MAX_NAMELEN + 5];
1736 int len = 0, l;
1737 int c;
1738 int count = 0;
1739
1740 /*
1741 * Handler for more complex cases
1742 */
1743 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001744 c = CUR_CHAR(l);
1745 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1746 (!IS_LETTER(c) && (c != '_') &&
1747 (c != ':'))) {
1748 return(NULL);
1749 }
1750
1751 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1752 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1753 (c == '.') || (c == '-') ||
1754 (c == '_') || (c == ':') ||
1755 (IS_COMBINING(c)) ||
1756 (IS_EXTENDER(c)))) {
1757 if (count++ > 100) {
1758 count = 0;
1759 GROW;
1760 }
1761 COPY_BUF(l,buf,len,c);
1762 NEXTL(l);
1763 c = CUR_CHAR(l);
1764 if (len >= XML_MAX_NAMELEN) {
1765 /*
1766 * Okay someone managed to make a huge name, so he's ready to pay
1767 * for the processing speed.
1768 */
1769 xmlChar *buffer;
1770 int max = len * 2;
1771
1772 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1773 if (buffer == NULL) {
1774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1775 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001776 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001777 return(NULL);
1778 }
1779 memcpy(buffer, buf, len);
1780 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1781 (c == '.') || (c == '-') ||
1782 (c == '_') || (c == ':') ||
1783 (IS_COMBINING(c)) ||
1784 (IS_EXTENDER(c))) {
1785 if (count++ > 100) {
1786 count = 0;
1787 GROW;
1788 }
1789 if (len + 10 > max) {
1790 max *= 2;
1791 buffer = (xmlChar *) xmlRealloc(buffer,
1792 max * sizeof(xmlChar));
1793 if (buffer == NULL) {
1794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001796 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001797 return(NULL);
1798 }
1799 }
1800 COPY_BUF(l,buffer,len,c);
1801 NEXTL(l);
1802 c = CUR_CHAR(l);
1803 }
1804 buffer[len] = 0;
1805 return(buffer);
1806 }
1807 }
1808 return(xmlStrndup(buf, len));
1809}
1810
1811/**
1812 * xmlParseStringName:
1813 * @ctxt: an XML parser context
1814 * @str: a pointer to the string pointer (IN/OUT)
1815 *
1816 * parse an XML name.
1817 *
1818 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1819 * CombiningChar | Extender
1820 *
1821 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1822 *
1823 * [6] Names ::= Name (S Name)*
1824 *
1825 * Returns the Name parsed or NULL. The str pointer
1826 * is updated to the current location in the string.
1827 */
1828
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001829static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001830xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1831 xmlChar buf[XML_MAX_NAMELEN + 5];
1832 const xmlChar *cur = *str;
1833 int len = 0, l;
1834 int c;
1835
1836 c = CUR_SCHAR(cur, l);
1837 if (!IS_LETTER(c) && (c != '_') &&
1838 (c != ':')) {
1839 return(NULL);
1840 }
1841
1842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1843 (c == '.') || (c == '-') ||
1844 (c == '_') || (c == ':') ||
1845 (IS_COMBINING(c)) ||
1846 (IS_EXTENDER(c))) {
1847 COPY_BUF(l,buf,len,c);
1848 cur += l;
1849 c = CUR_SCHAR(cur, l);
1850 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1851 /*
1852 * Okay someone managed to make a huge name, so he's ready to pay
1853 * for the processing speed.
1854 */
1855 xmlChar *buffer;
1856 int max = len * 2;
1857
1858 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1859 if (buffer == NULL) {
1860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1861 ctxt->sax->error(ctxt->userData,
1862 "xmlParseStringName: out of memory\n");
1863 return(NULL);
1864 }
1865 memcpy(buffer, buf, len);
1866 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1867 (c == '.') || (c == '-') ||
1868 (c == '_') || (c == ':') ||
1869 (IS_COMBINING(c)) ||
1870 (IS_EXTENDER(c))) {
1871 if (len + 10 > max) {
1872 max *= 2;
1873 buffer = (xmlChar *) xmlRealloc(buffer,
1874 max * sizeof(xmlChar));
1875 if (buffer == NULL) {
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "xmlParseStringName: out of memory\n");
1879 return(NULL);
1880 }
1881 }
1882 COPY_BUF(l,buffer,len,c);
1883 cur += l;
1884 c = CUR_SCHAR(cur, l);
1885 }
1886 buffer[len] = 0;
1887 *str = cur;
1888 return(buffer);
1889 }
1890 }
1891 *str = cur;
1892 return(xmlStrndup(buf, len));
1893}
1894
1895/**
1896 * xmlParseNmtoken:
1897 * @ctxt: an XML parser context
1898 *
1899 * parse an XML Nmtoken.
1900 *
1901 * [7] Nmtoken ::= (NameChar)+
1902 *
1903 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1904 *
1905 * Returns the Nmtoken parsed or NULL
1906 */
1907
1908xmlChar *
1909xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1910 xmlChar buf[XML_MAX_NAMELEN + 5];
1911 int len = 0, l;
1912 int c;
1913 int count = 0;
1914
1915 GROW;
1916 c = CUR_CHAR(l);
1917
1918 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1919 (c == '.') || (c == '-') ||
1920 (c == '_') || (c == ':') ||
1921 (IS_COMBINING(c)) ||
1922 (IS_EXTENDER(c))) {
1923 if (count++ > 100) {
1924 count = 0;
1925 GROW;
1926 }
1927 COPY_BUF(l,buf,len,c);
1928 NEXTL(l);
1929 c = CUR_CHAR(l);
1930 if (len >= XML_MAX_NAMELEN) {
1931 /*
1932 * Okay someone managed to make a huge token, so he's ready to pay
1933 * for the processing speed.
1934 */
1935 xmlChar *buffer;
1936 int max = len * 2;
1937
1938 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1939 if (buffer == NULL) {
1940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1941 ctxt->sax->error(ctxt->userData,
1942 "xmlParseNmtoken: out of memory\n");
1943 return(NULL);
1944 }
1945 memcpy(buffer, buf, len);
1946 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1947 (c == '.') || (c == '-') ||
1948 (c == '_') || (c == ':') ||
1949 (IS_COMBINING(c)) ||
1950 (IS_EXTENDER(c))) {
1951 if (count++ > 100) {
1952 count = 0;
1953 GROW;
1954 }
1955 if (len + 10 > max) {
1956 max *= 2;
1957 buffer = (xmlChar *) xmlRealloc(buffer,
1958 max * sizeof(xmlChar));
1959 if (buffer == NULL) {
1960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1961 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001962 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001963 return(NULL);
1964 }
1965 }
1966 COPY_BUF(l,buffer,len,c);
1967 NEXTL(l);
1968 c = CUR_CHAR(l);
1969 }
1970 buffer[len] = 0;
1971 return(buffer);
1972 }
1973 }
1974 if (len == 0)
1975 return(NULL);
1976 return(xmlStrndup(buf, len));
1977}
1978
1979/**
1980 * xmlParseEntityValue:
1981 * @ctxt: an XML parser context
1982 * @orig: if non-NULL store a copy of the original entity value
1983 *
1984 * parse a value for ENTITY declarations
1985 *
1986 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1987 * "'" ([^%&'] | PEReference | Reference)* "'"
1988 *
1989 * Returns the EntityValue parsed with reference substitued or NULL
1990 */
1991
1992xmlChar *
1993xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1994 xmlChar *buf = NULL;
1995 int len = 0;
1996 int size = XML_PARSER_BUFFER_SIZE;
1997 int c, l;
1998 xmlChar stop;
1999 xmlChar *ret = NULL;
2000 const xmlChar *cur = NULL;
2001 xmlParserInputPtr input;
2002
2003 if (RAW == '"') stop = '"';
2004 else if (RAW == '\'') stop = '\'';
2005 else {
2006 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2008 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 return(NULL);
2012 }
2013 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2014 if (buf == NULL) {
2015 xmlGenericError(xmlGenericErrorContext,
2016 "malloc of %d byte failed\n", size);
2017 return(NULL);
2018 }
2019
2020 /*
2021 * The content of the entity definition is copied in a buffer.
2022 */
2023
2024 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2025 input = ctxt->input;
2026 GROW;
2027 NEXT;
2028 c = CUR_CHAR(l);
2029 /*
2030 * NOTE: 4.4.5 Included in Literal
2031 * When a parameter entity reference appears in a literal entity
2032 * value, ... a single or double quote character in the replacement
2033 * text is always treated as a normal data character and will not
2034 * terminate the literal.
2035 * In practice it means we stop the loop only when back at parsing
2036 * the initial entity and the quote is found
2037 */
2038 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2039 (ctxt->input != input))) {
2040 if (len + 5 >= size) {
2041 size *= 2;
2042 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2043 if (buf == NULL) {
2044 xmlGenericError(xmlGenericErrorContext,
2045 "realloc of %d byte failed\n", size);
2046 return(NULL);
2047 }
2048 }
2049 COPY_BUF(l,buf,len,c);
2050 NEXTL(l);
2051 /*
2052 * Pop-up of finished entities.
2053 */
2054 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2055 xmlPopInput(ctxt);
2056
2057 GROW;
2058 c = CUR_CHAR(l);
2059 if (c == 0) {
2060 GROW;
2061 c = CUR_CHAR(l);
2062 }
2063 }
2064 buf[len] = 0;
2065
2066 /*
2067 * Raise problem w.r.t. '&' and '%' being used in non-entities
2068 * reference constructs. Note Charref will be handled in
2069 * xmlStringDecodeEntities()
2070 */
2071 cur = buf;
2072 while (*cur != 0) { /* non input consuming */
2073 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2074 xmlChar *name;
2075 xmlChar tmp = *cur;
2076
2077 cur++;
2078 name = xmlParseStringName(ctxt, &cur);
2079 if ((name == NULL) || (*cur != ';')) {
2080 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2082 ctxt->sax->error(ctxt->userData,
2083 "EntityValue: '%c' forbidden except for entities references\n",
2084 tmp);
2085 ctxt->wellFormed = 0;
2086 ctxt->disableSAX = 1;
2087 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002088 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2089 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2092 ctxt->sax->error(ctxt->userData,
2093 "EntityValue: PEReferences forbidden in internal subset\n",
2094 tmp);
2095 ctxt->wellFormed = 0;
2096 ctxt->disableSAX = 1;
2097 }
2098 if (name != NULL)
2099 xmlFree(name);
2100 }
2101 cur++;
2102 }
2103
2104 /*
2105 * Then PEReference entities are substituted.
2106 */
2107 if (c != stop) {
2108 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2110 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2111 ctxt->wellFormed = 0;
2112 ctxt->disableSAX = 1;
2113 xmlFree(buf);
2114 } else {
2115 NEXT;
2116 /*
2117 * NOTE: 4.4.7 Bypassed
2118 * When a general entity reference appears in the EntityValue in
2119 * an entity declaration, it is bypassed and left as is.
2120 * so XML_SUBSTITUTE_REF is not set here.
2121 */
2122 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2123 0, 0, 0);
2124 if (orig != NULL)
2125 *orig = buf;
2126 else
2127 xmlFree(buf);
2128 }
2129
2130 return(ret);
2131}
2132
2133/**
2134 * xmlParseAttValue:
2135 * @ctxt: an XML parser context
2136 *
2137 * parse a value for an attribute
2138 * Note: the parser won't do substitution of entities here, this
2139 * will be handled later in xmlStringGetNodeList
2140 *
2141 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2142 * "'" ([^<&'] | Reference)* "'"
2143 *
2144 * 3.3.3 Attribute-Value Normalization:
2145 * Before the value of an attribute is passed to the application or
2146 * checked for validity, the XML processor must normalize it as follows:
2147 * - a character reference is processed by appending the referenced
2148 * character to the attribute value
2149 * - an entity reference is processed by recursively processing the
2150 * replacement text of the entity
2151 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2152 * appending #x20 to the normalized value, except that only a single
2153 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2154 * parsed entity or the literal entity value of an internal parsed entity
2155 * - other characters are processed by appending them to the normalized value
2156 * If the declared value is not CDATA, then the XML processor must further
2157 * process the normalized attribute value by discarding any leading and
2158 * trailing space (#x20) characters, and by replacing sequences of space
2159 * (#x20) characters by a single space (#x20) character.
2160 * All attributes for which no declaration has been read should be treated
2161 * by a non-validating parser as if declared CDATA.
2162 *
2163 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2164 */
2165
2166xmlChar *
2167xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2168 xmlChar limit = 0;
2169 xmlChar *buf = NULL;
2170 int len = 0;
2171 int buf_size = 0;
2172 int c, l;
2173 xmlChar *current = NULL;
2174 xmlEntityPtr ent;
2175
2176
2177 SHRINK;
2178 if (NXT(0) == '"') {
2179 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2180 limit = '"';
2181 NEXT;
2182 } else if (NXT(0) == '\'') {
2183 limit = '\'';
2184 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2185 NEXT;
2186 } else {
2187 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2189 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2190 ctxt->wellFormed = 0;
2191 ctxt->disableSAX = 1;
2192 return(NULL);
2193 }
2194
2195 /*
2196 * allocate a translation buffer.
2197 */
2198 buf_size = XML_PARSER_BUFFER_SIZE;
2199 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2200 if (buf == NULL) {
2201 perror("xmlParseAttValue: malloc failed");
2202 return(NULL);
2203 }
2204
2205 /*
2206 * Ok loop until we reach one of the ending char or a size limit.
2207 */
2208 c = CUR_CHAR(l);
2209 while (((NXT(0) != limit) && /* checked */
2210 (c != '<')) || (ctxt->token != 0)) {
2211 if (c == 0) break;
2212 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002213 if (ctxt->replaceEntities) {
2214 if (len > buf_size - 10) {
2215 growBuffer(buf);
2216 }
2217 buf[len++] = '&';
2218 } else {
2219 /*
2220 * The reparsing will be done in xmlStringGetNodeList()
2221 * called by the attribute() function in SAX.c
2222 */
2223 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002224
Daniel Veillard319a7422001-09-11 09:27:09 +00002225 if (len > buf_size - 10) {
2226 growBuffer(buf);
2227 }
2228 current = &buffer[0];
2229 while (*current != 0) { /* non input consuming */
2230 buf[len++] = *current++;
2231 }
2232 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002233 }
Owen Taylor3473f882001-02-23 17:55:21 +00002234 } else if (c == '&') {
2235 if (NXT(1) == '#') {
2236 int val = xmlParseCharRef(ctxt);
2237 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (ctxt->replaceEntities) {
2239 if (len > buf_size - 10) {
2240 growBuffer(buf);
2241 }
2242 buf[len++] = '&';
2243 } else {
2244 /*
2245 * The reparsing will be done in xmlStringGetNodeList()
2246 * called by the attribute() function in SAX.c
2247 */
2248 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002249
Daniel Veillard319a7422001-09-11 09:27:09 +00002250 if (len > buf_size - 10) {
2251 growBuffer(buf);
2252 }
2253 current = &buffer[0];
2254 while (*current != 0) { /* non input consuming */
2255 buf[len++] = *current++;
2256 }
Owen Taylor3473f882001-02-23 17:55:21 +00002257 }
2258 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002259 if (len > buf_size - 10) {
2260 growBuffer(buf);
2261 }
Owen Taylor3473f882001-02-23 17:55:21 +00002262 len += xmlCopyChar(0, &buf[len], val);
2263 }
2264 } else {
2265 ent = xmlParseEntityRef(ctxt);
2266 if ((ent != NULL) &&
2267 (ctxt->replaceEntities != 0)) {
2268 xmlChar *rep;
2269
2270 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2271 rep = xmlStringDecodeEntities(ctxt, ent->content,
2272 XML_SUBSTITUTE_REF, 0, 0, 0);
2273 if (rep != NULL) {
2274 current = rep;
2275 while (*current != 0) { /* non input consuming */
2276 buf[len++] = *current++;
2277 if (len > buf_size - 10) {
2278 growBuffer(buf);
2279 }
2280 }
2281 xmlFree(rep);
2282 }
2283 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002284 if (len > buf_size - 10) {
2285 growBuffer(buf);
2286 }
Owen Taylor3473f882001-02-23 17:55:21 +00002287 if (ent->content != NULL)
2288 buf[len++] = ent->content[0];
2289 }
2290 } else if (ent != NULL) {
2291 int i = xmlStrlen(ent->name);
2292 const xmlChar *cur = ent->name;
2293
2294 /*
2295 * This may look absurd but is needed to detect
2296 * entities problems
2297 */
2298 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2299 (ent->content != NULL)) {
2300 xmlChar *rep;
2301 rep = xmlStringDecodeEntities(ctxt, ent->content,
2302 XML_SUBSTITUTE_REF, 0, 0, 0);
2303 if (rep != NULL)
2304 xmlFree(rep);
2305 }
2306
2307 /*
2308 * Just output the reference
2309 */
2310 buf[len++] = '&';
2311 if (len > buf_size - i - 10) {
2312 growBuffer(buf);
2313 }
2314 for (;i > 0;i--)
2315 buf[len++] = *cur++;
2316 buf[len++] = ';';
2317 }
2318 }
2319 } else {
2320 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2321 COPY_BUF(l,buf,len,0x20);
2322 if (len > buf_size - 10) {
2323 growBuffer(buf);
2324 }
2325 } else {
2326 COPY_BUF(l,buf,len,c);
2327 if (len > buf_size - 10) {
2328 growBuffer(buf);
2329 }
2330 }
2331 NEXTL(l);
2332 }
2333 GROW;
2334 c = CUR_CHAR(l);
2335 }
2336 buf[len++] = 0;
2337 if (RAW == '<') {
2338 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2340 ctxt->sax->error(ctxt->userData,
2341 "Unescaped '<' not allowed in attributes values\n");
2342 ctxt->wellFormed = 0;
2343 ctxt->disableSAX = 1;
2344 } else if (RAW != limit) {
2345 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2347 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2348 ctxt->wellFormed = 0;
2349 ctxt->disableSAX = 1;
2350 } else
2351 NEXT;
2352 return(buf);
2353}
2354
2355/**
2356 * xmlParseSystemLiteral:
2357 * @ctxt: an XML parser context
2358 *
2359 * parse an XML Literal
2360 *
2361 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2362 *
2363 * Returns the SystemLiteral parsed or NULL
2364 */
2365
2366xmlChar *
2367xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2368 xmlChar *buf = NULL;
2369 int len = 0;
2370 int size = XML_PARSER_BUFFER_SIZE;
2371 int cur, l;
2372 xmlChar stop;
2373 int state = ctxt->instate;
2374 int count = 0;
2375
2376 SHRINK;
2377 if (RAW == '"') {
2378 NEXT;
2379 stop = '"';
2380 } else if (RAW == '\'') {
2381 NEXT;
2382 stop = '\'';
2383 } else {
2384 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2386 ctxt->sax->error(ctxt->userData,
2387 "SystemLiteral \" or ' expected\n");
2388 ctxt->wellFormed = 0;
2389 ctxt->disableSAX = 1;
2390 return(NULL);
2391 }
2392
2393 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2394 if (buf == NULL) {
2395 xmlGenericError(xmlGenericErrorContext,
2396 "malloc of %d byte failed\n", size);
2397 return(NULL);
2398 }
2399 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2400 cur = CUR_CHAR(l);
2401 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2402 if (len + 5 >= size) {
2403 size *= 2;
2404 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2405 if (buf == NULL) {
2406 xmlGenericError(xmlGenericErrorContext,
2407 "realloc of %d byte failed\n", size);
2408 ctxt->instate = (xmlParserInputState) state;
2409 return(NULL);
2410 }
2411 }
2412 count++;
2413 if (count > 50) {
2414 GROW;
2415 count = 0;
2416 }
2417 COPY_BUF(l,buf,len,cur);
2418 NEXTL(l);
2419 cur = CUR_CHAR(l);
2420 if (cur == 0) {
2421 GROW;
2422 SHRINK;
2423 cur = CUR_CHAR(l);
2424 }
2425 }
2426 buf[len] = 0;
2427 ctxt->instate = (xmlParserInputState) state;
2428 if (!IS_CHAR(cur)) {
2429 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2431 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2432 ctxt->wellFormed = 0;
2433 ctxt->disableSAX = 1;
2434 } else {
2435 NEXT;
2436 }
2437 return(buf);
2438}
2439
2440/**
2441 * xmlParsePubidLiteral:
2442 * @ctxt: an XML parser context
2443 *
2444 * parse an XML public literal
2445 *
2446 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2447 *
2448 * Returns the PubidLiteral parsed or NULL.
2449 */
2450
2451xmlChar *
2452xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2453 xmlChar *buf = NULL;
2454 int len = 0;
2455 int size = XML_PARSER_BUFFER_SIZE;
2456 xmlChar cur;
2457 xmlChar stop;
2458 int count = 0;
2459
2460 SHRINK;
2461 if (RAW == '"') {
2462 NEXT;
2463 stop = '"';
2464 } else if (RAW == '\'') {
2465 NEXT;
2466 stop = '\'';
2467 } else {
2468 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2470 ctxt->sax->error(ctxt->userData,
2471 "SystemLiteral \" or ' expected\n");
2472 ctxt->wellFormed = 0;
2473 ctxt->disableSAX = 1;
2474 return(NULL);
2475 }
2476 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2477 if (buf == NULL) {
2478 xmlGenericError(xmlGenericErrorContext,
2479 "malloc of %d byte failed\n", size);
2480 return(NULL);
2481 }
2482 cur = CUR;
2483 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2484 if (len + 1 >= size) {
2485 size *= 2;
2486 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2487 if (buf == NULL) {
2488 xmlGenericError(xmlGenericErrorContext,
2489 "realloc of %d byte failed\n", size);
2490 return(NULL);
2491 }
2492 }
2493 buf[len++] = cur;
2494 count++;
2495 if (count > 50) {
2496 GROW;
2497 count = 0;
2498 }
2499 NEXT;
2500 cur = CUR;
2501 if (cur == 0) {
2502 GROW;
2503 SHRINK;
2504 cur = CUR;
2505 }
2506 }
2507 buf[len] = 0;
2508 if (cur != stop) {
2509 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2511 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2512 ctxt->wellFormed = 0;
2513 ctxt->disableSAX = 1;
2514 } else {
2515 NEXT;
2516 }
2517 return(buf);
2518}
2519
Daniel Veillard48b2f892001-02-25 16:11:03 +00002520void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002521/**
2522 * xmlParseCharData:
2523 * @ctxt: an XML parser context
2524 * @cdata: int indicating whether we are within a CDATA section
2525 *
2526 * parse a CharData section.
2527 * if we are within a CDATA section ']]>' marks an end of section.
2528 *
2529 * The right angle bracket (>) may be represented using the string "&gt;",
2530 * and must, for compatibility, be escaped using "&gt;" or a character
2531 * reference when it appears in the string "]]>" in content, when that
2532 * string is not marking the end of a CDATA section.
2533 *
2534 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2535 */
2536
2537void
2538xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002539 const xmlChar *in;
2540 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002541 int line = ctxt->input->line;
2542 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002543
2544 SHRINK;
2545 GROW;
2546 /*
2547 * Accelerated common case where input don't need to be
2548 * modified before passing it to the handler.
2549 */
2550 if ((ctxt->token == 0) && (!cdata)) {
2551 in = ctxt->input->cur;
2552 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002553get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002554 while (((*in >= 0x20) && (*in != '<') &&
2555 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2556 in++;
2557 if (*in == 0xA) {
2558 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002559 in++;
2560 while (*in == 0xA) {
2561 ctxt->input->line++;
2562 in++;
2563 }
2564 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002565 }
2566 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002567 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002568 if (IS_BLANK(*ctxt->input->cur)) {
2569 const xmlChar *tmp = ctxt->input->cur;
2570 ctxt->input->cur = in;
2571 if (areBlanks(ctxt, tmp, nbchar)) {
2572 if (ctxt->sax->ignorableWhitespace != NULL)
2573 ctxt->sax->ignorableWhitespace(ctxt->userData,
2574 tmp, nbchar);
2575 } else {
2576 if (ctxt->sax->characters != NULL)
2577 ctxt->sax->characters(ctxt->userData,
2578 tmp, nbchar);
2579 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002580 line = ctxt->input->line;
2581 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002582 } else {
2583 if (ctxt->sax->characters != NULL)
2584 ctxt->sax->characters(ctxt->userData,
2585 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002586 line = ctxt->input->line;
2587 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002588 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002589 }
2590 ctxt->input->cur = in;
2591 if (*in == 0xD) {
2592 in++;
2593 if (*in == 0xA) {
2594 ctxt->input->cur = in;
2595 in++;
2596 ctxt->input->line++;
2597 continue; /* while */
2598 }
2599 in--;
2600 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002601 if (*in == '<') {
2602 return;
2603 }
2604 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002605 return;
2606 }
2607 SHRINK;
2608 GROW;
2609 in = ctxt->input->cur;
2610 } while ((*in >= 0x20) && (*in <= 0x7F));
2611 nbchar = 0;
2612 }
Daniel Veillard50582112001-03-26 22:52:16 +00002613 ctxt->input->line = line;
2614 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002615 xmlParseCharDataComplex(ctxt, cdata);
2616}
2617
2618void
2619xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002620 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2621 int nbchar = 0;
2622 int cur, l;
2623 int count = 0;
2624
2625 SHRINK;
2626 GROW;
2627 cur = CUR_CHAR(l);
2628 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2629 ((cur != '&') || (ctxt->token == '&')) &&
2630 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2631 if ((cur == ']') && (NXT(1) == ']') &&
2632 (NXT(2) == '>')) {
2633 if (cdata) break;
2634 else {
2635 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2637 ctxt->sax->error(ctxt->userData,
2638 "Sequence ']]>' not allowed in content\n");
2639 /* Should this be relaxed ??? I see a "must here */
2640 ctxt->wellFormed = 0;
2641 ctxt->disableSAX = 1;
2642 }
2643 }
2644 COPY_BUF(l,buf,nbchar,cur);
2645 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2646 /*
2647 * Ok the segment is to be consumed as chars.
2648 */
2649 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2650 if (areBlanks(ctxt, buf, nbchar)) {
2651 if (ctxt->sax->ignorableWhitespace != NULL)
2652 ctxt->sax->ignorableWhitespace(ctxt->userData,
2653 buf, nbchar);
2654 } else {
2655 if (ctxt->sax->characters != NULL)
2656 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2657 }
2658 }
2659 nbchar = 0;
2660 }
2661 count++;
2662 if (count > 50) {
2663 GROW;
2664 count = 0;
2665 }
2666 NEXTL(l);
2667 cur = CUR_CHAR(l);
2668 }
2669 if (nbchar != 0) {
2670 /*
2671 * Ok the segment is to be consumed as chars.
2672 */
2673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2674 if (areBlanks(ctxt, buf, nbchar)) {
2675 if (ctxt->sax->ignorableWhitespace != NULL)
2676 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2677 } else {
2678 if (ctxt->sax->characters != NULL)
2679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2680 }
2681 }
2682 }
2683}
2684
2685/**
2686 * xmlParseExternalID:
2687 * @ctxt: an XML parser context
2688 * @publicID: a xmlChar** receiving PubidLiteral
2689 * @strict: indicate whether we should restrict parsing to only
2690 * production [75], see NOTE below
2691 *
2692 * Parse an External ID or a Public ID
2693 *
2694 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2695 * 'PUBLIC' S PubidLiteral S SystemLiteral
2696 *
2697 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2698 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2699 *
2700 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2701 *
2702 * Returns the function returns SystemLiteral and in the second
2703 * case publicID receives PubidLiteral, is strict is off
2704 * it is possible to return NULL and have publicID set.
2705 */
2706
2707xmlChar *
2708xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2709 xmlChar *URI = NULL;
2710
2711 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002712
2713 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2715 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2716 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2717 SKIP(6);
2718 if (!IS_BLANK(CUR)) {
2719 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2721 ctxt->sax->error(ctxt->userData,
2722 "Space required after 'SYSTEM'\n");
2723 ctxt->wellFormed = 0;
2724 ctxt->disableSAX = 1;
2725 }
2726 SKIP_BLANKS;
2727 URI = xmlParseSystemLiteral(ctxt);
2728 if (URI == NULL) {
2729 ctxt->errNo = XML_ERR_URI_REQUIRED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData,
2732 "xmlParseExternalID: SYSTEM, no URI\n");
2733 ctxt->wellFormed = 0;
2734 ctxt->disableSAX = 1;
2735 }
2736 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2737 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2738 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2739 SKIP(6);
2740 if (!IS_BLANK(CUR)) {
2741 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2743 ctxt->sax->error(ctxt->userData,
2744 "Space required after 'PUBLIC'\n");
2745 ctxt->wellFormed = 0;
2746 ctxt->disableSAX = 1;
2747 }
2748 SKIP_BLANKS;
2749 *publicID = xmlParsePubidLiteral(ctxt);
2750 if (*publicID == NULL) {
2751 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2753 ctxt->sax->error(ctxt->userData,
2754 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2755 ctxt->wellFormed = 0;
2756 ctxt->disableSAX = 1;
2757 }
2758 if (strict) {
2759 /*
2760 * We don't handle [83] so "S SystemLiteral" is required.
2761 */
2762 if (!IS_BLANK(CUR)) {
2763 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2765 ctxt->sax->error(ctxt->userData,
2766 "Space required after the Public Identifier\n");
2767 ctxt->wellFormed = 0;
2768 ctxt->disableSAX = 1;
2769 }
2770 } else {
2771 /*
2772 * We handle [83] so we return immediately, if
2773 * "S SystemLiteral" is not detected. From a purely parsing
2774 * point of view that's a nice mess.
2775 */
2776 const xmlChar *ptr;
2777 GROW;
2778
2779 ptr = CUR_PTR;
2780 if (!IS_BLANK(*ptr)) return(NULL);
2781
2782 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2783 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2784 }
2785 SKIP_BLANKS;
2786 URI = xmlParseSystemLiteral(ctxt);
2787 if (URI == NULL) {
2788 ctxt->errNo = XML_ERR_URI_REQUIRED;
2789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2790 ctxt->sax->error(ctxt->userData,
2791 "xmlParseExternalID: PUBLIC, no URI\n");
2792 ctxt->wellFormed = 0;
2793 ctxt->disableSAX = 1;
2794 }
2795 }
2796 return(URI);
2797}
2798
2799/**
2800 * xmlParseComment:
2801 * @ctxt: an XML parser context
2802 *
2803 * Skip an XML (SGML) comment <!-- .... -->
2804 * The spec says that "For compatibility, the string "--" (double-hyphen)
2805 * must not occur within comments. "
2806 *
2807 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2808 */
2809void
2810xmlParseComment(xmlParserCtxtPtr ctxt) {
2811 xmlChar *buf = NULL;
2812 int len;
2813 int size = XML_PARSER_BUFFER_SIZE;
2814 int q, ql;
2815 int r, rl;
2816 int cur, l;
2817 xmlParserInputState state;
2818 xmlParserInputPtr input = ctxt->input;
2819 int count = 0;
2820
2821 /*
2822 * Check that there is a comment right here.
2823 */
2824 if ((RAW != '<') || (NXT(1) != '!') ||
2825 (NXT(2) != '-') || (NXT(3) != '-')) return;
2826
2827 state = ctxt->instate;
2828 ctxt->instate = XML_PARSER_COMMENT;
2829 SHRINK;
2830 SKIP(4);
2831 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2832 if (buf == NULL) {
2833 xmlGenericError(xmlGenericErrorContext,
2834 "malloc of %d byte failed\n", size);
2835 ctxt->instate = state;
2836 return;
2837 }
2838 q = CUR_CHAR(ql);
2839 NEXTL(ql);
2840 r = CUR_CHAR(rl);
2841 NEXTL(rl);
2842 cur = CUR_CHAR(l);
2843 len = 0;
2844 while (IS_CHAR(cur) && /* checked */
2845 ((cur != '>') ||
2846 (r != '-') || (q != '-'))) {
2847 if ((r == '-') && (q == '-') && (len > 1)) {
2848 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2850 ctxt->sax->error(ctxt->userData,
2851 "Comment must not contain '--' (double-hyphen)`\n");
2852 ctxt->wellFormed = 0;
2853 ctxt->disableSAX = 1;
2854 }
2855 if (len + 5 >= size) {
2856 size *= 2;
2857 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2858 if (buf == NULL) {
2859 xmlGenericError(xmlGenericErrorContext,
2860 "realloc of %d byte failed\n", size);
2861 ctxt->instate = state;
2862 return;
2863 }
2864 }
2865 COPY_BUF(ql,buf,len,q);
2866 q = r;
2867 ql = rl;
2868 r = cur;
2869 rl = l;
2870
2871 count++;
2872 if (count > 50) {
2873 GROW;
2874 count = 0;
2875 }
2876 NEXTL(l);
2877 cur = CUR_CHAR(l);
2878 if (cur == 0) {
2879 SHRINK;
2880 GROW;
2881 cur = CUR_CHAR(l);
2882 }
2883 }
2884 buf[len] = 0;
2885 if (!IS_CHAR(cur)) {
2886 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "Comment not terminated \n<!--%.50s\n", buf);
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 xmlFree(buf);
2893 } else {
2894 if (input != ctxt->input) {
2895 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2897 ctxt->sax->error(ctxt->userData,
2898"Comment doesn't start and stop in the same entity\n");
2899 ctxt->wellFormed = 0;
2900 ctxt->disableSAX = 1;
2901 }
2902 NEXT;
2903 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2904 (!ctxt->disableSAX))
2905 ctxt->sax->comment(ctxt->userData, buf);
2906 xmlFree(buf);
2907 }
2908 ctxt->instate = state;
2909}
2910
2911/**
2912 * xmlParsePITarget:
2913 * @ctxt: an XML parser context
2914 *
2915 * parse the name of a PI
2916 *
2917 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2918 *
2919 * Returns the PITarget name or NULL
2920 */
2921
2922xmlChar *
2923xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2924 xmlChar *name;
2925
2926 name = xmlParseName(ctxt);
2927 if ((name != NULL) &&
2928 ((name[0] == 'x') || (name[0] == 'X')) &&
2929 ((name[1] == 'm') || (name[1] == 'M')) &&
2930 ((name[2] == 'l') || (name[2] == 'L'))) {
2931 int i;
2932 if ((name[0] == 'x') && (name[1] == 'm') &&
2933 (name[2] == 'l') && (name[3] == 0)) {
2934 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2936 ctxt->sax->error(ctxt->userData,
2937 "XML declaration allowed only at the start of the document\n");
2938 ctxt->wellFormed = 0;
2939 ctxt->disableSAX = 1;
2940 return(name);
2941 } else if (name[3] == 0) {
2942 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2944 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2945 ctxt->wellFormed = 0;
2946 ctxt->disableSAX = 1;
2947 return(name);
2948 }
2949 for (i = 0;;i++) {
2950 if (xmlW3CPIs[i] == NULL) break;
2951 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2952 return(name);
2953 }
2954 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2955 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2956 ctxt->sax->warning(ctxt->userData,
2957 "xmlParsePItarget: invalid name prefix 'xml'\n");
2958 }
2959 }
2960 return(name);
2961}
2962
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: catalog S? '=' S? quoted-URL S?. A missing
 * '=' makes the function return silently; every other deviation raises
 * an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3025
Owen Taylor3473f882001-02-23 17:55:21 +00003026/**
3027 * xmlParsePI:
3028 * @ctxt: an XML parser context
3029 *
3030 * parse an XML Processing Instruction.
3031 *
3032 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3033 *
3034 * The processing is transfered to SAX once parsed.
3035 */
3036
3037void
3038xmlParsePI(xmlParserCtxtPtr ctxt) {
3039 xmlChar *buf = NULL;
3040 int len = 0;
3041 int size = XML_PARSER_BUFFER_SIZE;
3042 int cur, l;
3043 xmlChar *target;
3044 xmlParserInputState state;
3045 int count = 0;
3046
3047 if ((RAW == '<') && (NXT(1) == '?')) {
3048 xmlParserInputPtr input = ctxt->input;
3049 state = ctxt->instate;
3050 ctxt->instate = XML_PARSER_PI;
3051 /*
3052 * this is a Processing Instruction.
3053 */
3054 SKIP(2);
3055 SHRINK;
3056
3057 /*
3058 * Parse the target name and check for special support like
3059 * namespace.
3060 */
3061 target = xmlParsePITarget(ctxt);
3062 if (target != NULL) {
3063 if ((RAW == '?') && (NXT(1) == '>')) {
3064 if (input != ctxt->input) {
3065 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068 "PI declaration doesn't start and stop in the same entity\n");
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 }
3072 SKIP(2);
3073
3074 /*
3075 * SAX: PI detected.
3076 */
3077 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3078 (ctxt->sax->processingInstruction != NULL))
3079 ctxt->sax->processingInstruction(ctxt->userData,
3080 target, NULL);
3081 ctxt->instate = state;
3082 xmlFree(target);
3083 return;
3084 }
3085 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3086 if (buf == NULL) {
3087 xmlGenericError(xmlGenericErrorContext,
3088 "malloc of %d byte failed\n", size);
3089 ctxt->instate = state;
3090 return;
3091 }
3092 cur = CUR;
3093 if (!IS_BLANK(cur)) {
3094 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3096 ctxt->sax->error(ctxt->userData,
3097 "xmlParsePI: PI %s space expected\n", target);
3098 ctxt->wellFormed = 0;
3099 ctxt->disableSAX = 1;
3100 }
3101 SKIP_BLANKS;
3102 cur = CUR_CHAR(l);
3103 while (IS_CHAR(cur) && /* checked */
3104 ((cur != '?') || (NXT(1) != '>'))) {
3105 if (len + 5 >= size) {
3106 size *= 2;
3107 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3108 if (buf == NULL) {
3109 xmlGenericError(xmlGenericErrorContext,
3110 "realloc of %d byte failed\n", size);
3111 ctxt->instate = state;
3112 return;
3113 }
3114 }
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 COPY_BUF(l,buf,len,cur);
3121 NEXTL(l);
3122 cur = CUR_CHAR(l);
3123 if (cur == 0) {
3124 SHRINK;
3125 GROW;
3126 cur = CUR_CHAR(l);
3127 }
3128 }
3129 buf[len] = 0;
3130 if (cur != '?') {
3131 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3133 ctxt->sax->error(ctxt->userData,
3134 "xmlParsePI: PI %s never end ...\n", target);
3135 ctxt->wellFormed = 0;
3136 ctxt->disableSAX = 1;
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142 "PI declaration doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
3144 ctxt->disableSAX = 1;
3145 }
3146 SKIP(2);
3147
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003148#ifdef LIBXML_CATALOG_ENABLED
3149 if (((state == XML_PARSER_MISC) ||
3150 (state == XML_PARSER_START)) &&
3151 (xmlStrEqual(target, XML_CATALOG_PI))) {
3152 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3153 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3154 (allow == XML_CATA_ALLOW_ALL))
3155 xmlParseCatalogPI(ctxt, buf);
3156 }
3157#endif
3158
3159
Owen Taylor3473f882001-02-23 17:55:21 +00003160 /*
3161 * SAX: PI detected.
3162 */
3163 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3164 (ctxt->sax->processingInstruction != NULL))
3165 ctxt->sax->processingInstruction(ctxt->userData,
3166 target, buf);
3167 }
3168 xmlFree(buf);
3169 xmlFree(target);
3170 } else {
3171 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3173 ctxt->sax->error(ctxt->userData,
3174 "xmlParsePI : no target name\n");
3175 ctxt->wellFormed = 0;
3176 ctxt->disableSAX = 1;
3177 }
3178 ctxt->instate = state;
3179 }
3180}
3181
3182/**
3183 * xmlParseNotationDecl:
3184 * @ctxt: an XML parser context
3185 *
3186 * parse a notation declaration
3187 *
3188 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3189 *
3190 * Hence there is actually 3 choices:
3191 * 'PUBLIC' S PubidLiteral
3192 * 'PUBLIC' S PubidLiteral S SystemLiteral
3193 * and 'SYSTEM' S SystemLiteral
3194 *
3195 * See the NOTE on xmlParseExternalID().
3196 */
3197
3198void
3199xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3200 xmlChar *name;
3201 xmlChar *Pubid;
3202 xmlChar *Systemid;
3203
3204 if ((RAW == '<') && (NXT(1) == '!') &&
3205 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3206 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3207 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3208 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3209 xmlParserInputPtr input = ctxt->input;
3210 SHRINK;
3211 SKIP(10);
3212 if (!IS_BLANK(CUR)) {
3213 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3215 ctxt->sax->error(ctxt->userData,
3216 "Space required after '<!NOTATION'\n");
3217 ctxt->wellFormed = 0;
3218 ctxt->disableSAX = 1;
3219 return;
3220 }
3221 SKIP_BLANKS;
3222
Daniel Veillard76d66f42001-05-16 21:05:17 +00003223 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003224 if (name == NULL) {
3225 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3227 ctxt->sax->error(ctxt->userData,
3228 "NOTATION: Name expected here\n");
3229 ctxt->wellFormed = 0;
3230 ctxt->disableSAX = 1;
3231 return;
3232 }
3233 if (!IS_BLANK(CUR)) {
3234 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "Space required after the NOTATION name'\n");
3238 ctxt->wellFormed = 0;
3239 ctxt->disableSAX = 1;
3240 return;
3241 }
3242 SKIP_BLANKS;
3243
3244 /*
3245 * Parse the IDs.
3246 */
3247 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3248 SKIP_BLANKS;
3249
3250 if (RAW == '>') {
3251 if (input != ctxt->input) {
3252 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255"Notation declaration doesn't start and stop in the same entity\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 }
3259 NEXT;
3260 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3261 (ctxt->sax->notationDecl != NULL))
3262 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3263 } else {
3264 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "'>' required to close NOTATION declaration\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 }
3271 xmlFree(name);
3272 if (Systemid != NULL) xmlFree(Systemid);
3273 if (Pubid != NULL) xmlFree(Pubid);
3274 }
3275}
3276
3277/**
3278 * xmlParseEntityDecl:
3279 * @ctxt: an XML parser context
3280 *
3281 * parse <!ENTITY declarations
3282 *
3283 * [70] EntityDecl ::= GEDecl | PEDecl
3284 *
3285 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3286 *
3287 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3288 *
3289 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3290 *
3291 * [74] PEDef ::= EntityValue | ExternalID
3292 *
3293 * [76] NDataDecl ::= S 'NDATA' S Name
3294 *
3295 * [ VC: Notation Declared ]
3296 * The Name must match the declared name of a notation.
3297 */
3298
3299void
3300xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3301 xmlChar *name = NULL;
3302 xmlChar *value = NULL;
3303 xmlChar *URI = NULL, *literal = NULL;
3304 xmlChar *ndata = NULL;
3305 int isParameter = 0;
3306 xmlChar *orig = NULL;
3307
3308 GROW;
3309 if ((RAW == '<') && (NXT(1) == '!') &&
3310 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3311 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3312 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3313 xmlParserInputPtr input = ctxt->input;
3314 ctxt->instate = XML_PARSER_ENTITY_DECL;
3315 SHRINK;
3316 SKIP(8);
3317 if (!IS_BLANK(CUR)) {
3318 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3320 ctxt->sax->error(ctxt->userData,
3321 "Space required after '<!ENTITY'\n");
3322 ctxt->wellFormed = 0;
3323 ctxt->disableSAX = 1;
3324 }
3325 SKIP_BLANKS;
3326
3327 if (RAW == '%') {
3328 NEXT;
3329 if (!IS_BLANK(CUR)) {
3330 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData,
3333 "Space required after '%'\n");
3334 ctxt->wellFormed = 0;
3335 ctxt->disableSAX = 1;
3336 }
3337 SKIP_BLANKS;
3338 isParameter = 1;
3339 }
3340
Daniel Veillard76d66f42001-05-16 21:05:17 +00003341 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003342 if (name == NULL) {
3343 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3345 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 return;
3349 }
3350 if (!IS_BLANK(CUR)) {
3351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Space required after the entity name\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 }
3358 SKIP_BLANKS;
3359
3360 /*
3361 * handle the various case of definitions...
3362 */
3363 if (isParameter) {
3364 if ((RAW == '"') || (RAW == '\'')) {
3365 value = xmlParseEntityValue(ctxt, &orig);
3366 if (value) {
3367 if ((ctxt->sax != NULL) &&
3368 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3369 ctxt->sax->entityDecl(ctxt->userData, name,
3370 XML_INTERNAL_PARAMETER_ENTITY,
3371 NULL, NULL, value);
3372 }
3373 } else {
3374 URI = xmlParseExternalID(ctxt, &literal, 1);
3375 if ((URI == NULL) && (literal == NULL)) {
3376 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3378 ctxt->sax->error(ctxt->userData,
3379 "Entity value required\n");
3380 ctxt->wellFormed = 0;
3381 ctxt->disableSAX = 1;
3382 }
3383 if (URI) {
3384 xmlURIPtr uri;
3385
3386 uri = xmlParseURI((const char *) URI);
3387 if (uri == NULL) {
3388 ctxt->errNo = XML_ERR_INVALID_URI;
3389 if ((ctxt->sax != NULL) &&
3390 (!ctxt->disableSAX) &&
3391 (ctxt->sax->error != NULL))
3392 ctxt->sax->error(ctxt->userData,
3393 "Invalid URI: %s\n", URI);
3394 ctxt->wellFormed = 0;
3395 } else {
3396 if (uri->fragment != NULL) {
3397 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3398 if ((ctxt->sax != NULL) &&
3399 (!ctxt->disableSAX) &&
3400 (ctxt->sax->error != NULL))
3401 ctxt->sax->error(ctxt->userData,
3402 "Fragment not allowed: %s\n", URI);
3403 ctxt->wellFormed = 0;
3404 } else {
3405 if ((ctxt->sax != NULL) &&
3406 (!ctxt->disableSAX) &&
3407 (ctxt->sax->entityDecl != NULL))
3408 ctxt->sax->entityDecl(ctxt->userData, name,
3409 XML_EXTERNAL_PARAMETER_ENTITY,
3410 literal, URI, NULL);
3411 }
3412 xmlFreeURI(uri);
3413 }
3414 }
3415 }
3416 } else {
3417 if ((RAW == '"') || (RAW == '\'')) {
3418 value = xmlParseEntityValue(ctxt, &orig);
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3421 ctxt->sax->entityDecl(ctxt->userData, name,
3422 XML_INTERNAL_GENERAL_ENTITY,
3423 NULL, NULL, value);
3424 } else {
3425 URI = xmlParseExternalID(ctxt, &literal, 1);
3426 if ((URI == NULL) && (literal == NULL)) {
3427 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3429 ctxt->sax->error(ctxt->userData,
3430 "Entity value required\n");
3431 ctxt->wellFormed = 0;
3432 ctxt->disableSAX = 1;
3433 }
3434 if (URI) {
3435 xmlURIPtr uri;
3436
3437 uri = xmlParseURI((const char *)URI);
3438 if (uri == NULL) {
3439 ctxt->errNo = XML_ERR_INVALID_URI;
3440 if ((ctxt->sax != NULL) &&
3441 (!ctxt->disableSAX) &&
3442 (ctxt->sax->error != NULL))
3443 ctxt->sax->error(ctxt->userData,
3444 "Invalid URI: %s\n", URI);
3445 ctxt->wellFormed = 0;
3446 } else {
3447 if (uri->fragment != NULL) {
3448 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3449 if ((ctxt->sax != NULL) &&
3450 (!ctxt->disableSAX) &&
3451 (ctxt->sax->error != NULL))
3452 ctxt->sax->error(ctxt->userData,
3453 "Fragment not allowed: %s\n", URI);
3454 ctxt->wellFormed = 0;
3455 }
3456 xmlFreeURI(uri);
3457 }
3458 }
3459 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3460 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData,
3463 "Space required before 'NDATA'\n");
3464 ctxt->wellFormed = 0;
3465 ctxt->disableSAX = 1;
3466 }
3467 SKIP_BLANKS;
3468 if ((RAW == 'N') && (NXT(1) == 'D') &&
3469 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3470 (NXT(4) == 'A')) {
3471 SKIP(5);
3472 if (!IS_BLANK(CUR)) {
3473 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3475 ctxt->sax->error(ctxt->userData,
3476 "Space required after 'NDATA'\n");
3477 ctxt->wellFormed = 0;
3478 ctxt->disableSAX = 1;
3479 }
3480 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003481 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003482 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3483 (ctxt->sax->unparsedEntityDecl != NULL))
3484 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3485 literal, URI, ndata);
3486 } else {
3487 if ((ctxt->sax != NULL) &&
3488 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3489 ctxt->sax->entityDecl(ctxt->userData, name,
3490 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3491 literal, URI, NULL);
3492 }
3493 }
3494 }
3495 SKIP_BLANKS;
3496 if (RAW != '>') {
3497 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3499 ctxt->sax->error(ctxt->userData,
3500 "xmlParseEntityDecl: entity %s not terminated\n", name);
3501 ctxt->wellFormed = 0;
3502 ctxt->disableSAX = 1;
3503 } else {
3504 if (input != ctxt->input) {
3505 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508"Entity declaration doesn't start and stop in the same entity\n");
3509 ctxt->wellFormed = 0;
3510 ctxt->disableSAX = 1;
3511 }
3512 NEXT;
3513 }
3514 if (orig != NULL) {
3515 /*
3516 * Ugly mechanism to save the raw entity value.
3517 */
3518 xmlEntityPtr cur = NULL;
3519
3520 if (isParameter) {
3521 if ((ctxt->sax != NULL) &&
3522 (ctxt->sax->getParameterEntity != NULL))
3523 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3524 } else {
3525 if ((ctxt->sax != NULL) &&
3526 (ctxt->sax->getEntity != NULL))
3527 cur = ctxt->sax->getEntity(ctxt->userData, name);
3528 }
3529 if (cur != NULL) {
3530 if (cur->orig != NULL)
3531 xmlFree(orig);
3532 else
3533 cur->orig = orig;
3534 } else
3535 xmlFree(orig);
3536 }
3537 if (name != NULL) xmlFree(name);
3538 if (value != NULL) xmlFree(value);
3539 if (URI != NULL) xmlFree(URI);
3540 if (literal != NULL) xmlFree(literal);
3541 if (ndata != NULL) xmlFree(ndata);
3542 }
3543}
3544
3545/**
3546 * xmlParseDefaultDecl:
3547 * @ctxt: an XML parser context
3548 * @value: Receive a possible fixed default value for the attribute
3549 *
3550 * Parse an attribute default declaration
3551 *
3552 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3553 *
3554 * [ VC: Required Attribute ]
3555 * if the default declaration is the keyword #REQUIRED, then the
3556 * attribute must be specified for all elements of the type in the
3557 * attribute-list declaration.
3558 *
3559 * [ VC: Attribute Default Legal ]
3560 * The declared default value must meet the lexical constraints of
3561 * the declared attribute type c.f. xmlValidateAttributeDecl()
3562 *
3563 * [ VC: Fixed Attribute Default ]
3564 * if an attribute has a default value declared with the #FIXED
3565 * keyword, instances of that attribute must match the default value.
3566 *
3567 * [ WFC: No < in Attribute Values ]
3568 * handled in xmlParseAttValue()
3569 *
3570 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3571 * or XML_ATTRIBUTE_FIXED.
3572 */
3573
3574int
3575xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3576 int val;
3577 xmlChar *ret;
3578
3579 *value = NULL;
3580 if ((RAW == '#') && (NXT(1) == 'R') &&
3581 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3582 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3583 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3584 (NXT(8) == 'D')) {
3585 SKIP(9);
3586 return(XML_ATTRIBUTE_REQUIRED);
3587 }
3588 if ((RAW == '#') && (NXT(1) == 'I') &&
3589 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3590 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3591 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3592 SKIP(8);
3593 return(XML_ATTRIBUTE_IMPLIED);
3594 }
3595 val = XML_ATTRIBUTE_NONE;
3596 if ((RAW == '#') && (NXT(1) == 'F') &&
3597 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3598 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3599 SKIP(6);
3600 val = XML_ATTRIBUTE_FIXED;
3601 if (!IS_BLANK(CUR)) {
3602 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3604 ctxt->sax->error(ctxt->userData,
3605 "Space required after '#FIXED'\n");
3606 ctxt->wellFormed = 0;
3607 ctxt->disableSAX = 1;
3608 }
3609 SKIP_BLANKS;
3610 }
3611 ret = xmlParseAttValue(ctxt);
3612 ctxt->instate = XML_PARSER_DTD;
3613 if (ret == NULL) {
3614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3615 ctxt->sax->error(ctxt->userData,
3616 "Attribute default value declaration error\n");
3617 ctxt->wellFormed = 0;
3618 ctxt->disableSAX = 1;
3619 } else
3620 *value = ret;
3621 return(val);
3622}
3623
3624/**
3625 * xmlParseNotationType:
3626 * @ctxt: an XML parser context
3627 *
3628 * parse an Notation attribute type.
3629 *
3630 * Note: the leading 'NOTATION' S part has already being parsed...
3631 *
3632 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3633 *
3634 * [ VC: Notation Attributes ]
3635 * Values of this type must match one of the notation names included
3636 * in the declaration; all notation names in the declaration must be declared.
3637 *
3638 * Returns: the notation attribute tree built while parsing
3639 */
3640
3641xmlEnumerationPtr
3642xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3643 xmlChar *name;
3644 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3645
3646 if (RAW != '(') {
3647 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3649 ctxt->sax->error(ctxt->userData,
3650 "'(' required to start 'NOTATION'\n");
3651 ctxt->wellFormed = 0;
3652 ctxt->disableSAX = 1;
3653 return(NULL);
3654 }
3655 SHRINK;
3656 do {
3657 NEXT;
3658 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003659 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003660 if (name == NULL) {
3661 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3663 ctxt->sax->error(ctxt->userData,
3664 "Name expected in NOTATION declaration\n");
3665 ctxt->wellFormed = 0;
3666 ctxt->disableSAX = 1;
3667 return(ret);
3668 }
3669 cur = xmlCreateEnumeration(name);
3670 xmlFree(name);
3671 if (cur == NULL) return(ret);
3672 if (last == NULL) ret = last = cur;
3673 else {
3674 last->next = cur;
3675 last = cur;
3676 }
3677 SKIP_BLANKS;
3678 } while (RAW == '|');
3679 if (RAW != ')') {
3680 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3682 ctxt->sax->error(ctxt->userData,
3683 "')' required to finish NOTATION declaration\n");
3684 ctxt->wellFormed = 0;
3685 ctxt->disableSAX = 1;
3686 if ((last != NULL) && (last != ret))
3687 xmlFreeEnumeration(last);
3688 return(ret);
3689 }
3690 NEXT;
3691 return(ret);
3692}
3693
3694/**
3695 * xmlParseEnumerationType:
3696 * @ctxt: an XML parser context
3697 *
3698 * parse an Enumeration attribute type.
3699 *
3700 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3701 *
3702 * [ VC: Enumeration ]
3703 * Values of this type must match one of the Nmtoken tokens in
3704 * the declaration
3705 *
3706 * Returns: the enumeration attribute tree built while parsing
3707 */
3708
3709xmlEnumerationPtr
3710xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3711 xmlChar *name;
3712 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3713
3714 if (RAW != '(') {
3715 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "'(' required to start ATTLIST enumeration\n");
3719 ctxt->wellFormed = 0;
3720 ctxt->disableSAX = 1;
3721 return(NULL);
3722 }
3723 SHRINK;
3724 do {
3725 NEXT;
3726 SKIP_BLANKS;
3727 name = xmlParseNmtoken(ctxt);
3728 if (name == NULL) {
3729 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732 "NmToken expected in ATTLIST enumeration\n");
3733 ctxt->wellFormed = 0;
3734 ctxt->disableSAX = 1;
3735 return(ret);
3736 }
3737 cur = xmlCreateEnumeration(name);
3738 xmlFree(name);
3739 if (cur == NULL) return(ret);
3740 if (last == NULL) ret = last = cur;
3741 else {
3742 last->next = cur;
3743 last = cur;
3744 }
3745 SKIP_BLANKS;
3746 } while (RAW == '|');
3747 if (RAW != ')') {
3748 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3750 ctxt->sax->error(ctxt->userData,
3751 "')' required to finish ATTLIST enumeration\n");
3752 ctxt->wellFormed = 0;
3753 ctxt->disableSAX = 1;
3754 return(ret);
3755 }
3756 NEXT;
3757 return(ret);
3758}
3759
3760/**
3761 * xmlParseEnumeratedType:
3762 * @ctxt: an XML parser context
3763 * @tree: the enumeration tree built while parsing
3764 *
3765 * parse an Enumerated attribute type.
3766 *
3767 * [57] EnumeratedType ::= NotationType | Enumeration
3768 *
3769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3770 *
3771 *
3772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3773 */
3774
3775int
3776xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3777 if ((RAW == 'N') && (NXT(1) == 'O') &&
3778 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3779 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3780 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3781 SKIP(8);
3782 if (!IS_BLANK(CUR)) {
3783 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3785 ctxt->sax->error(ctxt->userData,
3786 "Space required after 'NOTATION'\n");
3787 ctxt->wellFormed = 0;
3788 ctxt->disableSAX = 1;
3789 return(0);
3790 }
3791 SKIP_BLANKS;
3792 *tree = xmlParseNotationType(ctxt);
3793 if (*tree == NULL) return(0);
3794 return(XML_ATTRIBUTE_NOTATION);
3795 }
3796 *tree = xmlParseEnumerationType(ctxt);
3797 if (*tree == NULL) return(0);
3798 return(XML_ATTRIBUTE_ENUMERATION);
3799}
3800
3801/**
3802 * xmlParseAttributeType:
3803 * @ctxt: an XML parser context
3804 * @tree: the enumeration tree built while parsing
3805 *
3806 * parse the Attribute list def for an element
3807 *
3808 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3809 *
3810 * [55] StringType ::= 'CDATA'
3811 *
3812 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3813 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3814 *
3815 * Validity constraints for attribute values syntax are checked in
3816 * xmlValidateAttributeValue()
3817 *
3818 * [ VC: ID ]
3819 * Values of type ID must match the Name production. A name must not
3820 * appear more than once in an XML document as a value of this type;
3821 * i.e., ID values must uniquely identify the elements which bear them.
3822 *
3823 * [ VC: One ID per Element Type ]
3824 * No element type may have more than one ID attribute specified.
3825 *
3826 * [ VC: ID Attribute Default ]
3827 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3828 *
3829 * [ VC: IDREF ]
3830 * Values of type IDREF must match the Name production, and values
3831 * of type IDREFS must match Names; each IDREF Name must match the value
3832 * of an ID attribute on some element in the XML document; i.e. IDREF
3833 * values must match the value of some ID attribute.
3834 *
3835 * [ VC: Entity Name ]
3836 * Values of type ENTITY must match the Name production, values
3837 * of type ENTITIES must match Names; each Entity Name must match the
3838 * name of an unparsed entity declared in the DTD.
3839 *
3840 * [ VC: Name Token ]
3841 * Values of type NMTOKEN must match the Nmtoken production; values
3842 * of type NMTOKENS must match Nmtokens.
3843 *
3844 * Returns the attribute type
3845 */
3846int
3847xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3848 SHRINK;
3849 if ((RAW == 'C') && (NXT(1) == 'D') &&
3850 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3851 (NXT(4) == 'A')) {
3852 SKIP(5);
3853 return(XML_ATTRIBUTE_CDATA);
3854 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3855 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3856 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3857 SKIP(6);
3858 return(XML_ATTRIBUTE_IDREFS);
3859 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3860 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3861 (NXT(4) == 'F')) {
3862 SKIP(5);
3863 return(XML_ATTRIBUTE_IDREF);
3864 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3865 SKIP(2);
3866 return(XML_ATTRIBUTE_ID);
3867 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3868 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3869 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3870 SKIP(6);
3871 return(XML_ATTRIBUTE_ENTITY);
3872 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3873 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3874 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3875 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3876 SKIP(8);
3877 return(XML_ATTRIBUTE_ENTITIES);
3878 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3879 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3880 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3881 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3882 SKIP(8);
3883 return(XML_ATTRIBUTE_NMTOKENS);
3884 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3885 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3886 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3887 (NXT(6) == 'N')) {
3888 SKIP(7);
3889 return(XML_ATTRIBUTE_NMTOKEN);
3890 }
3891 return(xmlParseEnumeratedType(ctxt, tree));
3892}
3893
3894/**
3895 * xmlParseAttributeListDecl:
3896 * @ctxt: an XML parser context
3897 *
3898 * : parse the Attribute list def for an element
3899 *
3900 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3901 *
3902 * [53] AttDef ::= S Name S AttType S DefaultDecl
3903 *
3904 */
3905void
3906xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3907 xmlChar *elemName;
3908 xmlChar *attrName;
3909 xmlEnumerationPtr tree;
3910
3911 if ((RAW == '<') && (NXT(1) == '!') &&
3912 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3913 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3914 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3915 (NXT(8) == 'T')) {
3916 xmlParserInputPtr input = ctxt->input;
3917
3918 SKIP(9);
3919 if (!IS_BLANK(CUR)) {
3920 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3922 ctxt->sax->error(ctxt->userData,
3923 "Space required after '<!ATTLIST'\n");
3924 ctxt->wellFormed = 0;
3925 ctxt->disableSAX = 1;
3926 }
3927 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003928 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003929 if (elemName == NULL) {
3930 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3932 ctxt->sax->error(ctxt->userData,
3933 "ATTLIST: no name for Element\n");
3934 ctxt->wellFormed = 0;
3935 ctxt->disableSAX = 1;
3936 return;
3937 }
3938 SKIP_BLANKS;
3939 GROW;
3940 while (RAW != '>') {
3941 const xmlChar *check = CUR_PTR;
3942 int type;
3943 int def;
3944 xmlChar *defaultValue = NULL;
3945
3946 GROW;
3947 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003948 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003949 if (attrName == NULL) {
3950 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3952 ctxt->sax->error(ctxt->userData,
3953 "ATTLIST: no name for Attribute\n");
3954 ctxt->wellFormed = 0;
3955 ctxt->disableSAX = 1;
3956 break;
3957 }
3958 GROW;
3959 if (!IS_BLANK(CUR)) {
3960 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3962 ctxt->sax->error(ctxt->userData,
3963 "Space required after the attribute name\n");
3964 ctxt->wellFormed = 0;
3965 ctxt->disableSAX = 1;
3966 if (attrName != NULL)
3967 xmlFree(attrName);
3968 if (defaultValue != NULL)
3969 xmlFree(defaultValue);
3970 break;
3971 }
3972 SKIP_BLANKS;
3973
3974 type = xmlParseAttributeType(ctxt, &tree);
3975 if (type <= 0) {
3976 if (attrName != NULL)
3977 xmlFree(attrName);
3978 if (defaultValue != NULL)
3979 xmlFree(defaultValue);
3980 break;
3981 }
3982
3983 GROW;
3984 if (!IS_BLANK(CUR)) {
3985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3987 ctxt->sax->error(ctxt->userData,
3988 "Space required after the attribute type\n");
3989 ctxt->wellFormed = 0;
3990 ctxt->disableSAX = 1;
3991 if (attrName != NULL)
3992 xmlFree(attrName);
3993 if (defaultValue != NULL)
3994 xmlFree(defaultValue);
3995 if (tree != NULL)
3996 xmlFreeEnumeration(tree);
3997 break;
3998 }
3999 SKIP_BLANKS;
4000
4001 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4002 if (def <= 0) {
4003 if (attrName != NULL)
4004 xmlFree(attrName);
4005 if (defaultValue != NULL)
4006 xmlFree(defaultValue);
4007 if (tree != NULL)
4008 xmlFreeEnumeration(tree);
4009 break;
4010 }
4011
4012 GROW;
4013 if (RAW != '>') {
4014 if (!IS_BLANK(CUR)) {
4015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "Space required after the attribute default value\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 if (attrName != NULL)
4022 xmlFree(attrName);
4023 if (defaultValue != NULL)
4024 xmlFree(defaultValue);
4025 if (tree != NULL)
4026 xmlFreeEnumeration(tree);
4027 break;
4028 }
4029 SKIP_BLANKS;
4030 }
4031 if (check == CUR_PTR) {
4032 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "xmlParseAttributeListDecl: detected internal error\n");
4036 if (attrName != NULL)
4037 xmlFree(attrName);
4038 if (defaultValue != NULL)
4039 xmlFree(defaultValue);
4040 if (tree != NULL)
4041 xmlFreeEnumeration(tree);
4042 break;
4043 }
4044 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4045 (ctxt->sax->attributeDecl != NULL))
4046 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4047 type, def, defaultValue, tree);
4048 if (attrName != NULL)
4049 xmlFree(attrName);
4050 if (defaultValue != NULL)
4051 xmlFree(defaultValue);
4052 GROW;
4053 }
4054 if (RAW == '>') {
4055 if (input != ctxt->input) {
4056 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4058 ctxt->sax->error(ctxt->userData,
4059"Attribute list declaration doesn't start and stop in the same entity\n");
4060 ctxt->wellFormed = 0;
4061 ctxt->disableSAX = 1;
4062 }
4063 NEXT;
4064 }
4065
4066 xmlFree(elemName);
4067 }
4068}
4069
4070/**
4071 * xmlParseElementMixedContentDecl:
4072 * @ctxt: an XML parser context
4073 *
4074 * parse the declaration for a Mixed Element content
4075 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4076 *
4077 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4078 * '(' S? '#PCDATA' S? ')'
4079 *
4080 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4081 *
4082 * [ VC: No Duplicate Types ]
4083 * The same name must not appear more than once in a single
4084 * mixed-content declaration.
4085 *
4086 * returns: the list of the xmlElementContentPtr describing the element choices
4087 */
4088xmlElementContentPtr
4089xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4090 xmlElementContentPtr ret = NULL, cur = NULL, n;
4091 xmlChar *elem = NULL;
4092
4093 GROW;
4094 if ((RAW == '#') && (NXT(1) == 'P') &&
4095 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4096 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4097 (NXT(6) == 'A')) {
4098 SKIP(7);
4099 SKIP_BLANKS;
4100 SHRINK;
4101 if (RAW == ')') {
4102 ctxt->entity = ctxt->input;
4103 NEXT;
4104 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4105 if (RAW == '*') {
4106 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4107 NEXT;
4108 }
4109 return(ret);
4110 }
4111 if ((RAW == '(') || (RAW == '|')) {
4112 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4113 if (ret == NULL) return(NULL);
4114 }
4115 while (RAW == '|') {
4116 NEXT;
4117 if (elem == NULL) {
4118 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4119 if (ret == NULL) return(NULL);
4120 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004121 if (cur != NULL)
4122 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004123 cur = ret;
4124 } else {
4125 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4126 if (n == NULL) return(NULL);
4127 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004128 if (n->c1 != NULL)
4129 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004131 if (n != NULL)
4132 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004133 cur = n;
4134 xmlFree(elem);
4135 }
4136 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004137 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004138 if (elem == NULL) {
4139 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4141 ctxt->sax->error(ctxt->userData,
4142 "xmlParseElementMixedContentDecl : Name expected\n");
4143 ctxt->wellFormed = 0;
4144 ctxt->disableSAX = 1;
4145 xmlFreeElementContent(cur);
4146 return(NULL);
4147 }
4148 SKIP_BLANKS;
4149 GROW;
4150 }
4151 if ((RAW == ')') && (NXT(1) == '*')) {
4152 if (elem != NULL) {
4153 cur->c2 = xmlNewElementContent(elem,
4154 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004155 if (cur->c2 != NULL)
4156 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004157 xmlFree(elem);
4158 }
4159 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4160 ctxt->entity = ctxt->input;
4161 SKIP(2);
4162 } else {
4163 if (elem != NULL) xmlFree(elem);
4164 xmlFreeElementContent(ret);
4165 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4167 ctxt->sax->error(ctxt->userData,
4168 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4169 ctxt->wellFormed = 0;
4170 ctxt->disableSAX = 1;
4171 return(NULL);
4172 }
4173
4174 } else {
4175 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4177 ctxt->sax->error(ctxt->userData,
4178 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4179 ctxt->wellFormed = 0;
4180 ctxt->disableSAX = 1;
4181 }
4182 return(ret);
4183}
4184
4185/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004186 * xmlParseElementChildrenContentD:
4187 * @ctxt: an XML parser context
4188 *
4189 * VMS version of xmlParseElementChildrenContentDecl()
4190 *
4191 * Returns the tree of xmlElementContentPtr describing the element
4192 * hierarchy.
4193 */
4194/**
Owen Taylor3473f882001-02-23 17:55:21 +00004195 * xmlParseElementChildrenContentDecl:
4196 * @ctxt: an XML parser context
4197 *
 * parse the declaration for an element-only (children) content model
4199 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4200 *
4201 *
4202 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4203 *
4204 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4205 *
4206 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4207 *
4208 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4209 *
4210 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4211 * TODO Parameter-entity replacement text must be properly nested
4212 * with parenthetized groups. That is to say, if either of the
4213 * opening or closing parentheses in a choice, seq, or Mixed
4214 * construct is contained in the replacement text for a parameter
4215 * entity, both must be contained in the same replacement text. For
4216 * interoperability, if a parameter-entity reference appears in a
4217 * choice, seq, or Mixed construct, its replacement text should not
4218 * be empty, and neither the first nor last non-blank character of
4219 * the replacement text should be a connector (| or ,).
4220 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004221 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004222 * hierarchy.
4223 */
4224xmlElementContentPtr
4225#ifdef VMS
4226xmlParseElementChildrenContentD
4227#else
4228xmlParseElementChildrenContentDecl
4229#endif
4230(xmlParserCtxtPtr ctxt) {
4231 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4232 xmlChar *elem;
4233 xmlChar type = 0;
4234
4235 SKIP_BLANKS;
4236 GROW;
4237 if (RAW == '(') {
4238 /* Recurse on first child */
4239 NEXT;
4240 SKIP_BLANKS;
4241 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4242 SKIP_BLANKS;
4243 GROW;
4244 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (elem == NULL) {
4247 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4251 ctxt->wellFormed = 0;
4252 ctxt->disableSAX = 1;
4253 return(NULL);
4254 }
4255 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4256 GROW;
4257 if (RAW == '?') {
4258 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4259 NEXT;
4260 } else if (RAW == '*') {
4261 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4262 NEXT;
4263 } else if (RAW == '+') {
4264 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4265 NEXT;
4266 } else {
4267 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4268 }
4269 xmlFree(elem);
4270 GROW;
4271 }
4272 SKIP_BLANKS;
4273 SHRINK;
4274 while (RAW != ')') {
4275 /*
4276 * Each loop we parse one separator and one element.
4277 */
4278 if (RAW == ',') {
4279 if (type == 0) type = CUR;
4280
4281 /*
4282 * Detect "Name | Name , Name" error
4283 */
4284 else if (type != CUR) {
4285 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4287 ctxt->sax->error(ctxt->userData,
4288 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4289 type);
4290 ctxt->wellFormed = 0;
4291 ctxt->disableSAX = 1;
4292 if ((op != NULL) && (op != ret))
4293 xmlFreeElementContent(op);
4294 if ((last != NULL) && (last != ret) &&
4295 (last != ret->c1) && (last != ret->c2))
4296 xmlFreeElementContent(last);
4297 if (ret != NULL)
4298 xmlFreeElementContent(ret);
4299 return(NULL);
4300 }
4301 NEXT;
4302
4303 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4304 if (op == NULL) {
4305 xmlFreeElementContent(ret);
4306 return(NULL);
4307 }
4308 if (last == NULL) {
4309 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004310 if (ret != NULL)
4311 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004312 ret = cur = op;
4313 } else {
4314 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004315 if (op != NULL)
4316 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004317 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004318 if (last != NULL)
4319 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 cur =op;
4321 last = NULL;
4322 }
4323 } else if (RAW == '|') {
4324 if (type == 0) type = CUR;
4325
4326 /*
4327 * Detect "Name , Name | Name" error
4328 */
4329 else if (type != CUR) {
4330 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4332 ctxt->sax->error(ctxt->userData,
4333 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4334 type);
4335 ctxt->wellFormed = 0;
4336 ctxt->disableSAX = 1;
4337 if ((op != NULL) && (op != ret) && (op != last))
4338 xmlFreeElementContent(op);
4339 if ((last != NULL) && (last != ret) &&
4340 (last != ret->c1) && (last != ret->c2))
4341 xmlFreeElementContent(last);
4342 if (ret != NULL)
4343 xmlFreeElementContent(ret);
4344 return(NULL);
4345 }
4346 NEXT;
4347
4348 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4349 if (op == NULL) {
4350 if ((op != NULL) && (op != ret))
4351 xmlFreeElementContent(op);
4352 if ((last != NULL) && (last != ret) &&
4353 (last != ret->c1) && (last != ret->c2))
4354 xmlFreeElementContent(last);
4355 if (ret != NULL)
4356 xmlFreeElementContent(ret);
4357 return(NULL);
4358 }
4359 if (last == NULL) {
4360 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004361 if (ret != NULL)
4362 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 ret = cur = op;
4364 } else {
4365 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004366 if (op != NULL)
4367 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004368 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004369 if (last != NULL)
4370 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 cur =op;
4372 last = NULL;
4373 }
4374 } else {
4375 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4377 ctxt->sax->error(ctxt->userData,
4378 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4379 ctxt->wellFormed = 0;
4380 ctxt->disableSAX = 1;
4381 if ((op != NULL) && (op != ret))
4382 xmlFreeElementContent(op);
4383 if ((last != NULL) && (last != ret) &&
4384 (last != ret->c1) && (last != ret->c2))
4385 xmlFreeElementContent(last);
4386 if (ret != NULL)
4387 xmlFreeElementContent(ret);
4388 return(NULL);
4389 }
4390 GROW;
4391 SKIP_BLANKS;
4392 GROW;
4393 if (RAW == '(') {
4394 /* Recurse on second child */
4395 NEXT;
4396 SKIP_BLANKS;
4397 last = xmlParseElementChildrenContentDecl(ctxt);
4398 SKIP_BLANKS;
4399 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004400 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004401 if (elem == NULL) {
4402 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4404 ctxt->sax->error(ctxt->userData,
4405 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4406 ctxt->wellFormed = 0;
4407 ctxt->disableSAX = 1;
4408 if ((op != NULL) && (op != ret))
4409 xmlFreeElementContent(op);
4410 if ((last != NULL) && (last != ret) &&
4411 (last != ret->c1) && (last != ret->c2))
4412 xmlFreeElementContent(last);
4413 if (ret != NULL)
4414 xmlFreeElementContent(ret);
4415 return(NULL);
4416 }
4417 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4418 xmlFree(elem);
4419 if (RAW == '?') {
4420 last->ocur = XML_ELEMENT_CONTENT_OPT;
4421 NEXT;
4422 } else if (RAW == '*') {
4423 last->ocur = XML_ELEMENT_CONTENT_MULT;
4424 NEXT;
4425 } else if (RAW == '+') {
4426 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4427 NEXT;
4428 } else {
4429 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4430 }
4431 }
4432 SKIP_BLANKS;
4433 GROW;
4434 }
4435 if ((cur != NULL) && (last != NULL)) {
4436 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004437 if (last != NULL)
4438 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004439 }
4440 ctxt->entity = ctxt->input;
4441 NEXT;
4442 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004443 if (ret != NULL)
4444 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004445 NEXT;
4446 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004447 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004448 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004449 cur = ret;
4450 /*
4451 * Some normalization:
4452 * (a | b* | c?)* == (a | b | c)*
4453 */
4454 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4455 if ((cur->c1 != NULL) &&
4456 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4457 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4458 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4459 if ((cur->c2 != NULL) &&
4460 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4461 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4462 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4463 cur = cur->c2;
4464 }
4465 }
Owen Taylor3473f882001-02-23 17:55:21 +00004466 NEXT;
4467 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004468 if (ret != NULL) {
4469 int found = 0;
4470
Daniel Veillarde470df72001-04-18 21:41:07 +00004471 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004472 /*
4473 * Some normalization:
4474 * (a | b*)+ == (a | b)*
4475 * (a | b?)+ == (a | b)*
4476 */
4477 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4478 if ((cur->c1 != NULL) &&
4479 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4480 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4481 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4482 found = 1;
4483 }
4484 if ((cur->c2 != NULL) &&
4485 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4486 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4487 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4488 found = 1;
4489 }
4490 cur = cur->c2;
4491 }
4492 if (found)
4493 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4494 }
Owen Taylor3473f882001-02-23 17:55:21 +00004495 NEXT;
4496 }
4497 return(ret);
4498}
4499
4500/**
4501 * xmlParseElementContentDecl:
4502 * @ctxt: an XML parser context
4503 * @name: the name of the element being defined.
4504 * @result: the Element Content pointer will be stored here if any
4505 *
4506 * parse the declaration for an Element content either Mixed or Children,
4507 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4508 *
4509 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4510 *
4511 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4512 */
4513
4514int
4515xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4516 xmlElementContentPtr *result) {
4517
4518 xmlElementContentPtr tree = NULL;
4519 xmlParserInputPtr input = ctxt->input;
4520 int res;
4521
4522 *result = NULL;
4523
4524 if (RAW != '(') {
4525 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4527 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004528 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004529 ctxt->wellFormed = 0;
4530 ctxt->disableSAX = 1;
4531 return(-1);
4532 }
4533 NEXT;
4534 GROW;
4535 SKIP_BLANKS;
4536 if ((RAW == '#') && (NXT(1) == 'P') &&
4537 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4538 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4539 (NXT(6) == 'A')) {
4540 tree = xmlParseElementMixedContentDecl(ctxt);
4541 res = XML_ELEMENT_TYPE_MIXED;
4542 } else {
4543 tree = xmlParseElementChildrenContentDecl(ctxt);
4544 res = XML_ELEMENT_TYPE_ELEMENT;
4545 }
4546 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4547 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550"Element content declaration doesn't start and stop in the same entity\n");
4551 ctxt->wellFormed = 0;
4552 ctxt->disableSAX = 1;
4553 }
4554 SKIP_BLANKS;
4555 *result = tree;
4556 return(res);
4557}
4558
4559/**
4560 * xmlParseElementDecl:
4561 * @ctxt: an XML parser context
4562 *
4563 * parse an Element declaration.
4564 *
4565 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4566 *
4567 * [ VC: Unique Element Type Declaration ]
4568 * No element type may be declared more than once
4569 *
4570 * Returns the type of the element, or -1 in case of error
4571 */
4572int
4573xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4574 xmlChar *name;
4575 int ret = -1;
4576 xmlElementContentPtr content = NULL;
4577
4578 GROW;
4579 if ((RAW == '<') && (NXT(1) == '!') &&
4580 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4581 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4582 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4583 (NXT(8) == 'T')) {
4584 xmlParserInputPtr input = ctxt->input;
4585
4586 SKIP(9);
4587 if (!IS_BLANK(CUR)) {
4588 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4590 ctxt->sax->error(ctxt->userData,
4591 "Space required after 'ELEMENT'\n");
4592 ctxt->wellFormed = 0;
4593 ctxt->disableSAX = 1;
4594 }
4595 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004596 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004597 if (name == NULL) {
4598 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4600 ctxt->sax->error(ctxt->userData,
4601 "xmlParseElementDecl: no name for Element\n");
4602 ctxt->wellFormed = 0;
4603 ctxt->disableSAX = 1;
4604 return(-1);
4605 }
4606 while ((RAW == 0) && (ctxt->inputNr > 1))
4607 xmlPopInput(ctxt);
4608 if (!IS_BLANK(CUR)) {
4609 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4611 ctxt->sax->error(ctxt->userData,
4612 "Space required after the element name\n");
4613 ctxt->wellFormed = 0;
4614 ctxt->disableSAX = 1;
4615 }
4616 SKIP_BLANKS;
4617 if ((RAW == 'E') && (NXT(1) == 'M') &&
4618 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4619 (NXT(4) == 'Y')) {
4620 SKIP(5);
4621 /*
4622 * Element must always be empty.
4623 */
4624 ret = XML_ELEMENT_TYPE_EMPTY;
4625 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4626 (NXT(2) == 'Y')) {
4627 SKIP(3);
4628 /*
4629 * Element is a generic container.
4630 */
4631 ret = XML_ELEMENT_TYPE_ANY;
4632 } else if (RAW == '(') {
4633 ret = xmlParseElementContentDecl(ctxt, name, &content);
4634 } else {
4635 /*
4636 * [ WFC: PEs in Internal Subset ] error handling.
4637 */
4638 if ((RAW == '%') && (ctxt->external == 0) &&
4639 (ctxt->inputNr == 1)) {
4640 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "PEReference: forbidden within markup decl in internal subset\n");
4644 } else {
4645 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647 ctxt->sax->error(ctxt->userData,
4648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4649 }
4650 ctxt->wellFormed = 0;
4651 ctxt->disableSAX = 1;
4652 if (name != NULL) xmlFree(name);
4653 return(-1);
4654 }
4655
4656 SKIP_BLANKS;
4657 /*
4658 * Pop-up of finished entities.
4659 */
4660 while ((RAW == 0) && (ctxt->inputNr > 1))
4661 xmlPopInput(ctxt);
4662 SKIP_BLANKS;
4663
4664 if (RAW != '>') {
4665 ctxt->errNo = XML_ERR_GT_REQUIRED;
4666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4667 ctxt->sax->error(ctxt->userData,
4668 "xmlParseElementDecl: expected '>' at the end\n");
4669 ctxt->wellFormed = 0;
4670 ctxt->disableSAX = 1;
4671 } else {
4672 if (input != ctxt->input) {
4673 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4675 ctxt->sax->error(ctxt->userData,
4676"Element declaration doesn't start and stop in the same entity\n");
4677 ctxt->wellFormed = 0;
4678 ctxt->disableSAX = 1;
4679 }
4680
4681 NEXT;
4682 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4683 (ctxt->sax->elementDecl != NULL))
4684 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4685 content);
4686 }
4687 if (content != NULL) {
4688 xmlFreeElementContent(content);
4689 }
4690 if (name != NULL) {
4691 xmlFree(name);
4692 }
4693 }
4694 return(ret);
4695}
4696
4697/**
Owen Taylor3473f882001-02-23 17:55:21 +00004698 * xmlParseConditionalSections
4699 * @ctxt: an XML parser context
4700 *
4701 * [61] conditionalSect ::= includeSect | ignoreSect
4702 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4703 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4704 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4705 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4706 */
4707
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004708static void
Owen Taylor3473f882001-02-23 17:55:21 +00004709xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4710 SKIP(3);
4711 SKIP_BLANKS;
4712 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4713 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4714 (NXT(6) == 'E')) {
4715 SKIP(7);
4716 SKIP_BLANKS;
4717 if (RAW != '[') {
4718 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4720 ctxt->sax->error(ctxt->userData,
4721 "XML conditional section '[' expected\n");
4722 ctxt->wellFormed = 0;
4723 ctxt->disableSAX = 1;
4724 } else {
4725 NEXT;
4726 }
4727 if (xmlParserDebugEntities) {
4728 if ((ctxt->input != NULL) && (ctxt->input->filename))
4729 xmlGenericError(xmlGenericErrorContext,
4730 "%s(%d): ", ctxt->input->filename,
4731 ctxt->input->line);
4732 xmlGenericError(xmlGenericErrorContext,
4733 "Entering INCLUDE Conditional Section\n");
4734 }
4735
4736 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4737 (NXT(2) != '>'))) {
4738 const xmlChar *check = CUR_PTR;
4739 int cons = ctxt->input->consumed;
4740 int tok = ctxt->token;
4741
4742 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4743 xmlParseConditionalSections(ctxt);
4744 } else if (IS_BLANK(CUR)) {
4745 NEXT;
4746 } else if (RAW == '%') {
4747 xmlParsePEReference(ctxt);
4748 } else
4749 xmlParseMarkupDecl(ctxt);
4750
4751 /*
4752 * Pop-up of finished entities.
4753 */
4754 while ((RAW == 0) && (ctxt->inputNr > 1))
4755 xmlPopInput(ctxt);
4756
4757 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4758 (tok == ctxt->token)) {
4759 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4761 ctxt->sax->error(ctxt->userData,
4762 "Content error in the external subset\n");
4763 ctxt->wellFormed = 0;
4764 ctxt->disableSAX = 1;
4765 break;
4766 }
4767 }
4768 if (xmlParserDebugEntities) {
4769 if ((ctxt->input != NULL) && (ctxt->input->filename))
4770 xmlGenericError(xmlGenericErrorContext,
4771 "%s(%d): ", ctxt->input->filename,
4772 ctxt->input->line);
4773 xmlGenericError(xmlGenericErrorContext,
4774 "Leaving INCLUDE Conditional Section\n");
4775 }
4776
4777 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4778 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4779 int state;
4780 int instate;
4781 int depth = 0;
4782
4783 SKIP(6);
4784 SKIP_BLANKS;
4785 if (RAW != '[') {
4786 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4788 ctxt->sax->error(ctxt->userData,
4789 "XML conditional section '[' expected\n");
4790 ctxt->wellFormed = 0;
4791 ctxt->disableSAX = 1;
4792 } else {
4793 NEXT;
4794 }
4795 if (xmlParserDebugEntities) {
4796 if ((ctxt->input != NULL) && (ctxt->input->filename))
4797 xmlGenericError(xmlGenericErrorContext,
4798 "%s(%d): ", ctxt->input->filename,
4799 ctxt->input->line);
4800 xmlGenericError(xmlGenericErrorContext,
4801 "Entering IGNORE Conditional Section\n");
4802 }
4803
4804 /*
4805 * Parse up to the end of the conditionnal section
4806 * But disable SAX event generating DTD building in the meantime
4807 */
4808 state = ctxt->disableSAX;
4809 instate = ctxt->instate;
4810 ctxt->disableSAX = 1;
4811 ctxt->instate = XML_PARSER_IGNORE;
4812
4813 while (depth >= 0) {
4814 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4815 depth++;
4816 SKIP(3);
4817 continue;
4818 }
4819 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4820 if (--depth >= 0) SKIP(3);
4821 continue;
4822 }
4823 NEXT;
4824 continue;
4825 }
4826
4827 ctxt->disableSAX = state;
4828 ctxt->instate = instate;
4829
4830 if (xmlParserDebugEntities) {
4831 if ((ctxt->input != NULL) && (ctxt->input->filename))
4832 xmlGenericError(xmlGenericErrorContext,
4833 "%s(%d): ", ctxt->input->filename,
4834 ctxt->input->line);
4835 xmlGenericError(xmlGenericErrorContext,
4836 "Leaving IGNORE Conditional Section\n");
4837 }
4838
4839 } else {
4840 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 }
4847
4848 if (RAW == 0)
4849 SHRINK;
4850
4851 if (RAW == 0) {
4852 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4854 ctxt->sax->error(ctxt->userData,
4855 "XML conditional section not closed\n");
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 } else {
4859 SKIP(3);
4860 }
4861}
4862
4863/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004864 * xmlParseMarkupDecl:
4865 * @ctxt: an XML parser context
4866 *
4867 * parse Markup declarations
4868 *
4869 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4870 * NotationDecl | PI | Comment
4871 *
4872 * [ VC: Proper Declaration/PE Nesting ]
4873 * Parameter-entity replacement text must be properly nested with
4874 * markup declarations. That is to say, if either the first character
4875 * or the last character of a markup declaration (markupdecl above) is
4876 * contained in the replacement text for a parameter-entity reference,
4877 * both must be contained in the same replacement text.
4878 *
4879 * [ WFC: PEs in Internal Subset ]
4880 * In the internal DTD subset, parameter-entity references can occur
4881 * only where markup declarations can occur, not within markup declarations.
4882 * (This does not apply to references that occur in external parameter
4883 * entities or to the external subset.)
4884 */
4885void
4886xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4887 GROW;
4888 xmlParseElementDecl(ctxt);
4889 xmlParseAttributeListDecl(ctxt);
4890 xmlParseEntityDecl(ctxt);
4891 xmlParseNotationDecl(ctxt);
4892 xmlParsePI(ctxt);
4893 xmlParseComment(ctxt);
4894 /*
4895 * This is only for internal subset. On external entities,
4896 * the replacement is done before parsing stage
4897 */
4898 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4899 xmlParsePEReference(ctxt);
4900
4901 /*
4902 * Conditional sections are allowed from entities included
4903 * by PE References in the internal subset.
4904 */
4905 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4906 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4907 xmlParseConditionalSections(ctxt);
4908 }
4909 }
4910
4911 ctxt->instate = XML_PARSER_DTD;
4912}
4913
4914/**
4915 * xmlParseTextDecl:
4916 * @ctxt: an XML parser context
4917 *
4918 * parse an XML declaration header for external entities
4919 *
4920 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4921 *
4922 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4923 */
4924
4925void
4926xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4927 xmlChar *version;
4928
4929 /*
4930 * We know that '<?xml' is here.
4931 */
4932 if ((RAW == '<') && (NXT(1) == '?') &&
4933 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4934 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4935 SKIP(5);
4936 } else {
4937 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4939 ctxt->sax->error(ctxt->userData,
4940 "Text declaration '<?xml' required\n");
4941 ctxt->wellFormed = 0;
4942 ctxt->disableSAX = 1;
4943
4944 return;
4945 }
4946
4947 if (!IS_BLANK(CUR)) {
4948 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "Space needed after '<?xml'\n");
4952 ctxt->wellFormed = 0;
4953 ctxt->disableSAX = 1;
4954 }
4955 SKIP_BLANKS;
4956
4957 /*
4958 * We may have the VersionInfo here.
4959 */
4960 version = xmlParseVersionInfo(ctxt);
4961 if (version == NULL)
4962 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4963 ctxt->input->version = version;
4964
4965 /*
4966 * We must have the encoding declaration
4967 */
4968 if (!IS_BLANK(CUR)) {
4969 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4971 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4972 ctxt->wellFormed = 0;
4973 ctxt->disableSAX = 1;
4974 }
4975 xmlParseEncodingDecl(ctxt);
4976 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4977 /*
4978 * The XML REC instructs us to stop parsing right here
4979 */
4980 return;
4981 }
4982
4983 SKIP_BLANKS;
4984 if ((RAW == '?') && (NXT(1) == '>')) {
4985 SKIP(2);
4986 } else if (RAW == '>') {
4987 /* Deprecated old WD ... */
4988 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4990 ctxt->sax->error(ctxt->userData,
4991 "XML declaration must end-up with '?>'\n");
4992 ctxt->wellFormed = 0;
4993 ctxt->disableSAX = 1;
4994 NEXT;
4995 } else {
4996 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4998 ctxt->sax->error(ctxt->userData,
4999 "parsing XML declaration: '?>' expected\n");
5000 ctxt->wellFormed = 0;
5001 ctxt->disableSAX = 1;
5002 MOVETO_ENDTAG(CUR_PTR);
5003 NEXT;
5004 }
5005}
5006
5007/**
Owen Taylor3473f882001-02-23 17:55:21 +00005008 * xmlParseExternalSubset:
5009 * @ctxt: an XML parser context
5010 * @ExternalID: the external identifier
5011 * @SystemID: the system identifier (or URL)
5012 *
5013 * parse Markup declarations from an external subset
5014 *
5015 * [30] extSubset ::= textDecl? extSubsetDecl
5016 *
5017 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5018 */
5019void
5020xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5021 const xmlChar *SystemID) {
5022 GROW;
5023 if ((RAW == '<') && (NXT(1) == '?') &&
5024 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5025 (NXT(4) == 'l')) {
5026 xmlParseTextDecl(ctxt);
5027 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5028 /*
5029 * The XML REC instructs us to stop parsing right here
5030 */
5031 ctxt->instate = XML_PARSER_EOF;
5032 return;
5033 }
5034 }
5035 if (ctxt->myDoc == NULL) {
5036 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5037 }
5038 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5039 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5040
5041 ctxt->instate = XML_PARSER_DTD;
5042 ctxt->external = 1;
5043 while (((RAW == '<') && (NXT(1) == '?')) ||
5044 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005045 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005046 const xmlChar *check = CUR_PTR;
5047 int cons = ctxt->input->consumed;
5048 int tok = ctxt->token;
5049
5050 GROW;
5051 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5052 xmlParseConditionalSections(ctxt);
5053 } else if (IS_BLANK(CUR)) {
5054 NEXT;
5055 } else if (RAW == '%') {
5056 xmlParsePEReference(ctxt);
5057 } else
5058 xmlParseMarkupDecl(ctxt);
5059
5060 /*
5061 * Pop-up of finished entities.
5062 */
5063 while ((RAW == 0) && (ctxt->inputNr > 1))
5064 xmlPopInput(ctxt);
5065
5066 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5067 (tok == ctxt->token)) {
5068 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5070 ctxt->sax->error(ctxt->userData,
5071 "Content error in the external subset\n");
5072 ctxt->wellFormed = 0;
5073 ctxt->disableSAX = 1;
5074 break;
5075 }
5076 }
5077
5078 if (RAW != 0) {
5079 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5081 ctxt->sax->error(ctxt->userData,
5082 "Extra content at the end of the document\n");
5083 ctxt->wellFormed = 0;
5084 ctxt->disableSAX = 1;
5085 }
5086
5087}
5088
5089/**
5090 * xmlParseReference:
5091 * @ctxt: an XML parser context
5092 *
5093 * parse and handle entity references in content, depending on the SAX
5094 * interface, this may end-up in a call to character() if this is a
5095 * CharRef, a predefined entity, if there is no reference() callback.
5096 * or if the parser was asked to switch to that mode.
5097 *
5098 * [67] Reference ::= EntityRef | CharRef
5099 */
5100void
5101xmlParseReference(xmlParserCtxtPtr ctxt) {
5102 xmlEntityPtr ent;
5103 xmlChar *val;
5104 if (RAW != '&') return;
5105
5106 if (NXT(1) == '#') {
5107 int i = 0;
5108 xmlChar out[10];
5109 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005110 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005111
5112 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5113 /*
5114 * So we are using non-UTF-8 buffers
5115 * Check that the char fit on 8bits, if not
5116 * generate a CharRef.
5117 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005118 if (value <= 0xFF) {
5119 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005120 out[1] = 0;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5122 (!ctxt->disableSAX))
5123 ctxt->sax->characters(ctxt->userData, out, 1);
5124 } else {
5125 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005126 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005128 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005129 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5130 (!ctxt->disableSAX))
5131 ctxt->sax->reference(ctxt->userData, out);
5132 }
5133 } else {
5134 /*
5135 * Just encode the value in UTF-8
5136 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005137 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005138 out[i] = 0;
5139 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5140 (!ctxt->disableSAX))
5141 ctxt->sax->characters(ctxt->userData, out, i);
5142 }
5143 } else {
5144 ent = xmlParseEntityRef(ctxt);
5145 if (ent == NULL) return;
5146 if ((ent->name != NULL) &&
5147 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5148 xmlNodePtr list = NULL;
5149 int ret;
5150
5151
5152 /*
5153 * The first reference to the entity trigger a parsing phase
5154 * where the ent->children is filled with the result from
5155 * the parsing.
5156 */
5157 if (ent->children == NULL) {
5158 xmlChar *value;
5159 value = ent->content;
5160
5161 /*
5162 * Check that this entity is well formed
5163 */
5164 if ((value != NULL) &&
5165 (value[1] == 0) && (value[0] == '<') &&
5166 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5167 /*
5168 * DONE: get definite answer on this !!!
5169 * Lots of entity decls are used to declare a single
5170 * char
5171 * <!ENTITY lt "<">
5172 * Which seems to be valid since
5173 * 2.4: The ampersand character (&) and the left angle
5174 * bracket (<) may appear in their literal form only
5175 * when used ... They are also legal within the literal
5176 * entity value of an internal entity declaration;i
5177 * see "4.3.2 Well-Formed Parsed Entities".
5178 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5179 * Looking at the OASIS test suite and James Clark
5180 * tests, this is broken. However the XML REC uses
5181 * it. Is the XML REC not well-formed ????
5182 * This is a hack to avoid this problem
5183 *
5184 * ANSWER: since lt gt amp .. are already defined,
5185 * this is a redefinition and hence the fact that the
5186 * contentis not well balanced is not a Wf error, this
5187 * is lousy but acceptable.
5188 */
5189 list = xmlNewDocText(ctxt->myDoc, value);
5190 if (list != NULL) {
5191 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5192 (ent->children == NULL)) {
5193 ent->children = list;
5194 ent->last = list;
5195 list->parent = (xmlNodePtr) ent;
5196 } else {
5197 xmlFreeNodeList(list);
5198 }
5199 } else if (list != NULL) {
5200 xmlFreeNodeList(list);
5201 }
5202 } else {
5203 /*
5204 * 4.3.2: An internal general parsed entity is well-formed
5205 * if its replacement text matches the production labeled
5206 * content.
5207 */
5208 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5209 ctxt->depth++;
5210 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5211 ctxt->sax, NULL, ctxt->depth,
5212 value, &list);
5213 ctxt->depth--;
5214 } else if (ent->etype ==
5215 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5216 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005217 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005218 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005219 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005220 ctxt->depth--;
5221 } else {
5222 ret = -1;
5223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5224 ctxt->sax->error(ctxt->userData,
5225 "Internal: invalid entity type\n");
5226 }
5227 if (ret == XML_ERR_ENTITY_LOOP) {
5228 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Detected entity reference loop\n");
5232 ctxt->wellFormed = 0;
5233 ctxt->disableSAX = 1;
5234 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005235 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5236 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005237 (ent->children == NULL)) {
5238 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005239 if (ctxt->replaceEntities) {
5240 /*
5241 * Prune it directly in the generated document
5242 * except for single text nodes.
5243 */
5244 if ((list->type == XML_TEXT_NODE) &&
5245 (list->next == NULL)) {
5246 list->parent = (xmlNodePtr) ent;
5247 list = NULL;
5248 } else {
5249 while (list != NULL) {
5250 list->parent = (xmlNodePtr) ctxt->node;
5251 if (list->next == NULL)
5252 ent->last = list;
5253 list = list->next;
5254 }
5255 list = ent->children;
5256 }
5257 } else {
5258 while (list != NULL) {
5259 list->parent = (xmlNodePtr) ent;
5260 if (list->next == NULL)
5261 ent->last = list;
5262 list = list->next;
5263 }
Owen Taylor3473f882001-02-23 17:55:21 +00005264 }
5265 } else {
5266 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005267 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 }
5269 } else if (ret > 0) {
5270 ctxt->errNo = ret;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "Entity value required\n");
5274 ctxt->wellFormed = 0;
5275 ctxt->disableSAX = 1;
5276 } else if (list != NULL) {
5277 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005278 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 }
5281 }
5282 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5283 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5284 /*
5285 * Create a node.
5286 */
5287 ctxt->sax->reference(ctxt->userData, ent->name);
5288 return;
5289 } else if (ctxt->replaceEntities) {
5290 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5291 /*
5292 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005293 * a simple tree copy for all references except the first
5294 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005295 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005296 if (list == NULL) {
5297 xmlNodePtr new, cur;
5298 cur = ent->children;
5299 while (cur != NULL) {
5300 new = xmlCopyNode(cur, 1);
5301 xmlAddChild(ctxt->node, new);
5302 if (cur == ent->last)
5303 break;
5304 cur = cur->next;
5305 }
5306 } else {
5307 /*
5308 * the name change is to avoid coalescing of the
5309 * node with a prossible previous text one which
5310 * would make ent->children a dandling pointer
5311 */
5312 if (ent->children->type == XML_TEXT_NODE)
5313 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5314 if ((ent->last != ent->children) &&
5315 (ent->last->type == XML_TEXT_NODE))
5316 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5317 xmlAddChildList(ctxt->node, ent->children);
5318 }
5319
Owen Taylor3473f882001-02-23 17:55:21 +00005320 /*
5321 * This is to avoid a nasty side effect, see
5322 * characters() in SAX.c
5323 */
5324 ctxt->nodemem = 0;
5325 ctxt->nodelen = 0;
5326 return;
5327 } else {
5328 /*
5329 * Probably running in SAX mode
5330 */
5331 xmlParserInputPtr input;
5332
5333 input = xmlNewEntityInputStream(ctxt, ent);
5334 xmlPushInput(ctxt, input);
5335 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5336 (RAW == '<') && (NXT(1) == '?') &&
5337 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5338 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5339 xmlParseTextDecl(ctxt);
5340 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5341 /*
5342 * The XML REC instructs us to stop parsing right here
5343 */
5344 ctxt->instate = XML_PARSER_EOF;
5345 return;
5346 }
5347 if (input->standalone == 1) {
5348 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "external parsed entities cannot be standalone\n");
5352 ctxt->wellFormed = 0;
5353 ctxt->disableSAX = 1;
5354 }
5355 }
5356 return;
5357 }
5358 }
5359 } else {
5360 val = ent->content;
5361 if (val == NULL) return;
5362 /*
5363 * inline the entity.
5364 */
5365 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5366 (!ctxt->disableSAX))
5367 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5368 }
5369 }
5370}
5371
5372/**
5373 * xmlParseEntityRef:
5374 * @ctxt: an XML parser context
5375 *
5376 * parse ENTITY references declarations
5377 *
5378 * [68] EntityRef ::= '&' Name ';'
5379 *
5380 * [ WFC: Entity Declared ]
5381 * In a document without any DTD, a document with only an internal DTD
5382 * subset which contains no parameter entity references, or a document
5383 * with "standalone='yes'", the Name given in the entity reference
5384 * must match that in an entity declaration, except that well-formed
5385 * documents need not declare any of the following entities: amp, lt,
5386 * gt, apos, quot. The declaration of a parameter entity must precede
5387 * any reference to it. Similarly, the declaration of a general entity
5388 * must precede any reference to it which appears in a default value in an
5389 * attribute-list declaration. Note that if entities are declared in the
5390 * external subset or in external parameter entities, a non-validating
5391 * processor is not obligated to read and process their declarations;
5392 * for such documents, the rule that an entity must be declared is a
5393 * well-formedness constraint only if standalone='yes'.
5394 *
5395 * [ WFC: Parsed Entity ]
5396 * An entity reference must not contain the name of an unparsed entity
5397 *
5398 * Returns the xmlEntityPtr if found, or NULL otherwise.
5399 */
5400xmlEntityPtr
5401xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5402 xmlChar *name;
5403 xmlEntityPtr ent = NULL;
5404
5405 GROW;
5406
5407 if (RAW == '&') {
5408 NEXT;
5409 name = xmlParseName(ctxt);
5410 if (name == NULL) {
5411 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5413 ctxt->sax->error(ctxt->userData,
5414 "xmlParseEntityRef: no name\n");
5415 ctxt->wellFormed = 0;
5416 ctxt->disableSAX = 1;
5417 } else {
5418 if (RAW == ';') {
5419 NEXT;
5420 /*
5421 * Ask first SAX for entity resolution, otherwise try the
5422 * predefined set.
5423 */
5424 if (ctxt->sax != NULL) {
5425 if (ctxt->sax->getEntity != NULL)
5426 ent = ctxt->sax->getEntity(ctxt->userData, name);
5427 if (ent == NULL)
5428 ent = xmlGetPredefinedEntity(name);
5429 }
5430 /*
5431 * [ WFC: Entity Declared ]
5432 * In a document without any DTD, a document with only an
5433 * internal DTD subset which contains no parameter entity
5434 * references, or a document with "standalone='yes'", the
5435 * Name given in the entity reference must match that in an
5436 * entity declaration, except that well-formed documents
5437 * need not declare any of the following entities: amp, lt,
5438 * gt, apos, quot.
5439 * The declaration of a parameter entity must precede any
5440 * reference to it.
5441 * Similarly, the declaration of a general entity must
5442 * precede any reference to it which appears in a default
5443 * value in an attribute-list declaration. Note that if
5444 * entities are declared in the external subset or in
5445 * external parameter entities, a non-validating processor
5446 * is not obligated to read and process their declarations;
5447 * for such documents, the rule that an entity must be
5448 * declared is a well-formedness constraint only if
5449 * standalone='yes'.
5450 */
5451 if (ent == NULL) {
5452 if ((ctxt->standalone == 1) ||
5453 ((ctxt->hasExternalSubset == 0) &&
5454 (ctxt->hasPErefs == 0))) {
5455 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5457 ctxt->sax->error(ctxt->userData,
5458 "Entity '%s' not defined\n", name);
5459 ctxt->wellFormed = 0;
5460 ctxt->disableSAX = 1;
5461 } else {
5462 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005464 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005465 "Entity '%s' not defined\n", name);
5466 }
5467 }
5468
5469 /*
5470 * [ WFC: Parsed Entity ]
5471 * An entity reference must not contain the name of an
5472 * unparsed entity
5473 */
5474 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5475 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5477 ctxt->sax->error(ctxt->userData,
5478 "Entity reference to unparsed entity %s\n", name);
5479 ctxt->wellFormed = 0;
5480 ctxt->disableSAX = 1;
5481 }
5482
5483 /*
5484 * [ WFC: No External Entity References ]
5485 * Attribute values cannot contain direct or indirect
5486 * entity references to external entities.
5487 */
5488 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5489 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5490 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5492 ctxt->sax->error(ctxt->userData,
5493 "Attribute references external entity '%s'\n", name);
5494 ctxt->wellFormed = 0;
5495 ctxt->disableSAX = 1;
5496 }
5497 /*
5498 * [ WFC: No < in Attribute Values ]
5499 * The replacement text of any entity referred to directly or
5500 * indirectly in an attribute value (other than "&lt;") must
5501 * not contain a <.
5502 */
5503 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5504 (ent != NULL) &&
5505 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5506 (ent->content != NULL) &&
5507 (xmlStrchr(ent->content, '<'))) {
5508 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5510 ctxt->sax->error(ctxt->userData,
5511 "'<' in entity '%s' is not allowed in attributes values\n", name);
5512 ctxt->wellFormed = 0;
5513 ctxt->disableSAX = 1;
5514 }
5515
5516 /*
5517 * Internal check, no parameter entities here ...
5518 */
5519 else {
5520 switch (ent->etype) {
5521 case XML_INTERNAL_PARAMETER_ENTITY:
5522 case XML_EXTERNAL_PARAMETER_ENTITY:
5523 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5525 ctxt->sax->error(ctxt->userData,
5526 "Attempt to reference the parameter entity '%s'\n", name);
5527 ctxt->wellFormed = 0;
5528 ctxt->disableSAX = 1;
5529 break;
5530 default:
5531 break;
5532 }
5533 }
5534
5535 /*
5536 * [ WFC: No Recursion ]
5537 * A parsed entity must not contain a recursive reference
5538 * to itself, either directly or indirectly.
5539 * Done somewhere else
5540 */
5541
5542 } else {
5543 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5545 ctxt->sax->error(ctxt->userData,
5546 "xmlParseEntityRef: expecting ';'\n");
5547 ctxt->wellFormed = 0;
5548 ctxt->disableSAX = 1;
5549 }
5550 xmlFree(name);
5551 }
5552 }
5553 return(ent);
5554}
5555
5556/**
5557 * xmlParseStringEntityRef:
5558 * @ctxt: an XML parser context
5559 * @str: a pointer to an index in the string
5560 *
5561 * parse ENTITY references declarations, but this version parses it from
5562 * a string value.
5563 *
5564 * [68] EntityRef ::= '&' Name ';'
5565 *
5566 * [ WFC: Entity Declared ]
5567 * In a document without any DTD, a document with only an internal DTD
5568 * subset which contains no parameter entity references, or a document
5569 * with "standalone='yes'", the Name given in the entity reference
5570 * must match that in an entity declaration, except that well-formed
5571 * documents need not declare any of the following entities: amp, lt,
5572 * gt, apos, quot. The declaration of a parameter entity must precede
5573 * any reference to it. Similarly, the declaration of a general entity
5574 * must precede any reference to it which appears in a default value in an
5575 * attribute-list declaration. Note that if entities are declared in the
5576 * external subset or in external parameter entities, a non-validating
5577 * processor is not obligated to read and process their declarations;
5578 * for such documents, the rule that an entity must be declared is a
5579 * well-formedness constraint only if standalone='yes'.
5580 *
5581 * [ WFC: Parsed Entity ]
5582 * An entity reference must not contain the name of an unparsed entity
5583 *
5584 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5585 * is updated to the current location in the string.
5586 */
5587xmlEntityPtr
5588xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5589 xmlChar *name;
5590 const xmlChar *ptr;
5591 xmlChar cur;
5592 xmlEntityPtr ent = NULL;
5593
5594 if ((str == NULL) || (*str == NULL))
5595 return(NULL);
5596 ptr = *str;
5597 cur = *ptr;
5598 if (cur == '&') {
5599 ptr++;
5600 cur = *ptr;
5601 name = xmlParseStringName(ctxt, &ptr);
5602 if (name == NULL) {
5603 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005606 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 } else {
5610 if (*ptr == ';') {
5611 ptr++;
5612 /*
5613 * Ask first SAX for entity resolution, otherwise try the
5614 * predefined set.
5615 */
5616 if (ctxt->sax != NULL) {
5617 if (ctxt->sax->getEntity != NULL)
5618 ent = ctxt->sax->getEntity(ctxt->userData, name);
5619 if (ent == NULL)
5620 ent = xmlGetPredefinedEntity(name);
5621 }
5622 /*
5623 * [ WFC: Entity Declared ]
5624 * In a document without any DTD, a document with only an
5625 * internal DTD subset which contains no parameter entity
5626 * references, or a document with "standalone='yes'", the
5627 * Name given in the entity reference must match that in an
5628 * entity declaration, except that well-formed documents
5629 * need not declare any of the following entities: amp, lt,
5630 * gt, apos, quot.
5631 * The declaration of a parameter entity must precede any
5632 * reference to it.
5633 * Similarly, the declaration of a general entity must
5634 * precede any reference to it which appears in a default
5635 * value in an attribute-list declaration. Note that if
5636 * entities are declared in the external subset or in
5637 * external parameter entities, a non-validating processor
5638 * is not obligated to read and process their declarations;
5639 * for such documents, the rule that an entity must be
5640 * declared is a well-formedness constraint only if
5641 * standalone='yes'.
5642 */
5643 if (ent == NULL) {
5644 if ((ctxt->standalone == 1) ||
5645 ((ctxt->hasExternalSubset == 0) &&
5646 (ctxt->hasPErefs == 0))) {
5647 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5649 ctxt->sax->error(ctxt->userData,
5650 "Entity '%s' not defined\n", name);
5651 ctxt->wellFormed = 0;
5652 ctxt->disableSAX = 1;
5653 } else {
5654 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5655 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5656 ctxt->sax->warning(ctxt->userData,
5657 "Entity '%s' not defined\n", name);
5658 }
5659 }
5660
5661 /*
5662 * [ WFC: Parsed Entity ]
5663 * An entity reference must not contain the name of an
5664 * unparsed entity
5665 */
5666 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5667 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5669 ctxt->sax->error(ctxt->userData,
5670 "Entity reference to unparsed entity %s\n", name);
5671 ctxt->wellFormed = 0;
5672 ctxt->disableSAX = 1;
5673 }
5674
5675 /*
5676 * [ WFC: No External Entity References ]
5677 * Attribute values cannot contain direct or indirect
5678 * entity references to external entities.
5679 */
5680 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5681 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5682 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5684 ctxt->sax->error(ctxt->userData,
5685 "Attribute references external entity '%s'\n", name);
5686 ctxt->wellFormed = 0;
5687 ctxt->disableSAX = 1;
5688 }
5689 /*
5690 * [ WFC: No < in Attribute Values ]
5691 * The replacement text of any entity referred to directly or
5692 * indirectly in an attribute value (other than "&lt;") must
5693 * not contain a <.
5694 */
5695 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5696 (ent != NULL) &&
5697 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5698 (ent->content != NULL) &&
5699 (xmlStrchr(ent->content, '<'))) {
5700 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5702 ctxt->sax->error(ctxt->userData,
5703 "'<' in entity '%s' is not allowed in attributes values\n", name);
5704 ctxt->wellFormed = 0;
5705 ctxt->disableSAX = 1;
5706 }
5707
5708 /*
5709 * Internal check, no parameter entities here ...
5710 */
5711 else {
5712 switch (ent->etype) {
5713 case XML_INTERNAL_PARAMETER_ENTITY:
5714 case XML_EXTERNAL_PARAMETER_ENTITY:
5715 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5717 ctxt->sax->error(ctxt->userData,
5718 "Attempt to reference the parameter entity '%s'\n", name);
5719 ctxt->wellFormed = 0;
5720 ctxt->disableSAX = 1;
5721 break;
5722 default:
5723 break;
5724 }
5725 }
5726
5727 /*
5728 * [ WFC: No Recursion ]
5729 * A parsed entity must not contain a recursive reference
5730 * to itself, either directly or indirectly.
5731 * Done somewhwere else
5732 */
5733
5734 } else {
5735 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5737 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005738 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005739 ctxt->wellFormed = 0;
5740 ctxt->disableSAX = 1;
5741 }
5742 xmlFree(name);
5743 }
5744 }
5745 *str = ptr;
5746 return(ent);
5747}
5748
5749/**
5750 * xmlParsePEReference:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse PEReference declarations
5754 * The entity content is handled directly by pushing it's content as
5755 * a new input stream.
5756 *
5757 * [69] PEReference ::= '%' Name ';'
5758 *
5759 * [ WFC: No Recursion ]
5760 * A parsed entity must not contain a recursive
5761 * reference to itself, either directly or indirectly.
5762 *
5763 * [ WFC: Entity Declared ]
5764 * In a document without any DTD, a document with only an internal DTD
5765 * subset which contains no parameter entity references, or a document
5766 * with "standalone='yes'", ... ... The declaration of a parameter
5767 * entity must precede any reference to it...
5768 *
5769 * [ VC: Entity Declared ]
5770 * In a document with an external subset or external parameter entities
5771 * with "standalone='no'", ... ... The declaration of a parameter entity
5772 * must precede any reference to it...
5773 *
5774 * [ WFC: In DTD ]
5775 * Parameter-entity references may only appear in the DTD.
5776 * NOTE: misleading but this is handled.
5777 */
5778void
5779xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5780 xmlChar *name;
5781 xmlEntityPtr entity = NULL;
5782 xmlParserInputPtr input;
5783
5784 if (RAW == '%') {
5785 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005786 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005787 if (name == NULL) {
5788 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5790 ctxt->sax->error(ctxt->userData,
5791 "xmlParsePEReference: no name\n");
5792 ctxt->wellFormed = 0;
5793 ctxt->disableSAX = 1;
5794 } else {
5795 if (RAW == ';') {
5796 NEXT;
5797 if ((ctxt->sax != NULL) &&
5798 (ctxt->sax->getParameterEntity != NULL))
5799 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5800 name);
5801 if (entity == NULL) {
5802 /*
5803 * [ WFC: Entity Declared ]
5804 * In a document without any DTD, a document with only an
5805 * internal DTD subset which contains no parameter entity
5806 * references, or a document with "standalone='yes'", ...
5807 * ... The declaration of a parameter entity must precede
5808 * any reference to it...
5809 */
5810 if ((ctxt->standalone == 1) ||
5811 ((ctxt->hasExternalSubset == 0) &&
5812 (ctxt->hasPErefs == 0))) {
5813 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5814 if ((!ctxt->disableSAX) &&
5815 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5816 ctxt->sax->error(ctxt->userData,
5817 "PEReference: %%%s; not found\n", name);
5818 ctxt->wellFormed = 0;
5819 ctxt->disableSAX = 1;
5820 } else {
5821 /*
5822 * [ VC: Entity Declared ]
5823 * In a document with an external subset or external
5824 * parameter entities with "standalone='no'", ...
5825 * ... The declaration of a parameter entity must precede
5826 * any reference to it...
5827 */
5828 if ((!ctxt->disableSAX) &&
5829 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5830 ctxt->sax->warning(ctxt->userData,
5831 "PEReference: %%%s; not found\n", name);
5832 ctxt->valid = 0;
5833 }
5834 } else {
5835 /*
5836 * Internal checking in case the entity quest barfed
5837 */
5838 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5839 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5840 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5841 ctxt->sax->warning(ctxt->userData,
5842 "Internal: %%%s; is not a parameter entity\n", name);
5843 } else {
5844 /*
5845 * TODO !!!
5846 * handle the extra spaces added before and after
5847 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5848 */
5849 input = xmlNewEntityInputStream(ctxt, entity);
5850 xmlPushInput(ctxt, input);
5851 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5852 (RAW == '<') && (NXT(1) == '?') &&
5853 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5854 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5855 xmlParseTextDecl(ctxt);
5856 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5857 /*
5858 * The XML REC instructs us to stop parsing
5859 * right here
5860 */
5861 ctxt->instate = XML_PARSER_EOF;
5862 xmlFree(name);
5863 return;
5864 }
5865 }
5866 if (ctxt->token == 0)
5867 ctxt->token = ' ';
5868 }
5869 }
5870 ctxt->hasPErefs = 1;
5871 } else {
5872 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5874 ctxt->sax->error(ctxt->userData,
5875 "xmlParsePEReference: expecting ';'\n");
5876 ctxt->wellFormed = 0;
5877 ctxt->disableSAX = 1;
5878 }
5879 xmlFree(name);
5880 }
5881 }
5882}
5883
5884/**
5885 * xmlParseStringPEReference:
5886 * @ctxt: an XML parser context
5887 * @str: a pointer to an index in the string
5888 *
5889 * parse PEReference declarations
5890 *
5891 * [69] PEReference ::= '%' Name ';'
5892 *
5893 * [ WFC: No Recursion ]
5894 * A parsed entity must not contain a recursive
5895 * reference to itself, either directly or indirectly.
5896 *
5897 * [ WFC: Entity Declared ]
5898 * In a document without any DTD, a document with only an internal DTD
5899 * subset which contains no parameter entity references, or a document
5900 * with "standalone='yes'", ... ... The declaration of a parameter
5901 * entity must precede any reference to it...
5902 *
5903 * [ VC: Entity Declared ]
5904 * In a document with an external subset or external parameter entities
5905 * with "standalone='no'", ... ... The declaration of a parameter entity
5906 * must precede any reference to it...
5907 *
5908 * [ WFC: In DTD ]
5909 * Parameter-entity references may only appear in the DTD.
5910 * NOTE: misleading but this is handled.
5911 *
5912 * Returns the string of the entity content.
5913 * str is updated to the current value of the index
5914 */
5915xmlEntityPtr
5916xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5917 const xmlChar *ptr;
5918 xmlChar cur;
5919 xmlChar *name;
5920 xmlEntityPtr entity = NULL;
5921
5922 if ((str == NULL) || (*str == NULL)) return(NULL);
5923 ptr = *str;
5924 cur = *ptr;
5925 if (cur == '%') {
5926 ptr++;
5927 cur = *ptr;
5928 name = xmlParseStringName(ctxt, &ptr);
5929 if (name == NULL) {
5930 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5932 ctxt->sax->error(ctxt->userData,
5933 "xmlParseStringPEReference: no name\n");
5934 ctxt->wellFormed = 0;
5935 ctxt->disableSAX = 1;
5936 } else {
5937 cur = *ptr;
5938 if (cur == ';') {
5939 ptr++;
5940 cur = *ptr;
5941 if ((ctxt->sax != NULL) &&
5942 (ctxt->sax->getParameterEntity != NULL))
5943 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5944 name);
5945 if (entity == NULL) {
5946 /*
5947 * [ WFC: Entity Declared ]
5948 * In a document without any DTD, a document with only an
5949 * internal DTD subset which contains no parameter entity
5950 * references, or a document with "standalone='yes'", ...
5951 * ... The declaration of a parameter entity must precede
5952 * any reference to it...
5953 */
5954 if ((ctxt->standalone == 1) ||
5955 ((ctxt->hasExternalSubset == 0) &&
5956 (ctxt->hasPErefs == 0))) {
5957 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "PEReference: %%%s; not found\n", name);
5961 ctxt->wellFormed = 0;
5962 ctxt->disableSAX = 1;
5963 } else {
5964 /*
5965 * [ VC: Entity Declared ]
5966 * In a document with an external subset or external
5967 * parameter entities with "standalone='no'", ...
5968 * ... The declaration of a parameter entity must
5969 * precede any reference to it...
5970 */
5971 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5972 ctxt->sax->warning(ctxt->userData,
5973 "PEReference: %%%s; not found\n", name);
5974 ctxt->valid = 0;
5975 }
5976 } else {
5977 /*
5978 * Internal checking in case the entity quest barfed
5979 */
5980 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5981 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5982 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5983 ctxt->sax->warning(ctxt->userData,
5984 "Internal: %%%s; is not a parameter entity\n", name);
5985 }
5986 }
5987 ctxt->hasPErefs = 1;
5988 } else {
5989 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5991 ctxt->sax->error(ctxt->userData,
5992 "xmlParseStringPEReference: expecting ';'\n");
5993 ctxt->wellFormed = 0;
5994 ctxt->disableSAX = 1;
5995 }
5996 xmlFree(name);
5997 }
5998 }
5999 *str = ptr;
6000 return(entity);
6001}
6002
6003/**
6004 * xmlParseDocTypeDecl:
6005 * @ctxt: an XML parser context
6006 *
6007 * parse a DOCTYPE declaration
6008 *
6009 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6010 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6011 *
6012 * [ VC: Root Element Type ]
6013 * The Name in the document type declaration must match the element
6014 * type of the root element.
6015 */
6016
6017void
6018xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6019 xmlChar *name = NULL;
6020 xmlChar *ExternalID = NULL;
6021 xmlChar *URI = NULL;
6022
6023 /*
6024 * We know that '<!DOCTYPE' has been detected.
6025 */
6026 SKIP(9);
6027
6028 SKIP_BLANKS;
6029
6030 /*
6031 * Parse the DOCTYPE name.
6032 */
6033 name = xmlParseName(ctxt);
6034 if (name == NULL) {
6035 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6037 ctxt->sax->error(ctxt->userData,
6038 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6039 ctxt->wellFormed = 0;
6040 ctxt->disableSAX = 1;
6041 }
6042 ctxt->intSubName = name;
6043
6044 SKIP_BLANKS;
6045
6046 /*
6047 * Check for SystemID and ExternalID
6048 */
6049 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6050
6051 if ((URI != NULL) || (ExternalID != NULL)) {
6052 ctxt->hasExternalSubset = 1;
6053 }
6054 ctxt->extSubURI = URI;
6055 ctxt->extSubSystem = ExternalID;
6056
6057 SKIP_BLANKS;
6058
6059 /*
6060 * Create and update the internal subset.
6061 */
6062 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6063 (!ctxt->disableSAX))
6064 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6065
6066 /*
6067 * Is there any internal subset declarations ?
6068 * they are handled separately in xmlParseInternalSubset()
6069 */
6070 if (RAW == '[')
6071 return;
6072
6073 /*
6074 * We should be at the end of the DOCTYPE declaration.
6075 */
6076 if (RAW != '>') {
6077 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006079 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006080 ctxt->wellFormed = 0;
6081 ctxt->disableSAX = 1;
6082 }
6083 NEXT;
6084}
6085
6086/**
6087 * xmlParseInternalsubset:
6088 * @ctxt: an XML parser context
6089 *
6090 * parse the internal subset declaration
6091 *
6092 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6093 */
6094
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006095static void
Owen Taylor3473f882001-02-23 17:55:21 +00006096xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6097 /*
6098 * Is there any DTD definition ?
6099 */
6100 if (RAW == '[') {
6101 ctxt->instate = XML_PARSER_DTD;
6102 NEXT;
6103 /*
6104 * Parse the succession of Markup declarations and
6105 * PEReferences.
6106 * Subsequence (markupdecl | PEReference | S)*
6107 */
6108 while (RAW != ']') {
6109 const xmlChar *check = CUR_PTR;
6110 int cons = ctxt->input->consumed;
6111
6112 SKIP_BLANKS;
6113 xmlParseMarkupDecl(ctxt);
6114 xmlParsePEReference(ctxt);
6115
6116 /*
6117 * Pop-up of finished entities.
6118 */
6119 while ((RAW == 0) && (ctxt->inputNr > 1))
6120 xmlPopInput(ctxt);
6121
6122 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6123 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6125 ctxt->sax->error(ctxt->userData,
6126 "xmlParseInternalSubset: error detected in Markup declaration\n");
6127 ctxt->wellFormed = 0;
6128 ctxt->disableSAX = 1;
6129 break;
6130 }
6131 }
6132 if (RAW == ']') {
6133 NEXT;
6134 SKIP_BLANKS;
6135 }
6136 }
6137
6138 /*
6139 * We should be at the end of the DOCTYPE declaration.
6140 */
6141 if (RAW != '>') {
6142 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006144 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006145 ctxt->wellFormed = 0;
6146 ctxt->disableSAX = 1;
6147 }
6148 NEXT;
6149}
6150
6151/**
6152 * xmlParseAttribute:
6153 * @ctxt: an XML parser context
6154 * @value: a xmlChar ** used to store the value of the attribute
6155 *
6156 * parse an attribute
6157 *
6158 * [41] Attribute ::= Name Eq AttValue
6159 *
6160 * [ WFC: No External Entity References ]
6161 * Attribute values cannot contain direct or indirect entity references
6162 * to external entities.
6163 *
6164 * [ WFC: No < in Attribute Values ]
6165 * The replacement text of any entity referred to directly or indirectly in
6166 * an attribute value (other than "&lt;") must not contain a <.
6167 *
6168 * [ VC: Attribute Value Type ]
6169 * The attribute must have been declared; the value must be of the type
6170 * declared for it.
6171 *
6172 * [25] Eq ::= S? '=' S?
6173 *
6174 * With namespace:
6175 *
6176 * [NS 11] Attribute ::= QName Eq AttValue
6177 *
6178 * Also the case QName == xmlns:??? is handled independently as a namespace
6179 * definition.
6180 *
6181 * Returns the attribute name, and the value in *value.
6182 */
6183
6184xmlChar *
6185xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6186 xmlChar *name, *val;
6187
6188 *value = NULL;
6189 name = xmlParseName(ctxt);
6190 if (name == NULL) {
6191 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6193 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6194 ctxt->wellFormed = 0;
6195 ctxt->disableSAX = 1;
6196 return(NULL);
6197 }
6198
6199 /*
6200 * read the value
6201 */
6202 SKIP_BLANKS;
6203 if (RAW == '=') {
6204 NEXT;
6205 SKIP_BLANKS;
6206 val = xmlParseAttValue(ctxt);
6207 ctxt->instate = XML_PARSER_CONTENT;
6208 } else {
6209 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6211 ctxt->sax->error(ctxt->userData,
6212 "Specification mandate value for attribute %s\n", name);
6213 ctxt->wellFormed = 0;
6214 ctxt->disableSAX = 1;
6215 xmlFree(name);
6216 return(NULL);
6217 }
6218
6219 /*
6220 * Check that xml:lang conforms to the specification
6221 * No more registered as an error, just generate a warning now
6222 * since this was deprecated in XML second edition
6223 */
6224 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6225 if (!xmlCheckLanguageID(val)) {
6226 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6227 ctxt->sax->warning(ctxt->userData,
6228 "Malformed value for xml:lang : %s\n", val);
6229 }
6230 }
6231
6232 /*
6233 * Check that xml:space conforms to the specification
6234 */
6235 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6236 if (xmlStrEqual(val, BAD_CAST "default"))
6237 *(ctxt->space) = 0;
6238 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6239 *(ctxt->space) = 1;
6240 else {
6241 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243 ctxt->sax->error(ctxt->userData,
6244"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6245 val);
6246 ctxt->wellFormed = 0;
6247 ctxt->disableSAX = 1;
6248 }
6249 }
6250
6251 *value = val;
6252 return(name);
6253}
6254
6255/**
6256 * xmlParseStartTag:
6257 * @ctxt: an XML parser context
6258 *
6259 * parse a start of tag either for rule element or
6260 * EmptyElement. In both case we don't parse the tag closing chars.
6261 *
6262 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6263 *
6264 * [ WFC: Unique Att Spec ]
6265 * No attribute name may appear more than once in the same start-tag or
6266 * empty-element tag.
6267 *
6268 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6269 *
6270 * [ WFC: Unique Att Spec ]
6271 * No attribute name may appear more than once in the same start-tag or
6272 * empty-element tag.
6273 *
6274 * With namespace:
6275 *
6276 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6277 *
6278 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6279 *
6280 * Returns the element name parsed
6281 */
6282
6283xmlChar *
6284xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6285 xmlChar *name;
6286 xmlChar *attname;
6287 xmlChar *attvalue;
6288 const xmlChar **atts = NULL;
6289 int nbatts = 0;
6290 int maxatts = 0;
6291 int i;
6292
6293 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006294 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006295
6296 name = xmlParseName(ctxt);
6297 if (name == NULL) {
6298 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6300 ctxt->sax->error(ctxt->userData,
6301 "xmlParseStartTag: invalid element name\n");
6302 ctxt->wellFormed = 0;
6303 ctxt->disableSAX = 1;
6304 return(NULL);
6305 }
6306
6307 /*
6308 * Now parse the attributes, it ends up with the ending
6309 *
6310 * (S Attribute)* S?
6311 */
6312 SKIP_BLANKS;
6313 GROW;
6314
Daniel Veillard21a0f912001-02-25 19:54:14 +00006315 while ((RAW != '>') &&
6316 ((RAW != '/') || (NXT(1) != '>')) &&
6317 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006318 const xmlChar *q = CUR_PTR;
6319 int cons = ctxt->input->consumed;
6320
6321 attname = xmlParseAttribute(ctxt, &attvalue);
6322 if ((attname != NULL) && (attvalue != NULL)) {
6323 /*
6324 * [ WFC: Unique Att Spec ]
6325 * No attribute name may appear more than once in the same
6326 * start-tag or empty-element tag.
6327 */
6328 for (i = 0; i < nbatts;i += 2) {
6329 if (xmlStrEqual(atts[i], attname)) {
6330 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6332 ctxt->sax->error(ctxt->userData,
6333 "Attribute %s redefined\n",
6334 attname);
6335 ctxt->wellFormed = 0;
6336 ctxt->disableSAX = 1;
6337 xmlFree(attname);
6338 xmlFree(attvalue);
6339 goto failed;
6340 }
6341 }
6342
6343 /*
6344 * Add the pair to atts
6345 */
6346 if (atts == NULL) {
6347 maxatts = 10;
6348 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6349 if (atts == NULL) {
6350 xmlGenericError(xmlGenericErrorContext,
6351 "malloc of %ld byte failed\n",
6352 maxatts * (long)sizeof(xmlChar *));
6353 return(NULL);
6354 }
6355 } else if (nbatts + 4 > maxatts) {
6356 maxatts *= 2;
6357 atts = (const xmlChar **) xmlRealloc((void *) atts,
6358 maxatts * sizeof(xmlChar *));
6359 if (atts == NULL) {
6360 xmlGenericError(xmlGenericErrorContext,
6361 "realloc of %ld byte failed\n",
6362 maxatts * (long)sizeof(xmlChar *));
6363 return(NULL);
6364 }
6365 }
6366 atts[nbatts++] = attname;
6367 atts[nbatts++] = attvalue;
6368 atts[nbatts] = NULL;
6369 atts[nbatts + 1] = NULL;
6370 } else {
6371 if (attname != NULL)
6372 xmlFree(attname);
6373 if (attvalue != NULL)
6374 xmlFree(attvalue);
6375 }
6376
6377failed:
6378
6379 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6380 break;
6381 if (!IS_BLANK(RAW)) {
6382 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6384 ctxt->sax->error(ctxt->userData,
6385 "attributes construct error\n");
6386 ctxt->wellFormed = 0;
6387 ctxt->disableSAX = 1;
6388 }
6389 SKIP_BLANKS;
6390 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6391 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6393 ctxt->sax->error(ctxt->userData,
6394 "xmlParseStartTag: problem parsing attributes\n");
6395 ctxt->wellFormed = 0;
6396 ctxt->disableSAX = 1;
6397 break;
6398 }
6399 GROW;
6400 }
6401
6402 /*
6403 * SAX: Start of Element !
6404 */
6405 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6406 (!ctxt->disableSAX))
6407 ctxt->sax->startElement(ctxt->userData, name, atts);
6408
6409 if (atts != NULL) {
6410 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6411 xmlFree((void *) atts);
6412 }
6413 return(name);
6414}
6415
6416/**
6417 * xmlParseEndTag:
6418 * @ctxt: an XML parser context
6419 *
6420 * parse an end of tag
6421 *
6422 * [42] ETag ::= '</' Name S? '>'
6423 *
6424 * With namespace
6425 *
6426 * [NS 9] ETag ::= '</' QName S? '>'
6427 */
6428
6429void
6430xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6431 xmlChar *name;
6432 xmlChar *oldname;
6433
6434 GROW;
6435 if ((RAW != '<') || (NXT(1) != '/')) {
6436 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6438 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6439 ctxt->wellFormed = 0;
6440 ctxt->disableSAX = 1;
6441 return;
6442 }
6443 SKIP(2);
6444
6445 name = xmlParseName(ctxt);
6446
6447 /*
6448 * We should definitely be at the ending "S? '>'" part
6449 */
6450 GROW;
6451 SKIP_BLANKS;
6452 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6453 ctxt->errNo = XML_ERR_GT_REQUIRED;
6454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6455 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6456 ctxt->wellFormed = 0;
6457 ctxt->disableSAX = 1;
6458 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006459 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006460
6461 /*
6462 * [ WFC: Element Type Match ]
6463 * The Name in an element's end-tag must match the element type in the
6464 * start-tag.
6465 *
6466 */
6467 if ((name == NULL) || (ctxt->name == NULL) ||
6468 (!xmlStrEqual(name, ctxt->name))) {
6469 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6471 if ((name != NULL) && (ctxt->name != NULL)) {
6472 ctxt->sax->error(ctxt->userData,
6473 "Opening and ending tag mismatch: %s and %s\n",
6474 ctxt->name, name);
6475 } else if (ctxt->name != NULL) {
6476 ctxt->sax->error(ctxt->userData,
6477 "Ending tag eror for: %s\n", ctxt->name);
6478 } else {
6479 ctxt->sax->error(ctxt->userData,
6480 "Ending tag error: internal error ???\n");
6481 }
6482
6483 }
6484 ctxt->wellFormed = 0;
6485 ctxt->disableSAX = 1;
6486 }
6487
6488 /*
6489 * SAX: End of Tag
6490 */
6491 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6492 (!ctxt->disableSAX))
6493 ctxt->sax->endElement(ctxt->userData, name);
6494
6495 if (name != NULL)
6496 xmlFree(name);
6497 oldname = namePop(ctxt);
6498 spacePop(ctxt);
6499 if (oldname != NULL) {
6500#ifdef DEBUG_STACK
6501 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6502#endif
6503 xmlFree(oldname);
6504 }
6505 return;
6506}
6507
6508/**
6509 * xmlParseCDSect:
6510 * @ctxt: an XML parser context
6511 *
6512 * Parse escaped pure raw content.
6513 *
6514 * [18] CDSect ::= CDStart CData CDEnd
6515 *
6516 * [19] CDStart ::= '<![CDATA['
6517 *
6518 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6519 *
6520 * [21] CDEnd ::= ']]>'
6521 */
6522void
6523xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6524 xmlChar *buf = NULL;
6525 int len = 0;
6526 int size = XML_PARSER_BUFFER_SIZE;
6527 int r, rl;
6528 int s, sl;
6529 int cur, l;
6530 int count = 0;
6531
6532 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6533 (NXT(2) == '[') && (NXT(3) == 'C') &&
6534 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6535 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6536 (NXT(8) == '[')) {
6537 SKIP(9);
6538 } else
6539 return;
6540
6541 ctxt->instate = XML_PARSER_CDATA_SECTION;
6542 r = CUR_CHAR(rl);
6543 if (!IS_CHAR(r)) {
6544 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6546 ctxt->sax->error(ctxt->userData,
6547 "CData section not finished\n");
6548 ctxt->wellFormed = 0;
6549 ctxt->disableSAX = 1;
6550 ctxt->instate = XML_PARSER_CONTENT;
6551 return;
6552 }
6553 NEXTL(rl);
6554 s = CUR_CHAR(sl);
6555 if (!IS_CHAR(s)) {
6556 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558 ctxt->sax->error(ctxt->userData,
6559 "CData section not finished\n");
6560 ctxt->wellFormed = 0;
6561 ctxt->disableSAX = 1;
6562 ctxt->instate = XML_PARSER_CONTENT;
6563 return;
6564 }
6565 NEXTL(sl);
6566 cur = CUR_CHAR(l);
6567 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6568 if (buf == NULL) {
6569 xmlGenericError(xmlGenericErrorContext,
6570 "malloc of %d byte failed\n", size);
6571 return;
6572 }
6573 while (IS_CHAR(cur) &&
6574 ((r != ']') || (s != ']') || (cur != '>'))) {
6575 if (len + 5 >= size) {
6576 size *= 2;
6577 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6578 if (buf == NULL) {
6579 xmlGenericError(xmlGenericErrorContext,
6580 "realloc of %d byte failed\n", size);
6581 return;
6582 }
6583 }
6584 COPY_BUF(rl,buf,len,r);
6585 r = s;
6586 rl = sl;
6587 s = cur;
6588 sl = l;
6589 count++;
6590 if (count > 50) {
6591 GROW;
6592 count = 0;
6593 }
6594 NEXTL(l);
6595 cur = CUR_CHAR(l);
6596 }
6597 buf[len] = 0;
6598 ctxt->instate = XML_PARSER_CONTENT;
6599 if (cur != '>') {
6600 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6602 ctxt->sax->error(ctxt->userData,
6603 "CData section not finished\n%.50s\n", buf);
6604 ctxt->wellFormed = 0;
6605 ctxt->disableSAX = 1;
6606 xmlFree(buf);
6607 return;
6608 }
6609 NEXTL(l);
6610
6611 /*
6612 * Ok the buffer is to be consumed as cdata.
6613 */
6614 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6615 if (ctxt->sax->cdataBlock != NULL)
6616 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006617 else if (ctxt->sax->characters != NULL)
6618 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006619 }
6620 xmlFree(buf);
6621}
6622
/**
 * xmlParseContent:
 * @ctxt:  an XML parser context
 *
 * Parse a content: loop over the input, dispatching each construct to
 * its dedicated parser, until the end of input or a '</' is reached.
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 */

void
xmlParseContent(xmlParserCtxtPtr ctxt) {
    GROW;
    while (((RAW != 0) || (ctxt->token != 0)) &&
           ((RAW != '<') || (NXT(1) != '/'))) {
        /*
         * Snapshot of the position and pending token before this
         * iteration, compared at the bottom to detect lack of progress.
         */
        const xmlChar *test = CUR_PTR;
        int cons = ctxt->input->consumed;
        int tok = ctxt->token;
        const xmlChar *cur = ctxt->input->cur;

        /*
         * Handle possible processed charrefs.
         */
        if (ctxt->token != 0) {
            xmlParseCharData(ctxt, 0);
        }
        /*
         * First case : a Processing Instruction.
         */
        else if ((*cur == '<') && (cur[1] == '?')) {
            xmlParsePI(ctxt);
        }

        /*
         * Second case : a CDSection
         */
        else if ((*cur == '<') && (NXT(1) == '!') &&
            (NXT(2) == '[') && (NXT(3) == 'C') &&
            (NXT(4) == 'D') && (NXT(5) == 'A') &&
            (NXT(6) == 'T') && (NXT(7) == 'A') &&
            (NXT(8) == '[')) {
            xmlParseCDSect(ctxt);
        }

        /*
         * Third case :  a comment
         */
        else if ((*cur == '<') && (NXT(1) == '!') &&
                 (NXT(2) == '-') && (NXT(3) == '-')) {
            xmlParseComment(ctxt);
            ctxt->instate = XML_PARSER_CONTENT;
        }

        /*
         * Fourth case :  a sub-element.
         */
        else if (*cur == '<') {
            xmlParseElement(ctxt);
        }

        /*
         * Fifth case : a reference. If it has not been resolved,
         *    parsing returns its Name, create the node
         */

        else if (*cur == '&') {
            xmlParseReference(ctxt);
        }

        /*
         * Last case, text. Note that References are handled directly.
         */
        else {
            xmlParseCharData(ctxt, 0);
        }

        GROW;
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
        SHRINK;

        /*
         * Neither the position nor the pending token changed: the
         * construct could not be parsed, so report it and stop the
         * parser rather than loop forever.
         */
        if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
            (tok == ctxt->token)) {
            ctxt->errNo = XML_ERR_INTERNAL_ERROR;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                     "detected an error in element content\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            ctxt->instate = XML_PARSER_EOF;
            break;
        }
    }
}
6719
/**
 * xmlParseElement:
 * @ctxt:  an XML parser context
 *
 * Parse an XML element: start tag, content and end tag (or an empty
 * element tag). This is highly recursive, nested elements come back
 * here through xmlParseContent().
 *
 * [39] element ::= EmptyElemTag | STag content ETag
 *
 * [ WFC: Element Type Match ]
 * The Name in an element's end-tag must match the element type in the
 * start-tag.
 *
 * [ VC: Element Valid ]
 * An element is valid if there is a declaration matching elementdecl
 * where the Name matches the element type and one of the following holds:
 *  - The declaration matches EMPTY and the element has no content.
 *  - The declaration matches children and the sequence of child elements
 *    belongs to the language generated by the regular expression in the
 *    content model, with optional white space (characters matching the
 *    nonterminal S) between each pair of child elements.
 *  - The declaration matches Mixed and the content consists of character
 *    data and child elements whose types match names in the content model.
 *  - The declaration matches ANY, and the types of any child elements have
 *    been declared.
 */

void
xmlParseElement(xmlParserCtxtPtr ctxt) {
    /*
     * Start of the tag in the input buffer, only used in error messages.
     * NOTE(review): this pointer is taken before any further parsing; if
     * the input buffer can be moved or shrunk while the element is being
     * parsed, it may be stale when printed below - confirm.
     */
    const xmlChar *openTag = CUR_PTR;
    xmlChar *name;
    xmlChar *oldname;
    xmlParserNodeInfo node_info;
    xmlNodePtr ret;

    /* Capture start position */
    if (ctxt->record_info) {
        node_info.begin_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
        node_info.begin_line = ctxt->input->line;
    }

    /*
     * Push a space entry for this element: -1 for the first one, a copy
     * of the current top value otherwise.
     */
    if (ctxt->spaceNr == 0)
        spacePush(ctxt, -1);
    else
        spacePush(ctxt, *ctxt->space);

    name = xmlParseStartTag(ctxt);
    if (name == NULL) {
        spacePop(ctxt);
        return;
    }
    namePush(ctxt, name);
    /*
     * Current node once the start tag has been handled.
     * NOTE(review): presumably the node built by the startElement
     * callback - confirm.
     */
    ret = ctxt->node;

    /*
     * [ VC: Root Element Type ]
     * The Name in the document type declaration must match the element
     * type of the root element.
     */
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);

    /*
     * Check for an Empty Element.
     */
    if ((RAW == '/') && (NXT(1) == '>')) {
        SKIP(2);
        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->endElement(ctxt->userData, name);
        oldname = namePop(ctxt);
        spacePop(ctxt);
        if (oldname != NULL) {
#ifdef DEBUG_STACK
            xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
            xmlFree(oldname);
        }
        if ( ret != NULL && ctxt->record_info ) {
           node_info.end_pos = ctxt->input->consumed +
                              (CUR_PTR - ctxt->input->base);
           node_info.end_line = ctxt->input->line;
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }
    if (RAW == '>') {
        NEXT1;
    } else {
        /* the start tag is not closed by '>' nor by '/>' */
        ctxt->errNo = XML_ERR_GT_REQUIRED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "Couldn't find end of Start Tag\n%.30s\n",
                             openTag);
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        oldname = namePop(ctxt);
        spacePop(ctxt);
        if (oldname != NULL) {
#ifdef DEBUG_STACK
            xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
            xmlFree(oldname);
        }

        /*
         * Capture end position and add node
         */
        if ( ret != NULL && ctxt->record_info ) {
           node_info.end_pos = ctxt->input->consumed +
                              (CUR_PTR - ctxt->input->base);
           node_info.end_line = ctxt->input->line;
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }

    /*
     * Parse the content of the element:
     */
    xmlParseContent(ctxt);
    if (!IS_CHAR(RAW)) {
        /*
         * Content parsing stopped on something which is not a valid
         * char instead of the expected '</'. Note that no node info is
         * recorded on this path.
         */
        ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                 "Premature end of data in tag %.30s\n", openTag);
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        oldname = namePop(ctxt);
        spacePop(ctxt);
        if (oldname != NULL) {
#ifdef DEBUG_STACK
            xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
            xmlFree(oldname);
        }
        return;
    }

    /*
     * parse the end of tag: '</' should be here.
     * xmlParseEndTag() pops the name and space entries pushed above.
     */
    xmlParseEndTag(ctxt);

    /*
     * Capture end position and add node
     */
    if ( ret != NULL && ctxt->record_info ) {
       node_info.end_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
       node_info.end_line = ctxt->input->line;
       node_info.node = ret;
       xmlParserAddNodeInfo(ctxt, &node_info);
    }
}
6888
6889/**
6890 * xmlParseVersionNum:
6891 * @ctxt: an XML parser context
6892 *
6893 * parse the XML version value.
6894 *
6895 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6896 *
6897 * Returns the string giving the XML version number, or NULL
6898 */
6899xmlChar *
6900xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6901 xmlChar *buf = NULL;
6902 int len = 0;
6903 int size = 10;
6904 xmlChar cur;
6905
6906 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6907 if (buf == NULL) {
6908 xmlGenericError(xmlGenericErrorContext,
6909 "malloc of %d byte failed\n", size);
6910 return(NULL);
6911 }
6912 cur = CUR;
6913 while (((cur >= 'a') && (cur <= 'z')) ||
6914 ((cur >= 'A') && (cur <= 'Z')) ||
6915 ((cur >= '0') && (cur <= '9')) ||
6916 (cur == '_') || (cur == '.') ||
6917 (cur == ':') || (cur == '-')) {
6918 if (len + 1 >= size) {
6919 size *= 2;
6920 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6921 if (buf == NULL) {
6922 xmlGenericError(xmlGenericErrorContext,
6923 "realloc of %d byte failed\n", size);
6924 return(NULL);
6925 }
6926 }
6927 buf[len++] = cur;
6928 NEXT;
6929 cur=CUR;
6930 }
6931 buf[len] = 0;
6932 return(buf);
6933}
6934
6935/**
6936 * xmlParseVersionInfo:
6937 * @ctxt: an XML parser context
6938 *
6939 * parse the XML version.
6940 *
6941 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6942 *
6943 * [25] Eq ::= S? '=' S?
6944 *
6945 * Returns the version string, e.g. "1.0"
6946 */
6947
6948xmlChar *
6949xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6950 xmlChar *version = NULL;
6951 const xmlChar *q;
6952
6953 if ((RAW == 'v') && (NXT(1) == 'e') &&
6954 (NXT(2) == 'r') && (NXT(3) == 's') &&
6955 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6956 (NXT(6) == 'n')) {
6957 SKIP(7);
6958 SKIP_BLANKS;
6959 if (RAW != '=') {
6960 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6962 ctxt->sax->error(ctxt->userData,
6963 "xmlParseVersionInfo : expected '='\n");
6964 ctxt->wellFormed = 0;
6965 ctxt->disableSAX = 1;
6966 return(NULL);
6967 }
6968 NEXT;
6969 SKIP_BLANKS;
6970 if (RAW == '"') {
6971 NEXT;
6972 q = CUR_PTR;
6973 version = xmlParseVersionNum(ctxt);
6974 if (RAW != '"') {
6975 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6977 ctxt->sax->error(ctxt->userData,
6978 "String not closed\n%.50s\n", q);
6979 ctxt->wellFormed = 0;
6980 ctxt->disableSAX = 1;
6981 } else
6982 NEXT;
6983 } else if (RAW == '\''){
6984 NEXT;
6985 q = CUR_PTR;
6986 version = xmlParseVersionNum(ctxt);
6987 if (RAW != '\'') {
6988 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6990 ctxt->sax->error(ctxt->userData,
6991 "String not closed\n%.50s\n", q);
6992 ctxt->wellFormed = 0;
6993 ctxt->disableSAX = 1;
6994 } else
6995 NEXT;
6996 } else {
6997 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6999 ctxt->sax->error(ctxt->userData,
7000 "xmlParseVersionInfo : expected ' or \"\n");
7001 ctxt->wellFormed = 0;
7002 ctxt->disableSAX = 1;
7003 }
7004 }
7005 return(version);
7006}
7007
7008/**
7009 * xmlParseEncName:
7010 * @ctxt: an XML parser context
7011 *
7012 * parse the XML encoding name
7013 *
7014 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7015 *
7016 * Returns the encoding name value or NULL
7017 */
7018xmlChar *
7019xmlParseEncName(xmlParserCtxtPtr ctxt) {
7020 xmlChar *buf = NULL;
7021 int len = 0;
7022 int size = 10;
7023 xmlChar cur;
7024
7025 cur = CUR;
7026 if (((cur >= 'a') && (cur <= 'z')) ||
7027 ((cur >= 'A') && (cur <= 'Z'))) {
7028 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7029 if (buf == NULL) {
7030 xmlGenericError(xmlGenericErrorContext,
7031 "malloc of %d byte failed\n", size);
7032 return(NULL);
7033 }
7034
7035 buf[len++] = cur;
7036 NEXT;
7037 cur = CUR;
7038 while (((cur >= 'a') && (cur <= 'z')) ||
7039 ((cur >= 'A') && (cur <= 'Z')) ||
7040 ((cur >= '0') && (cur <= '9')) ||
7041 (cur == '.') || (cur == '_') ||
7042 (cur == '-')) {
7043 if (len + 1 >= size) {
7044 size *= 2;
7045 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7046 if (buf == NULL) {
7047 xmlGenericError(xmlGenericErrorContext,
7048 "realloc of %d byte failed\n", size);
7049 return(NULL);
7050 }
7051 }
7052 buf[len++] = cur;
7053 NEXT;
7054 cur = CUR;
7055 if (cur == 0) {
7056 SHRINK;
7057 GROW;
7058 cur = CUR;
7059 }
7060 }
7061 buf[len] = 0;
7062 } else {
7063 ctxt->errNo = XML_ERR_ENCODING_NAME;
7064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7065 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7066 ctxt->wellFormed = 0;
7067 ctxt->disableSAX = 1;
7068 }
7069 return(buf);
7070}
7071
7072/**
7073 * xmlParseEncodingDecl:
7074 * @ctxt: an XML parser context
7075 *
7076 * parse the XML encoding declaration
7077 *
7078 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7079 *
7080 * this setups the conversion filters.
7081 *
7082 * Returns the encoding value or NULL
7083 */
7084
7085xmlChar *
7086xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7087 xmlChar *encoding = NULL;
7088 const xmlChar *q;
7089
7090 SKIP_BLANKS;
7091 if ((RAW == 'e') && (NXT(1) == 'n') &&
7092 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7093 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7094 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7095 SKIP(8);
7096 SKIP_BLANKS;
7097 if (RAW != '=') {
7098 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7100 ctxt->sax->error(ctxt->userData,
7101 "xmlParseEncodingDecl : expected '='\n");
7102 ctxt->wellFormed = 0;
7103 ctxt->disableSAX = 1;
7104 return(NULL);
7105 }
7106 NEXT;
7107 SKIP_BLANKS;
7108 if (RAW == '"') {
7109 NEXT;
7110 q = CUR_PTR;
7111 encoding = xmlParseEncName(ctxt);
7112 if (RAW != '"') {
7113 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7115 ctxt->sax->error(ctxt->userData,
7116 "String not closed\n%.50s\n", q);
7117 ctxt->wellFormed = 0;
7118 ctxt->disableSAX = 1;
7119 } else
7120 NEXT;
7121 } else if (RAW == '\''){
7122 NEXT;
7123 q = CUR_PTR;
7124 encoding = xmlParseEncName(ctxt);
7125 if (RAW != '\'') {
7126 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7128 ctxt->sax->error(ctxt->userData,
7129 "String not closed\n%.50s\n", q);
7130 ctxt->wellFormed = 0;
7131 ctxt->disableSAX = 1;
7132 } else
7133 NEXT;
7134 } else if (RAW == '"'){
7135 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7137 ctxt->sax->error(ctxt->userData,
7138 "xmlParseEncodingDecl : expected ' or \"\n");
7139 ctxt->wellFormed = 0;
7140 ctxt->disableSAX = 1;
7141 }
7142 if (encoding != NULL) {
7143 xmlCharEncoding enc;
7144 xmlCharEncodingHandlerPtr handler;
7145
7146 if (ctxt->input->encoding != NULL)
7147 xmlFree((xmlChar *) ctxt->input->encoding);
7148 ctxt->input->encoding = encoding;
7149
7150 enc = xmlParseCharEncoding((const char *) encoding);
7151 /*
7152 * registered set of known encodings
7153 */
7154 if (enc != XML_CHAR_ENCODING_ERROR) {
7155 xmlSwitchEncoding(ctxt, enc);
7156 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7157 xmlFree(encoding);
7158 return(NULL);
7159 }
7160 } else {
7161 /*
7162 * fallback for unknown encodings
7163 */
7164 handler = xmlFindCharEncodingHandler((const char *) encoding);
7165 if (handler != NULL) {
7166 xmlSwitchToEncoding(ctxt, handler);
7167 } else {
7168 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7170 ctxt->sax->error(ctxt->userData,
7171 "Unsupported encoding %s\n", encoding);
7172 return(NULL);
7173 }
7174 }
7175 }
7176 }
7177 return(encoding);
7178}
7179
7180/**
7181 * xmlParseSDDecl:
7182 * @ctxt: an XML parser context
7183 *
7184 * parse the XML standalone declaration
7185 *
7186 * [32] SDDecl ::= S 'standalone' Eq
7187 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7188 *
7189 * [ VC: Standalone Document Declaration ]
7190 * TODO The standalone document declaration must have the value "no"
7191 * if any external markup declarations contain declarations of:
7192 * - attributes with default values, if elements to which these
7193 * attributes apply appear in the document without specifications
7194 * of values for these attributes, or
7195 * - entities (other than amp, lt, gt, apos, quot), if references
7196 * to those entities appear in the document, or
7197 * - attributes with values subject to normalization, where the
7198 * attribute appears in the document with a value which will change
7199 * as a result of normalization, or
7200 * - element types with element content, if white space occurs directly
7201 * within any instance of those types.
7202 *
7203 * Returns 1 if standalone, 0 otherwise
7204 */
7205
7206int
7207xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7208 int standalone = -1;
7209
7210 SKIP_BLANKS;
7211 if ((RAW == 's') && (NXT(1) == 't') &&
7212 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7213 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7214 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7215 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7216 SKIP(10);
7217 SKIP_BLANKS;
7218 if (RAW != '=') {
7219 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7221 ctxt->sax->error(ctxt->userData,
7222 "XML standalone declaration : expected '='\n");
7223 ctxt->wellFormed = 0;
7224 ctxt->disableSAX = 1;
7225 return(standalone);
7226 }
7227 NEXT;
7228 SKIP_BLANKS;
7229 if (RAW == '\''){
7230 NEXT;
7231 if ((RAW == 'n') && (NXT(1) == 'o')) {
7232 standalone = 0;
7233 SKIP(2);
7234 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7235 (NXT(2) == 's')) {
7236 standalone = 1;
7237 SKIP(3);
7238 } else {
7239 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7241 ctxt->sax->error(ctxt->userData,
7242 "standalone accepts only 'yes' or 'no'\n");
7243 ctxt->wellFormed = 0;
7244 ctxt->disableSAX = 1;
7245 }
7246 if (RAW != '\'') {
7247 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7249 ctxt->sax->error(ctxt->userData, "String not closed\n");
7250 ctxt->wellFormed = 0;
7251 ctxt->disableSAX = 1;
7252 } else
7253 NEXT;
7254 } else if (RAW == '"'){
7255 NEXT;
7256 if ((RAW == 'n') && (NXT(1) == 'o')) {
7257 standalone = 0;
7258 SKIP(2);
7259 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7260 (NXT(2) == 's')) {
7261 standalone = 1;
7262 SKIP(3);
7263 } else {
7264 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7266 ctxt->sax->error(ctxt->userData,
7267 "standalone accepts only 'yes' or 'no'\n");
7268 ctxt->wellFormed = 0;
7269 ctxt->disableSAX = 1;
7270 }
7271 if (RAW != '"') {
7272 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7274 ctxt->sax->error(ctxt->userData, "String not closed\n");
7275 ctxt->wellFormed = 0;
7276 ctxt->disableSAX = 1;
7277 } else
7278 NEXT;
7279 } else {
7280 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7282 ctxt->sax->error(ctxt->userData,
7283 "Standalone value not found\n");
7284 ctxt->wellFormed = 0;
7285 ctxt->disableSAX = 1;
7286 }
7287 }
7288 return(standalone);
7289}
7290
7291/**
7292 * xmlParseXMLDecl:
7293 * @ctxt: an XML parser context
7294 *
7295 * parse an XML declaration header
7296 *
7297 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7298 */
7299
7300void
7301xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7302 xmlChar *version;
7303
7304 /*
7305 * We know that '<?xml' is here.
7306 */
7307 SKIP(5);
7308
7309 if (!IS_BLANK(RAW)) {
7310 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7312 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7313 ctxt->wellFormed = 0;
7314 ctxt->disableSAX = 1;
7315 }
7316 SKIP_BLANKS;
7317
7318 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007319 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007320 */
7321 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007322 if (version == NULL) {
7323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7324 ctxt->sax->error(ctxt->userData,
7325 "Malformed declaration expecting version\n");
7326 ctxt->wellFormed = 0;
7327 ctxt->disableSAX = 1;
7328 } else {
7329 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7330 /*
7331 * TODO: Blueberry should be detected here
7332 */
7333 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7334 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7335 version);
7336 }
7337 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007338 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007339 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007340 }
Owen Taylor3473f882001-02-23 17:55:21 +00007341
7342 /*
7343 * We may have the encoding declaration
7344 */
7345 if (!IS_BLANK(RAW)) {
7346 if ((RAW == '?') && (NXT(1) == '>')) {
7347 SKIP(2);
7348 return;
7349 }
7350 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7352 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7353 ctxt->wellFormed = 0;
7354 ctxt->disableSAX = 1;
7355 }
7356 xmlParseEncodingDecl(ctxt);
7357 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7358 /*
7359 * The XML REC instructs us to stop parsing right here
7360 */
7361 return;
7362 }
7363
7364 /*
7365 * We may have the standalone status.
7366 */
7367 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7368 if ((RAW == '?') && (NXT(1) == '>')) {
7369 SKIP(2);
7370 return;
7371 }
7372 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7374 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7375 ctxt->wellFormed = 0;
7376 ctxt->disableSAX = 1;
7377 }
7378 SKIP_BLANKS;
7379 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7380
7381 SKIP_BLANKS;
7382 if ((RAW == '?') && (NXT(1) == '>')) {
7383 SKIP(2);
7384 } else if (RAW == '>') {
7385 /* Deprecated old WD ... */
7386 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7388 ctxt->sax->error(ctxt->userData,
7389 "XML declaration must end-up with '?>'\n");
7390 ctxt->wellFormed = 0;
7391 ctxt->disableSAX = 1;
7392 NEXT;
7393 } else {
7394 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7396 ctxt->sax->error(ctxt->userData,
7397 "parsing XML declaration: '?>' expected\n");
7398 ctxt->wellFormed = 0;
7399 ctxt->disableSAX = 1;
7400 MOVETO_ENDTAG(CUR_PTR);
7401 NEXT;
7402 }
7403}
7404
7405/**
7406 * xmlParseMisc:
7407 * @ctxt: an XML parser context
7408 *
7409 * parse an XML Misc* optionnal field.
7410 *
7411 * [27] Misc ::= Comment | PI | S
7412 */
7413
7414void
7415xmlParseMisc(xmlParserCtxtPtr ctxt) {
7416 while (((RAW == '<') && (NXT(1) == '?')) ||
7417 ((RAW == '<') && (NXT(1) == '!') &&
7418 (NXT(2) == '-') && (NXT(3) == '-')) ||
7419 IS_BLANK(CUR)) {
7420 if ((RAW == '<') && (NXT(1) == '?')) {
7421 xmlParsePI(ctxt);
7422 } else if (IS_BLANK(CUR)) {
7423 NEXT;
7424 } else
7425 xmlParseComment(ctxt);
7426 }
7427}
7428
7429/**
7430 * xmlParseDocument:
7431 * @ctxt: an XML parser context
7432 *
7433 * parse an XML document (and build a tree if using the standard SAX
7434 * interface).
7435 *
7436 * [1] document ::= prolog element Misc*
7437 *
7438 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7439 *
7440 * Returns 0, -1 in case of error. the parser context is augmented
7441 * as a result of the parsing.
7442 */
7443
7444int
7445xmlParseDocument(xmlParserCtxtPtr ctxt) {
7446 xmlChar start[4];
7447 xmlCharEncoding enc;
7448
7449 xmlInitParser();
7450
7451 GROW;
7452
7453 /*
7454 * SAX: beginning of the document processing.
7455 */
7456 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7457 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7458
Daniel Veillard50f34372001-08-03 12:06:36 +00007459 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007460 /*
7461 * Get the 4 first bytes and decode the charset
7462 * if enc != XML_CHAR_ENCODING_NONE
7463 * plug some encoding conversion routines.
7464 */
7465 start[0] = RAW;
7466 start[1] = NXT(1);
7467 start[2] = NXT(2);
7468 start[3] = NXT(3);
7469 enc = xmlDetectCharEncoding(start, 4);
7470 if (enc != XML_CHAR_ENCODING_NONE) {
7471 xmlSwitchEncoding(ctxt, enc);
7472 }
Owen Taylor3473f882001-02-23 17:55:21 +00007473 }
7474
7475
7476 if (CUR == 0) {
7477 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7479 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7480 ctxt->wellFormed = 0;
7481 ctxt->disableSAX = 1;
7482 }
7483
7484 /*
7485 * Check for the XMLDecl in the Prolog.
7486 */
7487 GROW;
7488 if ((RAW == '<') && (NXT(1) == '?') &&
7489 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7490 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7491
7492 /*
7493 * Note that we will switch encoding on the fly.
7494 */
7495 xmlParseXMLDecl(ctxt);
7496 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7497 /*
7498 * The XML REC instructs us to stop parsing right here
7499 */
7500 return(-1);
7501 }
7502 ctxt->standalone = ctxt->input->standalone;
7503 SKIP_BLANKS;
7504 } else {
7505 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7506 }
7507 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7508 ctxt->sax->startDocument(ctxt->userData);
7509
7510 /*
7511 * The Misc part of the Prolog
7512 */
7513 GROW;
7514 xmlParseMisc(ctxt);
7515
7516 /*
7517 * Then possibly doc type declaration(s) and more Misc
7518 * (doctypedecl Misc*)?
7519 */
7520 GROW;
7521 if ((RAW == '<') && (NXT(1) == '!') &&
7522 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7523 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7524 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7525 (NXT(8) == 'E')) {
7526
7527 ctxt->inSubset = 1;
7528 xmlParseDocTypeDecl(ctxt);
7529 if (RAW == '[') {
7530 ctxt->instate = XML_PARSER_DTD;
7531 xmlParseInternalSubset(ctxt);
7532 }
7533
7534 /*
7535 * Create and update the external subset.
7536 */
7537 ctxt->inSubset = 2;
7538 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7539 (!ctxt->disableSAX))
7540 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7541 ctxt->extSubSystem, ctxt->extSubURI);
7542 ctxt->inSubset = 0;
7543
7544
7545 ctxt->instate = XML_PARSER_PROLOG;
7546 xmlParseMisc(ctxt);
7547 }
7548
7549 /*
7550 * Time to start parsing the tree itself
7551 */
7552 GROW;
7553 if (RAW != '<') {
7554 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7556 ctxt->sax->error(ctxt->userData,
7557 "Start tag expected, '<' not found\n");
7558 ctxt->wellFormed = 0;
7559 ctxt->disableSAX = 1;
7560 ctxt->instate = XML_PARSER_EOF;
7561 } else {
7562 ctxt->instate = XML_PARSER_CONTENT;
7563 xmlParseElement(ctxt);
7564 ctxt->instate = XML_PARSER_EPILOG;
7565
7566
7567 /*
7568 * The Misc part at the end
7569 */
7570 xmlParseMisc(ctxt);
7571
7572 if (RAW != 0) {
7573 ctxt->errNo = XML_ERR_DOCUMENT_END;
7574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7575 ctxt->sax->error(ctxt->userData,
7576 "Extra content at the end of the document\n");
7577 ctxt->wellFormed = 0;
7578 ctxt->disableSAX = 1;
7579 }
7580 ctxt->instate = XML_PARSER_EOF;
7581 }
7582
7583 /*
7584 * SAX: end of the document processing.
7585 */
7586 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7587 (!ctxt->disableSAX))
7588 ctxt->sax->endDocument(ctxt->userData);
7589
7590 if (! ctxt->wellFormed) return(-1);
7591 return(0);
7592}
7593
7594/**
7595 * xmlParseExtParsedEnt:
7596 * @ctxt: an XML parser context
7597 *
7598 * parse a genreral parsed entity
7599 * An external general parsed entity is well-formed if it matches the
7600 * production labeled extParsedEnt.
7601 *
7602 * [78] extParsedEnt ::= TextDecl? content
7603 *
7604 * Returns 0, -1 in case of error. the parser context is augmented
7605 * as a result of the parsing.
7606 */
7607
7608int
7609xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7610 xmlChar start[4];
7611 xmlCharEncoding enc;
7612
7613 xmlDefaultSAXHandlerInit();
7614
7615 GROW;
7616
7617 /*
7618 * SAX: beginning of the document processing.
7619 */
7620 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7621 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7622
7623 /*
7624 * Get the 4 first bytes and decode the charset
7625 * if enc != XML_CHAR_ENCODING_NONE
7626 * plug some encoding conversion routines.
7627 */
7628 start[0] = RAW;
7629 start[1] = NXT(1);
7630 start[2] = NXT(2);
7631 start[3] = NXT(3);
7632 enc = xmlDetectCharEncoding(start, 4);
7633 if (enc != XML_CHAR_ENCODING_NONE) {
7634 xmlSwitchEncoding(ctxt, enc);
7635 }
7636
7637
7638 if (CUR == 0) {
7639 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7641 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7642 ctxt->wellFormed = 0;
7643 ctxt->disableSAX = 1;
7644 }
7645
7646 /*
7647 * Check for the XMLDecl in the Prolog.
7648 */
7649 GROW;
7650 if ((RAW == '<') && (NXT(1) == '?') &&
7651 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7652 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7653
7654 /*
7655 * Note that we will switch encoding on the fly.
7656 */
7657 xmlParseXMLDecl(ctxt);
7658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7659 /*
7660 * The XML REC instructs us to stop parsing right here
7661 */
7662 return(-1);
7663 }
7664 SKIP_BLANKS;
7665 } else {
7666 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7667 }
7668 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7669 ctxt->sax->startDocument(ctxt->userData);
7670
7671 /*
7672 * Doing validity checking on chunk doesn't make sense
7673 */
7674 ctxt->instate = XML_PARSER_CONTENT;
7675 ctxt->validate = 0;
7676 ctxt->loadsubset = 0;
7677 ctxt->depth = 0;
7678
7679 xmlParseContent(ctxt);
7680
7681 if ((RAW == '<') && (NXT(1) == '/')) {
7682 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7684 ctxt->sax->error(ctxt->userData,
7685 "chunk is not well balanced\n");
7686 ctxt->wellFormed = 0;
7687 ctxt->disableSAX = 1;
7688 } else if (RAW != 0) {
7689 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7691 ctxt->sax->error(ctxt->userData,
7692 "extra content at the end of well balanced chunk\n");
7693 ctxt->wellFormed = 0;
7694 ctxt->disableSAX = 1;
7695 }
7696
7697 /*
7698 * SAX: end of the document processing.
7699 */
7700 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7701 (!ctxt->disableSAX))
7702 ctxt->sax->endDocument(ctxt->userData);
7703
7704 if (! ctxt->wellFormed) return(-1);
7705 return(0);
7706}
7707
7708/************************************************************************
7709 * *
7710 * Progressive parsing interfaces *
7711 * *
7712 ************************************************************************/
7713
7714/**
7715 * xmlParseLookupSequence:
7716 * @ctxt: an XML parser context
7717 * @first: the first char to lookup
7718 * @next: the next char to lookup or zero
7719 * @third: the next char to lookup or zero
7720 *
7721 * Try to find if a sequence (first, next, third) or just (first next) or
7722 * (first) is available in the input stream.
7723 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7724 * to avoid rescanning sequences of bytes, it DOES change the state of the
7725 * parser, do not use liberally.
7726 *
7727 * Returns the index to the current parsing point if the full sequence
7728 * is available, -1 otherwise.
7729 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007730static int
Owen Taylor3473f882001-02-23 17:55:21 +00007731xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7732 xmlChar next, xmlChar third) {
7733 int base, len;
7734 xmlParserInputPtr in;
7735 const xmlChar *buf;
7736
7737 in = ctxt->input;
7738 if (in == NULL) return(-1);
7739 base = in->cur - in->base;
7740 if (base < 0) return(-1);
7741 if (ctxt->checkIndex > base)
7742 base = ctxt->checkIndex;
7743 if (in->buf == NULL) {
7744 buf = in->base;
7745 len = in->length;
7746 } else {
7747 buf = in->buf->buffer->content;
7748 len = in->buf->buffer->use;
7749 }
7750 /* take into account the sequence length */
7751 if (third) len -= 2;
7752 else if (next) len --;
7753 for (;base < len;base++) {
7754 if (buf[base] == first) {
7755 if (third != 0) {
7756 if ((buf[base + 1] != next) ||
7757 (buf[base + 2] != third)) continue;
7758 } else if (next != 0) {
7759 if (buf[base + 1] != next) continue;
7760 }
7761 ctxt->checkIndex = 0;
7762#ifdef DEBUG_PUSH
7763 if (next == 0)
7764 xmlGenericError(xmlGenericErrorContext,
7765 "PP: lookup '%c' found at %d\n",
7766 first, base);
7767 else if (third == 0)
7768 xmlGenericError(xmlGenericErrorContext,
7769 "PP: lookup '%c%c' found at %d\n",
7770 first, next, base);
7771 else
7772 xmlGenericError(xmlGenericErrorContext,
7773 "PP: lookup '%c%c%c' found at %d\n",
7774 first, next, third, base);
7775#endif
7776 return(base - (in->cur - in->base));
7777 }
7778 }
7779 ctxt->checkIndex = base;
7780#ifdef DEBUG_PUSH
7781 if (next == 0)
7782 xmlGenericError(xmlGenericErrorContext,
7783 "PP: lookup '%c' failed\n", first);
7784 else if (third == 0)
7785 xmlGenericError(xmlGenericErrorContext,
7786 "PP: lookup '%c%c' failed\n", first, next);
7787 else
7788 xmlGenericError(xmlGenericErrorContext,
7789 "PP: lookup '%c%c%c' failed\n", first, next, third);
7790#endif
7791 return(-1);
7792}
7793
7794/**
7795 * xmlParseTryOrFinish:
7796 * @ctxt: an XML parser context
7797 * @terminate: last chunk indicator
7798 *
7799 * Try to progress on parsing
7800 *
7801 * Returns zero if no parsing was possible
7802 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007803static int
Owen Taylor3473f882001-02-23 17:55:21 +00007804xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7805 int ret = 0;
7806 int avail;
7807 xmlChar cur, next;
7808
7809#ifdef DEBUG_PUSH
7810 switch (ctxt->instate) {
7811 case XML_PARSER_EOF:
7812 xmlGenericError(xmlGenericErrorContext,
7813 "PP: try EOF\n"); break;
7814 case XML_PARSER_START:
7815 xmlGenericError(xmlGenericErrorContext,
7816 "PP: try START\n"); break;
7817 case XML_PARSER_MISC:
7818 xmlGenericError(xmlGenericErrorContext,
7819 "PP: try MISC\n");break;
7820 case XML_PARSER_COMMENT:
7821 xmlGenericError(xmlGenericErrorContext,
7822 "PP: try COMMENT\n");break;
7823 case XML_PARSER_PROLOG:
7824 xmlGenericError(xmlGenericErrorContext,
7825 "PP: try PROLOG\n");break;
7826 case XML_PARSER_START_TAG:
7827 xmlGenericError(xmlGenericErrorContext,
7828 "PP: try START_TAG\n");break;
7829 case XML_PARSER_CONTENT:
7830 xmlGenericError(xmlGenericErrorContext,
7831 "PP: try CONTENT\n");break;
7832 case XML_PARSER_CDATA_SECTION:
7833 xmlGenericError(xmlGenericErrorContext,
7834 "PP: try CDATA_SECTION\n");break;
7835 case XML_PARSER_END_TAG:
7836 xmlGenericError(xmlGenericErrorContext,
7837 "PP: try END_TAG\n");break;
7838 case XML_PARSER_ENTITY_DECL:
7839 xmlGenericError(xmlGenericErrorContext,
7840 "PP: try ENTITY_DECL\n");break;
7841 case XML_PARSER_ENTITY_VALUE:
7842 xmlGenericError(xmlGenericErrorContext,
7843 "PP: try ENTITY_VALUE\n");break;
7844 case XML_PARSER_ATTRIBUTE_VALUE:
7845 xmlGenericError(xmlGenericErrorContext,
7846 "PP: try ATTRIBUTE_VALUE\n");break;
7847 case XML_PARSER_DTD:
7848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: try DTD\n");break;
7850 case XML_PARSER_EPILOG:
7851 xmlGenericError(xmlGenericErrorContext,
7852 "PP: try EPILOG\n");break;
7853 case XML_PARSER_PI:
7854 xmlGenericError(xmlGenericErrorContext,
7855 "PP: try PI\n");break;
7856 case XML_PARSER_IGNORE:
7857 xmlGenericError(xmlGenericErrorContext,
7858 "PP: try IGNORE\n");break;
7859 }
7860#endif
7861
7862 while (1) {
7863 /*
7864 * Pop-up of finished entities.
7865 */
7866 while ((RAW == 0) && (ctxt->inputNr > 1))
7867 xmlPopInput(ctxt);
7868
7869 if (ctxt->input ==NULL) break;
7870 if (ctxt->input->buf == NULL)
7871 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7872 else
7873 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7874 if (avail < 1)
7875 goto done;
7876 switch (ctxt->instate) {
7877 case XML_PARSER_EOF:
7878 /*
7879 * Document parsing is done !
7880 */
7881 goto done;
7882 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007883 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7884 xmlChar start[4];
7885 xmlCharEncoding enc;
7886
7887 /*
7888 * Very first chars read from the document flow.
7889 */
7890 if (avail < 4)
7891 goto done;
7892
7893 /*
7894 * Get the 4 first bytes and decode the charset
7895 * if enc != XML_CHAR_ENCODING_NONE
7896 * plug some encoding conversion routines.
7897 */
7898 start[0] = RAW;
7899 start[1] = NXT(1);
7900 start[2] = NXT(2);
7901 start[3] = NXT(3);
7902 enc = xmlDetectCharEncoding(start, 4);
7903 if (enc != XML_CHAR_ENCODING_NONE) {
7904 xmlSwitchEncoding(ctxt, enc);
7905 }
7906 break;
7907 }
Owen Taylor3473f882001-02-23 17:55:21 +00007908
7909 cur = ctxt->input->cur[0];
7910 next = ctxt->input->cur[1];
7911 if (cur == 0) {
7912 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7913 ctxt->sax->setDocumentLocator(ctxt->userData,
7914 &xmlDefaultSAXLocator);
7915 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7917 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7918 ctxt->wellFormed = 0;
7919 ctxt->disableSAX = 1;
7920 ctxt->instate = XML_PARSER_EOF;
7921#ifdef DEBUG_PUSH
7922 xmlGenericError(xmlGenericErrorContext,
7923 "PP: entering EOF\n");
7924#endif
7925 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7926 ctxt->sax->endDocument(ctxt->userData);
7927 goto done;
7928 }
7929 if ((cur == '<') && (next == '?')) {
7930 /* PI or XML decl */
7931 if (avail < 5) return(ret);
7932 if ((!terminate) &&
7933 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7934 return(ret);
7935 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7936 ctxt->sax->setDocumentLocator(ctxt->userData,
7937 &xmlDefaultSAXLocator);
7938 if ((ctxt->input->cur[2] == 'x') &&
7939 (ctxt->input->cur[3] == 'm') &&
7940 (ctxt->input->cur[4] == 'l') &&
7941 (IS_BLANK(ctxt->input->cur[5]))) {
7942 ret += 5;
7943#ifdef DEBUG_PUSH
7944 xmlGenericError(xmlGenericErrorContext,
7945 "PP: Parsing XML Decl\n");
7946#endif
7947 xmlParseXMLDecl(ctxt);
7948 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7949 /*
7950 * The XML REC instructs us to stop parsing right
7951 * here
7952 */
7953 ctxt->instate = XML_PARSER_EOF;
7954 return(0);
7955 }
7956 ctxt->standalone = ctxt->input->standalone;
7957 if ((ctxt->encoding == NULL) &&
7958 (ctxt->input->encoding != NULL))
7959 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7960 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7961 (!ctxt->disableSAX))
7962 ctxt->sax->startDocument(ctxt->userData);
7963 ctxt->instate = XML_PARSER_MISC;
7964#ifdef DEBUG_PUSH
7965 xmlGenericError(xmlGenericErrorContext,
7966 "PP: entering MISC\n");
7967#endif
7968 } else {
7969 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7970 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7971 (!ctxt->disableSAX))
7972 ctxt->sax->startDocument(ctxt->userData);
7973 ctxt->instate = XML_PARSER_MISC;
7974#ifdef DEBUG_PUSH
7975 xmlGenericError(xmlGenericErrorContext,
7976 "PP: entering MISC\n");
7977#endif
7978 }
7979 } else {
7980 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7981 ctxt->sax->setDocumentLocator(ctxt->userData,
7982 &xmlDefaultSAXLocator);
7983 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7984 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7985 (!ctxt->disableSAX))
7986 ctxt->sax->startDocument(ctxt->userData);
7987 ctxt->instate = XML_PARSER_MISC;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: entering MISC\n");
7991#endif
7992 }
7993 break;
7994 case XML_PARSER_MISC:
7995 SKIP_BLANKS;
7996 if (ctxt->input->buf == NULL)
7997 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7998 else
7999 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8000 if (avail < 2)
8001 goto done;
8002 cur = ctxt->input->cur[0];
8003 next = ctxt->input->cur[1];
8004 if ((cur == '<') && (next == '?')) {
8005 if ((!terminate) &&
8006 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8007 goto done;
8008#ifdef DEBUG_PUSH
8009 xmlGenericError(xmlGenericErrorContext,
8010 "PP: Parsing PI\n");
8011#endif
8012 xmlParsePI(ctxt);
8013 } else if ((cur == '<') && (next == '!') &&
8014 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8015 if ((!terminate) &&
8016 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8017 goto done;
8018#ifdef DEBUG_PUSH
8019 xmlGenericError(xmlGenericErrorContext,
8020 "PP: Parsing Comment\n");
8021#endif
8022 xmlParseComment(ctxt);
8023 ctxt->instate = XML_PARSER_MISC;
8024 } else if ((cur == '<') && (next == '!') &&
8025 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8026 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8027 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8028 (ctxt->input->cur[8] == 'E')) {
8029 if ((!terminate) &&
8030 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8031 goto done;
8032#ifdef DEBUG_PUSH
8033 xmlGenericError(xmlGenericErrorContext,
8034 "PP: Parsing internal subset\n");
8035#endif
8036 ctxt->inSubset = 1;
8037 xmlParseDocTypeDecl(ctxt);
8038 if (RAW == '[') {
8039 ctxt->instate = XML_PARSER_DTD;
8040#ifdef DEBUG_PUSH
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: entering DTD\n");
8043#endif
8044 } else {
8045 /*
8046 * Create and update the external subset.
8047 */
8048 ctxt->inSubset = 2;
8049 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8050 (ctxt->sax->externalSubset != NULL))
8051 ctxt->sax->externalSubset(ctxt->userData,
8052 ctxt->intSubName, ctxt->extSubSystem,
8053 ctxt->extSubURI);
8054 ctxt->inSubset = 0;
8055 ctxt->instate = XML_PARSER_PROLOG;
8056#ifdef DEBUG_PUSH
8057 xmlGenericError(xmlGenericErrorContext,
8058 "PP: entering PROLOG\n");
8059#endif
8060 }
8061 } else if ((cur == '<') && (next == '!') &&
8062 (avail < 9)) {
8063 goto done;
8064 } else {
8065 ctxt->instate = XML_PARSER_START_TAG;
8066#ifdef DEBUG_PUSH
8067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: entering START_TAG\n");
8069#endif
8070 }
8071 break;
8072 case XML_PARSER_IGNORE:
8073 xmlGenericError(xmlGenericErrorContext,
8074 "PP: internal error, state == IGNORE");
8075 ctxt->instate = XML_PARSER_DTD;
8076#ifdef DEBUG_PUSH
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: entering DTD\n");
8079#endif
8080 break;
8081 case XML_PARSER_PROLOG:
8082 SKIP_BLANKS;
8083 if (ctxt->input->buf == NULL)
8084 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8085 else
8086 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8087 if (avail < 2)
8088 goto done;
8089 cur = ctxt->input->cur[0];
8090 next = ctxt->input->cur[1];
8091 if ((cur == '<') && (next == '?')) {
8092 if ((!terminate) &&
8093 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8094 goto done;
8095#ifdef DEBUG_PUSH
8096 xmlGenericError(xmlGenericErrorContext,
8097 "PP: Parsing PI\n");
8098#endif
8099 xmlParsePI(ctxt);
8100 } else if ((cur == '<') && (next == '!') &&
8101 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8102 if ((!terminate) &&
8103 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8104 goto done;
8105#ifdef DEBUG_PUSH
8106 xmlGenericError(xmlGenericErrorContext,
8107 "PP: Parsing Comment\n");
8108#endif
8109 xmlParseComment(ctxt);
8110 ctxt->instate = XML_PARSER_PROLOG;
8111 } else if ((cur == '<') && (next == '!') &&
8112 (avail < 4)) {
8113 goto done;
8114 } else {
8115 ctxt->instate = XML_PARSER_START_TAG;
8116#ifdef DEBUG_PUSH
8117 xmlGenericError(xmlGenericErrorContext,
8118 "PP: entering START_TAG\n");
8119#endif
8120 }
8121 break;
8122 case XML_PARSER_EPILOG:
8123 SKIP_BLANKS;
8124 if (ctxt->input->buf == NULL)
8125 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8126 else
8127 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8128 if (avail < 2)
8129 goto done;
8130 cur = ctxt->input->cur[0];
8131 next = ctxt->input->cur[1];
8132 if ((cur == '<') && (next == '?')) {
8133 if ((!terminate) &&
8134 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8135 goto done;
8136#ifdef DEBUG_PUSH
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: Parsing PI\n");
8139#endif
8140 xmlParsePI(ctxt);
8141 ctxt->instate = XML_PARSER_EPILOG;
8142 } else if ((cur == '<') && (next == '!') &&
8143 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8144 if ((!terminate) &&
8145 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8146 goto done;
8147#ifdef DEBUG_PUSH
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: Parsing Comment\n");
8150#endif
8151 xmlParseComment(ctxt);
8152 ctxt->instate = XML_PARSER_EPILOG;
8153 } else if ((cur == '<') && (next == '!') &&
8154 (avail < 4)) {
8155 goto done;
8156 } else {
8157 ctxt->errNo = XML_ERR_DOCUMENT_END;
8158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8159 ctxt->sax->error(ctxt->userData,
8160 "Extra content at the end of the document\n");
8161 ctxt->wellFormed = 0;
8162 ctxt->disableSAX = 1;
8163 ctxt->instate = XML_PARSER_EOF;
8164#ifdef DEBUG_PUSH
8165 xmlGenericError(xmlGenericErrorContext,
8166 "PP: entering EOF\n");
8167#endif
8168 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8169 (!ctxt->disableSAX))
8170 ctxt->sax->endDocument(ctxt->userData);
8171 goto done;
8172 }
8173 break;
8174 case XML_PARSER_START_TAG: {
8175 xmlChar *name, *oldname;
8176
8177 if ((avail < 2) && (ctxt->inputNr == 1))
8178 goto done;
8179 cur = ctxt->input->cur[0];
8180 if (cur != '<') {
8181 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8183 ctxt->sax->error(ctxt->userData,
8184 "Start tag expect, '<' not found\n");
8185 ctxt->wellFormed = 0;
8186 ctxt->disableSAX = 1;
8187 ctxt->instate = XML_PARSER_EOF;
8188#ifdef DEBUG_PUSH
8189 xmlGenericError(xmlGenericErrorContext,
8190 "PP: entering EOF\n");
8191#endif
8192 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8193 (!ctxt->disableSAX))
8194 ctxt->sax->endDocument(ctxt->userData);
8195 goto done;
8196 }
8197 if ((!terminate) &&
8198 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8199 goto done;
8200 if (ctxt->spaceNr == 0)
8201 spacePush(ctxt, -1);
8202 else
8203 spacePush(ctxt, *ctxt->space);
8204 name = xmlParseStartTag(ctxt);
8205 if (name == NULL) {
8206 spacePop(ctxt);
8207 ctxt->instate = XML_PARSER_EOF;
8208#ifdef DEBUG_PUSH
8209 xmlGenericError(xmlGenericErrorContext,
8210 "PP: entering EOF\n");
8211#endif
8212 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8213 (!ctxt->disableSAX))
8214 ctxt->sax->endDocument(ctxt->userData);
8215 goto done;
8216 }
8217 namePush(ctxt, xmlStrdup(name));
8218
8219 /*
8220 * [ VC: Root Element Type ]
8221 * The Name in the document type declaration must match
8222 * the element type of the root element.
8223 */
8224 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8225 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8226 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8227
8228 /*
8229 * Check for an Empty Element.
8230 */
8231 if ((RAW == '/') && (NXT(1) == '>')) {
8232 SKIP(2);
8233 if ((ctxt->sax != NULL) &&
8234 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8235 ctxt->sax->endElement(ctxt->userData, name);
8236 xmlFree(name);
8237 oldname = namePop(ctxt);
8238 spacePop(ctxt);
8239 if (oldname != NULL) {
8240#ifdef DEBUG_STACK
8241 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8242#endif
8243 xmlFree(oldname);
8244 }
8245 if (ctxt->name == NULL) {
8246 ctxt->instate = XML_PARSER_EPILOG;
8247#ifdef DEBUG_PUSH
8248 xmlGenericError(xmlGenericErrorContext,
8249 "PP: entering EPILOG\n");
8250#endif
8251 } else {
8252 ctxt->instate = XML_PARSER_CONTENT;
8253#ifdef DEBUG_PUSH
8254 xmlGenericError(xmlGenericErrorContext,
8255 "PP: entering CONTENT\n");
8256#endif
8257 }
8258 break;
8259 }
8260 if (RAW == '>') {
8261 NEXT;
8262 } else {
8263 ctxt->errNo = XML_ERR_GT_REQUIRED;
8264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8265 ctxt->sax->error(ctxt->userData,
8266 "Couldn't find end of Start Tag %s\n",
8267 name);
8268 ctxt->wellFormed = 0;
8269 ctxt->disableSAX = 1;
8270
8271 /*
8272 * end of parsing of this node.
8273 */
8274 nodePop(ctxt);
8275 oldname = namePop(ctxt);
8276 spacePop(ctxt);
8277 if (oldname != NULL) {
8278#ifdef DEBUG_STACK
8279 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8280#endif
8281 xmlFree(oldname);
8282 }
8283 }
8284 xmlFree(name);
8285 ctxt->instate = XML_PARSER_CONTENT;
8286#ifdef DEBUG_PUSH
8287 xmlGenericError(xmlGenericErrorContext,
8288 "PP: entering CONTENT\n");
8289#endif
8290 break;
8291 }
8292 case XML_PARSER_CONTENT: {
8293 const xmlChar *test;
8294 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008295 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008296
8297 /*
8298 * Handle preparsed entities and charRef
8299 */
8300 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008301 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008302
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008303 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008304 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8305 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008306 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008307 ctxt->token = 0;
8308 }
8309 if ((avail < 2) && (ctxt->inputNr == 1))
8310 goto done;
8311 cur = ctxt->input->cur[0];
8312 next = ctxt->input->cur[1];
8313
8314 test = CUR_PTR;
8315 cons = ctxt->input->consumed;
8316 tok = ctxt->token;
8317 if ((cur == '<') && (next == '?')) {
8318 if ((!terminate) &&
8319 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8320 goto done;
8321#ifdef DEBUG_PUSH
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: Parsing PI\n");
8324#endif
8325 xmlParsePI(ctxt);
8326 } else if ((cur == '<') && (next == '!') &&
8327 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8328 if ((!terminate) &&
8329 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8330 goto done;
8331#ifdef DEBUG_PUSH
8332 xmlGenericError(xmlGenericErrorContext,
8333 "PP: Parsing Comment\n");
8334#endif
8335 xmlParseComment(ctxt);
8336 ctxt->instate = XML_PARSER_CONTENT;
8337 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8338 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8339 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8340 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8341 (ctxt->input->cur[8] == '[')) {
8342 SKIP(9);
8343 ctxt->instate = XML_PARSER_CDATA_SECTION;
8344#ifdef DEBUG_PUSH
8345 xmlGenericError(xmlGenericErrorContext,
8346 "PP: entering CDATA_SECTION\n");
8347#endif
8348 break;
8349 } else if ((cur == '<') && (next == '!') &&
8350 (avail < 9)) {
8351 goto done;
8352 } else if ((cur == '<') && (next == '/')) {
8353 ctxt->instate = XML_PARSER_END_TAG;
8354#ifdef DEBUG_PUSH
8355 xmlGenericError(xmlGenericErrorContext,
8356 "PP: entering END_TAG\n");
8357#endif
8358 break;
8359 } else if (cur == '<') {
8360 ctxt->instate = XML_PARSER_START_TAG;
8361#ifdef DEBUG_PUSH
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: entering START_TAG\n");
8364#endif
8365 break;
8366 } else if (cur == '&') {
8367 if ((!terminate) &&
8368 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8369 goto done;
8370#ifdef DEBUG_PUSH
8371 xmlGenericError(xmlGenericErrorContext,
8372 "PP: Parsing Reference\n");
8373#endif
8374 xmlParseReference(ctxt);
8375 } else {
8376 /* TODO Avoid the extra copy, handle directly !!! */
8377 /*
8378 * Goal of the following test is:
8379 * - minimize calls to the SAX 'character' callback
8380 * when they are mergeable
8381 * - handle an problem for isBlank when we only parse
8382 * a sequence of blank chars and the next one is
8383 * not available to check against '<' presence.
8384 * - tries to homogenize the differences in SAX
8385 * callbacks beween the push and pull versions
8386 * of the parser.
8387 */
8388 if ((ctxt->inputNr == 1) &&
8389 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8390 if ((!terminate) &&
8391 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8392 goto done;
8393 }
8394 ctxt->checkIndex = 0;
8395#ifdef DEBUG_PUSH
8396 xmlGenericError(xmlGenericErrorContext,
8397 "PP: Parsing char data\n");
8398#endif
8399 xmlParseCharData(ctxt, 0);
8400 }
8401 /*
8402 * Pop-up of finished entities.
8403 */
8404 while ((RAW == 0) && (ctxt->inputNr > 1))
8405 xmlPopInput(ctxt);
8406 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8407 (tok == ctxt->token)) {
8408 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8410 ctxt->sax->error(ctxt->userData,
8411 "detected an error in element content\n");
8412 ctxt->wellFormed = 0;
8413 ctxt->disableSAX = 1;
8414 ctxt->instate = XML_PARSER_EOF;
8415 break;
8416 }
8417 break;
8418 }
8419 case XML_PARSER_CDATA_SECTION: {
8420 /*
8421 * The Push mode need to have the SAX callback for
8422 * cdataBlock merge back contiguous callbacks.
8423 */
8424 int base;
8425
8426 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8427 if (base < 0) {
8428 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8430 if (ctxt->sax->cdataBlock != NULL)
8431 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8432 XML_PARSER_BIG_BUFFER_SIZE);
8433 }
8434 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8435 ctxt->checkIndex = 0;
8436 }
8437 goto done;
8438 } else {
8439 if ((ctxt->sax != NULL) && (base > 0) &&
8440 (!ctxt->disableSAX)) {
8441 if (ctxt->sax->cdataBlock != NULL)
8442 ctxt->sax->cdataBlock(ctxt->userData,
8443 ctxt->input->cur, base);
8444 }
8445 SKIP(base + 3);
8446 ctxt->checkIndex = 0;
8447 ctxt->instate = XML_PARSER_CONTENT;
8448#ifdef DEBUG_PUSH
8449 xmlGenericError(xmlGenericErrorContext,
8450 "PP: entering CONTENT\n");
8451#endif
8452 }
8453 break;
8454 }
8455 case XML_PARSER_END_TAG:
8456 if (avail < 2)
8457 goto done;
8458 if ((!terminate) &&
8459 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8460 goto done;
8461 xmlParseEndTag(ctxt);
8462 if (ctxt->name == NULL) {
8463 ctxt->instate = XML_PARSER_EPILOG;
8464#ifdef DEBUG_PUSH
8465 xmlGenericError(xmlGenericErrorContext,
8466 "PP: entering EPILOG\n");
8467#endif
8468 } else {
8469 ctxt->instate = XML_PARSER_CONTENT;
8470#ifdef DEBUG_PUSH
8471 xmlGenericError(xmlGenericErrorContext,
8472 "PP: entering CONTENT\n");
8473#endif
8474 }
8475 break;
8476 case XML_PARSER_DTD: {
8477 /*
8478 * Sorry but progressive parsing of the internal subset
8479 * is not expected to be supported. We first check that
8480 * the full content of the internal subset is available and
8481 * the parsing is launched only at that point.
8482 * Internal subset ends up with "']' S? '>'" in an unescaped
8483 * section and not in a ']]>' sequence which are conditional
8484 * sections (whoever argued to keep that crap in XML deserve
8485 * a place in hell !).
8486 */
8487 int base, i;
8488 xmlChar *buf;
8489 xmlChar quote = 0;
8490
8491 base = ctxt->input->cur - ctxt->input->base;
8492 if (base < 0) return(0);
8493 if (ctxt->checkIndex > base)
8494 base = ctxt->checkIndex;
8495 buf = ctxt->input->buf->buffer->content;
8496 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8497 base++) {
8498 if (quote != 0) {
8499 if (buf[base] == quote)
8500 quote = 0;
8501 continue;
8502 }
8503 if (buf[base] == '"') {
8504 quote = '"';
8505 continue;
8506 }
8507 if (buf[base] == '\'') {
8508 quote = '\'';
8509 continue;
8510 }
8511 if (buf[base] == ']') {
8512 if ((unsigned int) base +1 >=
8513 ctxt->input->buf->buffer->use)
8514 break;
8515 if (buf[base + 1] == ']') {
8516 /* conditional crap, skip both ']' ! */
8517 base++;
8518 continue;
8519 }
8520 for (i = 0;
8521 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8522 i++) {
8523 if (buf[base + i] == '>')
8524 goto found_end_int_subset;
8525 }
8526 break;
8527 }
8528 }
8529 /*
8530 * We didn't found the end of the Internal subset
8531 */
8532 if (quote == 0)
8533 ctxt->checkIndex = base;
8534#ifdef DEBUG_PUSH
8535 if (next == 0)
8536 xmlGenericError(xmlGenericErrorContext,
8537 "PP: lookup of int subset end filed\n");
8538#endif
8539 goto done;
8540
8541found_end_int_subset:
8542 xmlParseInternalSubset(ctxt);
8543 ctxt->inSubset = 2;
8544 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8545 (ctxt->sax->externalSubset != NULL))
8546 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8547 ctxt->extSubSystem, ctxt->extSubURI);
8548 ctxt->inSubset = 0;
8549 ctxt->instate = XML_PARSER_PROLOG;
8550 ctxt->checkIndex = 0;
8551#ifdef DEBUG_PUSH
8552 xmlGenericError(xmlGenericErrorContext,
8553 "PP: entering PROLOG\n");
8554#endif
8555 break;
8556 }
8557 case XML_PARSER_COMMENT:
8558 xmlGenericError(xmlGenericErrorContext,
8559 "PP: internal error, state == COMMENT\n");
8560 ctxt->instate = XML_PARSER_CONTENT;
8561#ifdef DEBUG_PUSH
8562 xmlGenericError(xmlGenericErrorContext,
8563 "PP: entering CONTENT\n");
8564#endif
8565 break;
8566 case XML_PARSER_PI:
8567 xmlGenericError(xmlGenericErrorContext,
8568 "PP: internal error, state == PI\n");
8569 ctxt->instate = XML_PARSER_CONTENT;
8570#ifdef DEBUG_PUSH
8571 xmlGenericError(xmlGenericErrorContext,
8572 "PP: entering CONTENT\n");
8573#endif
8574 break;
8575 case XML_PARSER_ENTITY_DECL:
8576 xmlGenericError(xmlGenericErrorContext,
8577 "PP: internal error, state == ENTITY_DECL\n");
8578 ctxt->instate = XML_PARSER_DTD;
8579#ifdef DEBUG_PUSH
8580 xmlGenericError(xmlGenericErrorContext,
8581 "PP: entering DTD\n");
8582#endif
8583 break;
8584 case XML_PARSER_ENTITY_VALUE:
8585 xmlGenericError(xmlGenericErrorContext,
8586 "PP: internal error, state == ENTITY_VALUE\n");
8587 ctxt->instate = XML_PARSER_CONTENT;
8588#ifdef DEBUG_PUSH
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: entering DTD\n");
8591#endif
8592 break;
8593 case XML_PARSER_ATTRIBUTE_VALUE:
8594 xmlGenericError(xmlGenericErrorContext,
8595 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8596 ctxt->instate = XML_PARSER_START_TAG;
8597#ifdef DEBUG_PUSH
8598 xmlGenericError(xmlGenericErrorContext,
8599 "PP: entering START_TAG\n");
8600#endif
8601 break;
8602 case XML_PARSER_SYSTEM_LITERAL:
8603 xmlGenericError(xmlGenericErrorContext,
8604 "PP: internal error, state == SYSTEM_LITERAL\n");
8605 ctxt->instate = XML_PARSER_START_TAG;
8606#ifdef DEBUG_PUSH
8607 xmlGenericError(xmlGenericErrorContext,
8608 "PP: entering START_TAG\n");
8609#endif
8610 break;
8611 }
8612 }
8613done:
8614#ifdef DEBUG_PUSH
8615 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8616#endif
8617 return(ret);
8618}
8619
8620/**
Owen Taylor3473f882001-02-23 17:55:21 +00008621 * xmlParseChunk:
8622 * @ctxt: an XML parser context
8623 * @chunk: an char array
8624 * @size: the size in byte of the chunk
8625 * @terminate: last chunk indicator
8626 *
8627 * Parse a Chunk of memory
8628 *
8629 * Returns zero if no error, the xmlParserErrors otherwise.
8630 */
8631int
8632xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8633 int terminate) {
8634 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8635 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8636 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8637 int cur = ctxt->input->cur - ctxt->input->base;
8638
8639 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8640 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8641 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008642 ctxt->input->end =
8643 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008644#ifdef DEBUG_PUSH
8645 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8646#endif
8647
8648 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8649 xmlParseTryOrFinish(ctxt, terminate);
8650 } else if (ctxt->instate != XML_PARSER_EOF) {
8651 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8652 xmlParserInputBufferPtr in = ctxt->input->buf;
8653 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8654 (in->raw != NULL)) {
8655 int nbchars;
8656
8657 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8658 if (nbchars < 0) {
8659 xmlGenericError(xmlGenericErrorContext,
8660 "xmlParseChunk: encoder error\n");
8661 return(XML_ERR_INVALID_ENCODING);
8662 }
8663 }
8664 }
8665 }
8666 xmlParseTryOrFinish(ctxt, terminate);
8667 if (terminate) {
8668 /*
8669 * Check for termination
8670 */
8671 if ((ctxt->instate != XML_PARSER_EOF) &&
8672 (ctxt->instate != XML_PARSER_EPILOG)) {
8673 ctxt->errNo = XML_ERR_DOCUMENT_END;
8674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8675 ctxt->sax->error(ctxt->userData,
8676 "Extra content at the end of the document\n");
8677 ctxt->wellFormed = 0;
8678 ctxt->disableSAX = 1;
8679 }
8680 if (ctxt->instate != XML_PARSER_EOF) {
8681 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8682 (!ctxt->disableSAX))
8683 ctxt->sax->endDocument(ctxt->userData);
8684 }
8685 ctxt->instate = XML_PARSER_EOF;
8686 }
8687 return((xmlParserErrors) ctxt->errNo);
8688}
8689
8690/************************************************************************
8691 * *
8692 * I/O front end functions to the parser *
8693 * *
8694 ************************************************************************/
8695
8696/**
8697 * xmlStopParser:
8698 * @ctxt: an XML parser context
8699 *
8700 * Blocks further parser processing
8701 */
8702void
8703xmlStopParser(xmlParserCtxtPtr ctxt) {
8704 ctxt->instate = XML_PARSER_EOF;
8705 if (ctxt->input != NULL)
8706 ctxt->input->cur = BAD_CAST"";
8707}
8708
8709/**
8710 * xmlCreatePushParserCtxt:
8711 * @sax: a SAX handler
8712 * @user_data: The user data returned on SAX callbacks
8713 * @chunk: a pointer to an array of chars
8714 * @size: number of chars in the array
8715 * @filename: an optional file name or URI
8716 *
8717 * Create a parser context for using the XML parser in push mode
8718 * To allow content encoding detection, @size should be >= 4
8719 * The value of @filename is used for fetching external entities
8720 * and error/warning reports.
8721 *
8722 * Returns the new parser context or NULL
8723 */
8724xmlParserCtxtPtr
8725xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8726 const char *chunk, int size, const char *filename) {
8727 xmlParserCtxtPtr ctxt;
8728 xmlParserInputPtr inputStream;
8729 xmlParserInputBufferPtr buf;
8730 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8731
8732 /*
8733 * plug some encoding conversion routines
8734 */
8735 if ((chunk != NULL) && (size >= 4))
8736 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8737
8738 buf = xmlAllocParserInputBuffer(enc);
8739 if (buf == NULL) return(NULL);
8740
8741 ctxt = xmlNewParserCtxt();
8742 if (ctxt == NULL) {
8743 xmlFree(buf);
8744 return(NULL);
8745 }
8746 if (sax != NULL) {
8747 if (ctxt->sax != &xmlDefaultSAXHandler)
8748 xmlFree(ctxt->sax);
8749 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8750 if (ctxt->sax == NULL) {
8751 xmlFree(buf);
8752 xmlFree(ctxt);
8753 return(NULL);
8754 }
8755 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8756 if (user_data != NULL)
8757 ctxt->userData = user_data;
8758 }
8759 if (filename == NULL) {
8760 ctxt->directory = NULL;
8761 } else {
8762 ctxt->directory = xmlParserGetDirectory(filename);
8763 }
8764
8765 inputStream = xmlNewInputStream(ctxt);
8766 if (inputStream == NULL) {
8767 xmlFreeParserCtxt(ctxt);
8768 return(NULL);
8769 }
8770
8771 if (filename == NULL)
8772 inputStream->filename = NULL;
8773 else
8774 inputStream->filename = xmlMemStrdup(filename);
8775 inputStream->buf = buf;
8776 inputStream->base = inputStream->buf->buffer->content;
8777 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008778 inputStream->end =
8779 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008780
8781 inputPush(ctxt, inputStream);
8782
8783 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8784 (ctxt->input->buf != NULL)) {
8785 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8786#ifdef DEBUG_PUSH
8787 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8788#endif
8789 }
8790
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008791 if (enc != XML_CHAR_ENCODING_NONE) {
8792 xmlSwitchEncoding(ctxt, enc);
8793 }
8794
Owen Taylor3473f882001-02-23 17:55:21 +00008795 return(ctxt);
8796}
8797
8798/**
8799 * xmlCreateIOParserCtxt:
8800 * @sax: a SAX handler
8801 * @user_data: The user data returned on SAX callbacks
8802 * @ioread: an I/O read function
8803 * @ioclose: an I/O close function
8804 * @ioctx: an I/O handler
8805 * @enc: the charset encoding if known
8806 *
8807 * Create a parser context for using the XML parser with an existing
8808 * I/O stream
8809 *
8810 * Returns the new parser context or NULL
8811 */
8812xmlParserCtxtPtr
8813xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8814 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8815 void *ioctx, xmlCharEncoding enc) {
8816 xmlParserCtxtPtr ctxt;
8817 xmlParserInputPtr inputStream;
8818 xmlParserInputBufferPtr buf;
8819
8820 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8821 if (buf == NULL) return(NULL);
8822
8823 ctxt = xmlNewParserCtxt();
8824 if (ctxt == NULL) {
8825 xmlFree(buf);
8826 return(NULL);
8827 }
8828 if (sax != NULL) {
8829 if (ctxt->sax != &xmlDefaultSAXHandler)
8830 xmlFree(ctxt->sax);
8831 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8832 if (ctxt->sax == NULL) {
8833 xmlFree(buf);
8834 xmlFree(ctxt);
8835 return(NULL);
8836 }
8837 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8838 if (user_data != NULL)
8839 ctxt->userData = user_data;
8840 }
8841
8842 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8843 if (inputStream == NULL) {
8844 xmlFreeParserCtxt(ctxt);
8845 return(NULL);
8846 }
8847 inputPush(ctxt, inputStream);
8848
8849 return(ctxt);
8850}
8851
8852/************************************************************************
8853 * *
8854 * Front ends when parsing a Dtd *
8855 * *
8856 ************************************************************************/
8857
8858/**
8859 * xmlIOParseDTD:
8860 * @sax: the SAX handler block or NULL
8861 * @input: an Input Buffer
8862 * @enc: the charset encoding if known
8863 *
8864 * Load and parse a DTD
8865 *
8866 * Returns the resulting xmlDtdPtr or NULL in case of error.
8867 * @input will be freed at parsing end.
8868 */
8869
8870xmlDtdPtr
8871xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8872 xmlCharEncoding enc) {
8873 xmlDtdPtr ret = NULL;
8874 xmlParserCtxtPtr ctxt;
8875 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008876 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008877
8878 if (input == NULL)
8879 return(NULL);
8880
8881 ctxt = xmlNewParserCtxt();
8882 if (ctxt == NULL) {
8883 return(NULL);
8884 }
8885
8886 /*
8887 * Set-up the SAX context
8888 */
8889 if (sax != NULL) {
8890 if (ctxt->sax != NULL)
8891 xmlFree(ctxt->sax);
8892 ctxt->sax = sax;
8893 ctxt->userData = NULL;
8894 }
8895
8896 /*
8897 * generate a parser input from the I/O handler
8898 */
8899
8900 pinput = xmlNewIOInputStream(ctxt, input, enc);
8901 if (pinput == NULL) {
8902 if (sax != NULL) ctxt->sax = NULL;
8903 xmlFreeParserCtxt(ctxt);
8904 return(NULL);
8905 }
8906
8907 /*
8908 * plug some encoding conversion routines here.
8909 */
8910 xmlPushInput(ctxt, pinput);
8911
8912 pinput->filename = NULL;
8913 pinput->line = 1;
8914 pinput->col = 1;
8915 pinput->base = ctxt->input->cur;
8916 pinput->cur = ctxt->input->cur;
8917 pinput->free = NULL;
8918
8919 /*
8920 * let's parse that entity knowing it's an external subset.
8921 */
8922 ctxt->inSubset = 2;
8923 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8924 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8925 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008926
8927 if (enc == XML_CHAR_ENCODING_NONE) {
8928 /*
8929 * Get the 4 first bytes and decode the charset
8930 * if enc != XML_CHAR_ENCODING_NONE
8931 * plug some encoding conversion routines.
8932 */
8933 start[0] = RAW;
8934 start[1] = NXT(1);
8935 start[2] = NXT(2);
8936 start[3] = NXT(3);
8937 enc = xmlDetectCharEncoding(start, 4);
8938 if (enc != XML_CHAR_ENCODING_NONE) {
8939 xmlSwitchEncoding(ctxt, enc);
8940 }
8941 }
8942
Owen Taylor3473f882001-02-23 17:55:21 +00008943 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8944
8945 if (ctxt->myDoc != NULL) {
8946 if (ctxt->wellFormed) {
8947 ret = ctxt->myDoc->extSubset;
8948 ctxt->myDoc->extSubset = NULL;
8949 } else {
8950 ret = NULL;
8951 }
8952 xmlFreeDoc(ctxt->myDoc);
8953 ctxt->myDoc = NULL;
8954 }
8955 if (sax != NULL) ctxt->sax = NULL;
8956 xmlFreeParserCtxt(ctxt);
8957
8958 return(ret);
8959}
8960
8961/**
8962 * xmlSAXParseDTD:
8963 * @sax: the SAX handler block
8964 * @ExternalID: a NAME* containing the External ID of the DTD
8965 * @SystemID: a NAME* containing the URL to the DTD
8966 *
8967 * Load and parse an external subset.
8968 *
8969 * Returns the resulting xmlDtdPtr or NULL in case of error.
8970 */
8971
8972xmlDtdPtr
8973xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8974 const xmlChar *SystemID) {
8975 xmlDtdPtr ret = NULL;
8976 xmlParserCtxtPtr ctxt;
8977 xmlParserInputPtr input = NULL;
8978 xmlCharEncoding enc;
8979
8980 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8981
8982 ctxt = xmlNewParserCtxt();
8983 if (ctxt == NULL) {
8984 return(NULL);
8985 }
8986
8987 /*
8988 * Set-up the SAX context
8989 */
8990 if (sax != NULL) {
8991 if (ctxt->sax != NULL)
8992 xmlFree(ctxt->sax);
8993 ctxt->sax = sax;
8994 ctxt->userData = NULL;
8995 }
8996
8997 /*
8998 * Ask the Entity resolver to load the damn thing
8999 */
9000
9001 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9002 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9003 if (input == NULL) {
9004 if (sax != NULL) ctxt->sax = NULL;
9005 xmlFreeParserCtxt(ctxt);
9006 return(NULL);
9007 }
9008
9009 /*
9010 * plug some encoding conversion routines here.
9011 */
9012 xmlPushInput(ctxt, input);
9013 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9014 xmlSwitchEncoding(ctxt, enc);
9015
9016 if (input->filename == NULL)
9017 input->filename = (char *) xmlStrdup(SystemID);
9018 input->line = 1;
9019 input->col = 1;
9020 input->base = ctxt->input->cur;
9021 input->cur = ctxt->input->cur;
9022 input->free = NULL;
9023
9024 /*
9025 * let's parse that entity knowing it's an external subset.
9026 */
9027 ctxt->inSubset = 2;
9028 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9029 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9030 ExternalID, SystemID);
9031 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9032
9033 if (ctxt->myDoc != NULL) {
9034 if (ctxt->wellFormed) {
9035 ret = ctxt->myDoc->extSubset;
9036 ctxt->myDoc->extSubset = NULL;
9037 } else {
9038 ret = NULL;
9039 }
9040 xmlFreeDoc(ctxt->myDoc);
9041 ctxt->myDoc = NULL;
9042 }
9043 if (sax != NULL) ctxt->sax = NULL;
9044 xmlFreeParserCtxt(ctxt);
9045
9046 return(ret);
9047}
9048
9049/**
9050 * xmlParseDTD:
9051 * @ExternalID: a NAME* containing the External ID of the DTD
9052 * @SystemID: a NAME* containing the URL to the DTD
9053 *
9054 * Load and parse an external subset.
9055 *
9056 * Returns the resulting xmlDtdPtr or NULL in case of error.
9057 */
9058
9059xmlDtdPtr
9060xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9061 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9062}
9063
9064/************************************************************************
9065 * *
9066 * Front ends when parsing an Entity *
9067 * *
9068 ************************************************************************/
9069
9070/**
Owen Taylor3473f882001-02-23 17:55:21 +00009071 * xmlParseCtxtExternalEntity:
9072 * @ctx: the existing parsing context
9073 * @URL: the URL for the entity to load
9074 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009075 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009076 *
9077 * Parse an external general entity within an existing parsing context
9078 * An external general parsed entity is well-formed if it matches the
9079 * production labeled extParsedEnt.
9080 *
9081 * [78] extParsedEnt ::= TextDecl? content
9082 *
9083 * Returns 0 if the entity is well formed, -1 in case of args problem and
9084 * the parser error code otherwise
9085 */
9086
9087int
9088xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009089 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009090 xmlParserCtxtPtr ctxt;
9091 xmlDocPtr newDoc;
9092 xmlSAXHandlerPtr oldsax = NULL;
9093 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009094 xmlChar start[4];
9095 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009096
9097 if (ctx->depth > 40) {
9098 return(XML_ERR_ENTITY_LOOP);
9099 }
9100
Daniel Veillardcda96922001-08-21 10:56:31 +00009101 if (lst != NULL)
9102 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009103 if ((URL == NULL) && (ID == NULL))
9104 return(-1);
9105 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9106 return(-1);
9107
9108
9109 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9110 if (ctxt == NULL) return(-1);
9111 ctxt->userData = ctxt;
9112 oldsax = ctxt->sax;
9113 ctxt->sax = ctx->sax;
9114 newDoc = xmlNewDoc(BAD_CAST "1.0");
9115 if (newDoc == NULL) {
9116 xmlFreeParserCtxt(ctxt);
9117 return(-1);
9118 }
9119 if (ctx->myDoc != NULL) {
9120 newDoc->intSubset = ctx->myDoc->intSubset;
9121 newDoc->extSubset = ctx->myDoc->extSubset;
9122 }
9123 if (ctx->myDoc->URL != NULL) {
9124 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9125 }
9126 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9127 if (newDoc->children == NULL) {
9128 ctxt->sax = oldsax;
9129 xmlFreeParserCtxt(ctxt);
9130 newDoc->intSubset = NULL;
9131 newDoc->extSubset = NULL;
9132 xmlFreeDoc(newDoc);
9133 return(-1);
9134 }
9135 nodePush(ctxt, newDoc->children);
9136 if (ctx->myDoc == NULL) {
9137 ctxt->myDoc = newDoc;
9138 } else {
9139 ctxt->myDoc = ctx->myDoc;
9140 newDoc->children->doc = ctx->myDoc;
9141 }
9142
Daniel Veillard87a764e2001-06-20 17:41:10 +00009143 /*
9144 * Get the 4 first bytes and decode the charset
9145 * if enc != XML_CHAR_ENCODING_NONE
9146 * plug some encoding conversion routines.
9147 */
9148 GROW
9149 start[0] = RAW;
9150 start[1] = NXT(1);
9151 start[2] = NXT(2);
9152 start[3] = NXT(3);
9153 enc = xmlDetectCharEncoding(start, 4);
9154 if (enc != XML_CHAR_ENCODING_NONE) {
9155 xmlSwitchEncoding(ctxt, enc);
9156 }
9157
Owen Taylor3473f882001-02-23 17:55:21 +00009158 /*
9159 * Parse a possible text declaration first
9160 */
Owen Taylor3473f882001-02-23 17:55:21 +00009161 if ((RAW == '<') && (NXT(1) == '?') &&
9162 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9163 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9164 xmlParseTextDecl(ctxt);
9165 }
9166
9167 /*
9168 * Doing validity checking on chunk doesn't make sense
9169 */
9170 ctxt->instate = XML_PARSER_CONTENT;
9171 ctxt->validate = ctx->validate;
9172 ctxt->loadsubset = ctx->loadsubset;
9173 ctxt->depth = ctx->depth + 1;
9174 ctxt->replaceEntities = ctx->replaceEntities;
9175 if (ctxt->validate) {
9176 ctxt->vctxt.error = ctx->vctxt.error;
9177 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009178 } else {
9179 ctxt->vctxt.error = NULL;
9180 ctxt->vctxt.warning = NULL;
9181 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009182 ctxt->vctxt.nodeTab = NULL;
9183 ctxt->vctxt.nodeNr = 0;
9184 ctxt->vctxt.nodeMax = 0;
9185 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009186
9187 xmlParseContent(ctxt);
9188
9189 if ((RAW == '<') && (NXT(1) == '/')) {
9190 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9192 ctxt->sax->error(ctxt->userData,
9193 "chunk is not well balanced\n");
9194 ctxt->wellFormed = 0;
9195 ctxt->disableSAX = 1;
9196 } else if (RAW != 0) {
9197 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9199 ctxt->sax->error(ctxt->userData,
9200 "extra content at the end of well balanced chunk\n");
9201 ctxt->wellFormed = 0;
9202 ctxt->disableSAX = 1;
9203 }
9204 if (ctxt->node != newDoc->children) {
9205 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9207 ctxt->sax->error(ctxt->userData,
9208 "chunk is not well balanced\n");
9209 ctxt->wellFormed = 0;
9210 ctxt->disableSAX = 1;
9211 }
9212
9213 if (!ctxt->wellFormed) {
9214 if (ctxt->errNo == 0)
9215 ret = 1;
9216 else
9217 ret = ctxt->errNo;
9218 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009219 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009220 xmlNodePtr cur;
9221
9222 /*
9223 * Return the newly created nodeset after unlinking it from
9224 * they pseudo parent.
9225 */
9226 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009227 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009228 while (cur != NULL) {
9229 cur->parent = NULL;
9230 cur = cur->next;
9231 }
9232 newDoc->children->children = NULL;
9233 }
9234 ret = 0;
9235 }
9236 ctxt->sax = oldsax;
9237 xmlFreeParserCtxt(ctxt);
9238 newDoc->intSubset = NULL;
9239 newDoc->extSubset = NULL;
9240 xmlFreeDoc(newDoc);
9241
9242 return(ret);
9243}
9244
9245/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009246 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009247 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009248 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009249 * @sax: the SAX handler bloc (possibly NULL)
9250 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9251 * @depth: Used for loop detection, use 0
9252 * @URL: the URL for the entity to load
9253 * @ID: the System ID for the entity to load
9254 * @list: the return value for the set of parsed nodes
9255 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009256 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009257 *
9258 * Returns 0 if the entity is well formed, -1 in case of args problem and
9259 * the parser error code otherwise
9260 */
9261
Daniel Veillard257d9102001-05-08 10:41:44 +00009262static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009263xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9264 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009265 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009266 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009267 xmlParserCtxtPtr ctxt;
9268 xmlDocPtr newDoc;
9269 xmlSAXHandlerPtr oldsax = NULL;
9270 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009271 xmlChar start[4];
9272 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009273
9274 if (depth > 40) {
9275 return(XML_ERR_ENTITY_LOOP);
9276 }
9277
9278
9279
9280 if (list != NULL)
9281 *list = NULL;
9282 if ((URL == NULL) && (ID == NULL))
9283 return(-1);
9284 if (doc == NULL) /* @@ relax but check for dereferences */
9285 return(-1);
9286
9287
9288 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9289 if (ctxt == NULL) return(-1);
9290 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009291 if (oldctxt != NULL) {
9292 ctxt->_private = oldctxt->_private;
9293 ctxt->loadsubset = oldctxt->loadsubset;
9294 ctxt->validate = oldctxt->validate;
9295 ctxt->external = oldctxt->external;
9296 } else {
9297 /*
9298 * Doing validity checking on chunk without context
9299 * doesn't make sense
9300 */
9301 ctxt->_private = NULL;
9302 ctxt->validate = 0;
9303 ctxt->external = 2;
9304 ctxt->loadsubset = 0;
9305 }
Owen Taylor3473f882001-02-23 17:55:21 +00009306 if (sax != NULL) {
9307 oldsax = ctxt->sax;
9308 ctxt->sax = sax;
9309 if (user_data != NULL)
9310 ctxt->userData = user_data;
9311 }
9312 newDoc = xmlNewDoc(BAD_CAST "1.0");
9313 if (newDoc == NULL) {
9314 xmlFreeParserCtxt(ctxt);
9315 return(-1);
9316 }
9317 if (doc != NULL) {
9318 newDoc->intSubset = doc->intSubset;
9319 newDoc->extSubset = doc->extSubset;
9320 }
9321 if (doc->URL != NULL) {
9322 newDoc->URL = xmlStrdup(doc->URL);
9323 }
9324 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9325 if (newDoc->children == NULL) {
9326 if (sax != NULL)
9327 ctxt->sax = oldsax;
9328 xmlFreeParserCtxt(ctxt);
9329 newDoc->intSubset = NULL;
9330 newDoc->extSubset = NULL;
9331 xmlFreeDoc(newDoc);
9332 return(-1);
9333 }
9334 nodePush(ctxt, newDoc->children);
9335 if (doc == NULL) {
9336 ctxt->myDoc = newDoc;
9337 } else {
9338 ctxt->myDoc = doc;
9339 newDoc->children->doc = doc;
9340 }
9341
Daniel Veillard87a764e2001-06-20 17:41:10 +00009342 /*
9343 * Get the 4 first bytes and decode the charset
9344 * if enc != XML_CHAR_ENCODING_NONE
9345 * plug some encoding conversion routines.
9346 */
9347 GROW;
9348 start[0] = RAW;
9349 start[1] = NXT(1);
9350 start[2] = NXT(2);
9351 start[3] = NXT(3);
9352 enc = xmlDetectCharEncoding(start, 4);
9353 if (enc != XML_CHAR_ENCODING_NONE) {
9354 xmlSwitchEncoding(ctxt, enc);
9355 }
9356
Owen Taylor3473f882001-02-23 17:55:21 +00009357 /*
9358 * Parse a possible text declaration first
9359 */
Owen Taylor3473f882001-02-23 17:55:21 +00009360 if ((RAW == '<') && (NXT(1) == '?') &&
9361 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9362 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9363 xmlParseTextDecl(ctxt);
9364 }
9365
Owen Taylor3473f882001-02-23 17:55:21 +00009366 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009367 ctxt->depth = depth;
9368
9369 xmlParseContent(ctxt);
9370
9371 if ((RAW == '<') && (NXT(1) == '/')) {
9372 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9374 ctxt->sax->error(ctxt->userData,
9375 "chunk is not well balanced\n");
9376 ctxt->wellFormed = 0;
9377 ctxt->disableSAX = 1;
9378 } else if (RAW != 0) {
9379 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9381 ctxt->sax->error(ctxt->userData,
9382 "extra content at the end of well balanced chunk\n");
9383 ctxt->wellFormed = 0;
9384 ctxt->disableSAX = 1;
9385 }
9386 if (ctxt->node != newDoc->children) {
9387 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9389 ctxt->sax->error(ctxt->userData,
9390 "chunk is not well balanced\n");
9391 ctxt->wellFormed = 0;
9392 ctxt->disableSAX = 1;
9393 }
9394
9395 if (!ctxt->wellFormed) {
9396 if (ctxt->errNo == 0)
9397 ret = 1;
9398 else
9399 ret = ctxt->errNo;
9400 } else {
9401 if (list != NULL) {
9402 xmlNodePtr cur;
9403
9404 /*
9405 * Return the newly created nodeset after unlinking it from
9406 * they pseudo parent.
9407 */
9408 cur = newDoc->children->children;
9409 *list = cur;
9410 while (cur != NULL) {
9411 cur->parent = NULL;
9412 cur = cur->next;
9413 }
9414 newDoc->children->children = NULL;
9415 }
9416 ret = 0;
9417 }
9418 if (sax != NULL)
9419 ctxt->sax = oldsax;
9420 xmlFreeParserCtxt(ctxt);
9421 newDoc->intSubset = NULL;
9422 newDoc->extSubset = NULL;
9423 xmlFreeDoc(newDoc);
9424
9425 return(ret);
9426}
9427
9428/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009429 * xmlParseExternalEntity:
9430 * @doc: the document the chunk pertains to
9431 * @sax: the SAX handler bloc (possibly NULL)
9432 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9433 * @depth: Used for loop detection, use 0
9434 * @URL: the URL for the entity to load
9435 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009436 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009437 *
9438 * Parse an external general entity
9439 * An external general parsed entity is well-formed if it matches the
9440 * production labeled extParsedEnt.
9441 *
9442 * [78] extParsedEnt ::= TextDecl? content
9443 *
9444 * Returns 0 if the entity is well formed, -1 in case of args problem and
9445 * the parser error code otherwise
9446 */
9447
9448int
9449xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009450 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009451 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009452 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009453}
9454
9455/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009456 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009457 * @doc: the document the chunk pertains to
9458 * @sax: the SAX handler bloc (possibly NULL)
9459 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9460 * @depth: Used for loop detection, use 0
9461 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009462 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009463 *
9464 * Parse a well-balanced chunk of an XML document
9465 * called by the parser
9466 * The allowed sequence for the Well Balanced Chunk is the one defined by
9467 * the content production in the XML grammar:
9468 *
9469 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9470 *
9471 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9472 * the parser error code otherwise
9473 */
9474
9475int
9476xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009477 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009478 xmlParserCtxtPtr ctxt;
9479 xmlDocPtr newDoc;
9480 xmlSAXHandlerPtr oldsax = NULL;
9481 int size;
9482 int ret = 0;
9483
9484 if (depth > 40) {
9485 return(XML_ERR_ENTITY_LOOP);
9486 }
9487
9488
Daniel Veillardcda96922001-08-21 10:56:31 +00009489 if (lst != NULL)
9490 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009491 if (string == NULL)
9492 return(-1);
9493
9494 size = xmlStrlen(string);
9495
9496 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9497 if (ctxt == NULL) return(-1);
9498 ctxt->userData = ctxt;
9499 if (sax != NULL) {
9500 oldsax = ctxt->sax;
9501 ctxt->sax = sax;
9502 if (user_data != NULL)
9503 ctxt->userData = user_data;
9504 }
9505 newDoc = xmlNewDoc(BAD_CAST "1.0");
9506 if (newDoc == NULL) {
9507 xmlFreeParserCtxt(ctxt);
9508 return(-1);
9509 }
9510 if (doc != NULL) {
9511 newDoc->intSubset = doc->intSubset;
9512 newDoc->extSubset = doc->extSubset;
9513 }
9514 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9515 if (newDoc->children == NULL) {
9516 if (sax != NULL)
9517 ctxt->sax = oldsax;
9518 xmlFreeParserCtxt(ctxt);
9519 newDoc->intSubset = NULL;
9520 newDoc->extSubset = NULL;
9521 xmlFreeDoc(newDoc);
9522 return(-1);
9523 }
9524 nodePush(ctxt, newDoc->children);
9525 if (doc == NULL) {
9526 ctxt->myDoc = newDoc;
9527 } else {
9528 ctxt->myDoc = doc;
9529 newDoc->children->doc = doc;
9530 }
9531 ctxt->instate = XML_PARSER_CONTENT;
9532 ctxt->depth = depth;
9533
9534 /*
9535 * Doing validity checking on chunk doesn't make sense
9536 */
9537 ctxt->validate = 0;
9538 ctxt->loadsubset = 0;
9539
9540 xmlParseContent(ctxt);
9541
9542 if ((RAW == '<') && (NXT(1) == '/')) {
9543 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9545 ctxt->sax->error(ctxt->userData,
9546 "chunk is not well balanced\n");
9547 ctxt->wellFormed = 0;
9548 ctxt->disableSAX = 1;
9549 } else if (RAW != 0) {
9550 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9552 ctxt->sax->error(ctxt->userData,
9553 "extra content at the end of well balanced chunk\n");
9554 ctxt->wellFormed = 0;
9555 ctxt->disableSAX = 1;
9556 }
9557 if (ctxt->node != newDoc->children) {
9558 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9560 ctxt->sax->error(ctxt->userData,
9561 "chunk is not well balanced\n");
9562 ctxt->wellFormed = 0;
9563 ctxt->disableSAX = 1;
9564 }
9565
9566 if (!ctxt->wellFormed) {
9567 if (ctxt->errNo == 0)
9568 ret = 1;
9569 else
9570 ret = ctxt->errNo;
9571 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009572 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009573 xmlNodePtr cur;
9574
9575 /*
9576 * Return the newly created nodeset after unlinking it from
9577 * they pseudo parent.
9578 */
9579 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009580 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009581 while (cur != NULL) {
9582 cur->parent = NULL;
9583 cur = cur->next;
9584 }
9585 newDoc->children->children = NULL;
9586 }
9587 ret = 0;
9588 }
9589 if (sax != NULL)
9590 ctxt->sax = oldsax;
9591 xmlFreeParserCtxt(ctxt);
9592 newDoc->intSubset = NULL;
9593 newDoc->extSubset = NULL;
9594 xmlFreeDoc(newDoc);
9595
9596 return(ret);
9597}
9598
9599/**
9600 * xmlSAXParseEntity:
9601 * @sax: the SAX handler block
9602 * @filename: the filename
9603 *
9604 * parse an XML external entity out of context and build a tree.
9605 * It use the given SAX function block to handle the parsing callback.
9606 * If sax is NULL, fallback to the default DOM tree building routines.
9607 *
9608 * [78] extParsedEnt ::= TextDecl? content
9609 *
9610 * This correspond to a "Well Balanced" chunk
9611 *
9612 * Returns the resulting document tree
9613 */
9614
9615xmlDocPtr
9616xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9617 xmlDocPtr ret;
9618 xmlParserCtxtPtr ctxt;
9619 char *directory = NULL;
9620
9621 ctxt = xmlCreateFileParserCtxt(filename);
9622 if (ctxt == NULL) {
9623 return(NULL);
9624 }
9625 if (sax != NULL) {
9626 if (ctxt->sax != NULL)
9627 xmlFree(ctxt->sax);
9628 ctxt->sax = sax;
9629 ctxt->userData = NULL;
9630 }
9631
9632 if ((ctxt->directory == NULL) && (directory == NULL))
9633 directory = xmlParserGetDirectory(filename);
9634
9635 xmlParseExtParsedEnt(ctxt);
9636
9637 if (ctxt->wellFormed)
9638 ret = ctxt->myDoc;
9639 else {
9640 ret = NULL;
9641 xmlFreeDoc(ctxt->myDoc);
9642 ctxt->myDoc = NULL;
9643 }
9644 if (sax != NULL)
9645 ctxt->sax = NULL;
9646 xmlFreeParserCtxt(ctxt);
9647
9648 return(ret);
9649}
9650
9651/**
9652 * xmlParseEntity:
9653 * @filename: the filename
9654 *
9655 * parse an XML external entity out of context and build a tree.
9656 *
9657 * [78] extParsedEnt ::= TextDecl? content
9658 *
9659 * This correspond to a "Well Balanced" chunk
9660 *
9661 * Returns the resulting document tree
9662 */
9663
9664xmlDocPtr
9665xmlParseEntity(const char *filename) {
9666 return(xmlSAXParseEntity(NULL, filename));
9667}
9668
9669/**
9670 * xmlCreateEntityParserCtxt:
9671 * @URL: the entity URL
9672 * @ID: the entity PUBLIC ID
9673 * @base: a posible base for the target URI
9674 *
9675 * Create a parser context for an external entity
9676 * Automatic support for ZLIB/Compress compressed document is provided
9677 * by default if found at compile-time.
9678 *
9679 * Returns the new parser context or NULL
9680 */
9681xmlParserCtxtPtr
9682xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9683 const xmlChar *base) {
9684 xmlParserCtxtPtr ctxt;
9685 xmlParserInputPtr inputStream;
9686 char *directory = NULL;
9687 xmlChar *uri;
9688
9689 ctxt = xmlNewParserCtxt();
9690 if (ctxt == NULL) {
9691 return(NULL);
9692 }
9693
9694 uri = xmlBuildURI(URL, base);
9695
9696 if (uri == NULL) {
9697 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9698 if (inputStream == NULL) {
9699 xmlFreeParserCtxt(ctxt);
9700 return(NULL);
9701 }
9702
9703 inputPush(ctxt, inputStream);
9704
9705 if ((ctxt->directory == NULL) && (directory == NULL))
9706 directory = xmlParserGetDirectory((char *)URL);
9707 if ((ctxt->directory == NULL) && (directory != NULL))
9708 ctxt->directory = directory;
9709 } else {
9710 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9711 if (inputStream == NULL) {
9712 xmlFree(uri);
9713 xmlFreeParserCtxt(ctxt);
9714 return(NULL);
9715 }
9716
9717 inputPush(ctxt, inputStream);
9718
9719 if ((ctxt->directory == NULL) && (directory == NULL))
9720 directory = xmlParserGetDirectory((char *)uri);
9721 if ((ctxt->directory == NULL) && (directory != NULL))
9722 ctxt->directory = directory;
9723 xmlFree(uri);
9724 }
9725
9726 return(ctxt);
9727}
9728
9729/************************************************************************
9730 * *
9731 * Front ends when parsing from a file *
9732 * *
9733 ************************************************************************/
9734
9735/**
9736 * xmlCreateFileParserCtxt:
9737 * @filename: the filename
9738 *
9739 * Create a parser context for a file content.
9740 * Automatic support for ZLIB/Compress compressed document is provided
9741 * by default if found at compile-time.
9742 *
9743 * Returns the new parser context or NULL
9744 */
9745xmlParserCtxtPtr
9746xmlCreateFileParserCtxt(const char *filename)
9747{
9748 xmlParserCtxtPtr ctxt;
9749 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009750 char *directory = NULL;
9751
Owen Taylor3473f882001-02-23 17:55:21 +00009752 ctxt = xmlNewParserCtxt();
9753 if (ctxt == NULL) {
9754 if (xmlDefaultSAXHandler.error != NULL) {
9755 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9756 }
9757 return(NULL);
9758 }
9759
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009760 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009761 if (inputStream == NULL) {
9762 xmlFreeParserCtxt(ctxt);
9763 return(NULL);
9764 }
9765
Owen Taylor3473f882001-02-23 17:55:21 +00009766 inputPush(ctxt, inputStream);
9767 if ((ctxt->directory == NULL) && (directory == NULL))
9768 directory = xmlParserGetDirectory(filename);
9769 if ((ctxt->directory == NULL) && (directory != NULL))
9770 ctxt->directory = directory;
9771
9772 return(ctxt);
9773}
9774
9775/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009776 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009777 * @sax: the SAX handler block
9778 * @filename: the filename
9779 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9780 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009781 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009782 *
9783 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9784 * compressed document is provided by default if found at compile-time.
9785 * It use the given SAX function block to handle the parsing callback.
9786 * If sax is NULL, fallback to the default DOM tree building routines.
9787 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009788 * User data (void *) is stored within the parser context, so it is
9789 * available nearly everywhere in libxml.
9790 *
Owen Taylor3473f882001-02-23 17:55:21 +00009791 * Returns the resulting document tree
9792 */
9793
9794xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009795xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9796 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009797 xmlDocPtr ret;
9798 xmlParserCtxtPtr ctxt;
9799 char *directory = NULL;
9800
Daniel Veillard635ef722001-10-29 11:48:19 +00009801 xmlInitParser();
9802
Owen Taylor3473f882001-02-23 17:55:21 +00009803 ctxt = xmlCreateFileParserCtxt(filename);
9804 if (ctxt == NULL) {
9805 return(NULL);
9806 }
9807 if (sax != NULL) {
9808 if (ctxt->sax != NULL)
9809 xmlFree(ctxt->sax);
9810 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009811 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009812 if (data!=NULL) {
9813 ctxt->_private=data;
9814 }
Owen Taylor3473f882001-02-23 17:55:21 +00009815
9816 if ((ctxt->directory == NULL) && (directory == NULL))
9817 directory = xmlParserGetDirectory(filename);
9818 if ((ctxt->directory == NULL) && (directory != NULL))
9819 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9820
9821 xmlParseDocument(ctxt);
9822
9823 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9824 else {
9825 ret = NULL;
9826 xmlFreeDoc(ctxt->myDoc);
9827 ctxt->myDoc = NULL;
9828 }
9829 if (sax != NULL)
9830 ctxt->sax = NULL;
9831 xmlFreeParserCtxt(ctxt);
9832
9833 return(ret);
9834}
9835
9836/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009837 * xmlSAXParseFile:
9838 * @sax: the SAX handler block
9839 * @filename: the filename
9840 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9841 * documents
9842 *
9843 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9844 * compressed document is provided by default if found at compile-time.
9845 * It use the given SAX function block to handle the parsing callback.
9846 * If sax is NULL, fallback to the default DOM tree building routines.
9847 *
9848 * Returns the resulting document tree
9849 */
9850
9851xmlDocPtr
9852xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9853 int recovery) {
9854 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9855}
9856
9857/**
Owen Taylor3473f882001-02-23 17:55:21 +00009858 * xmlRecoverDoc:
9859 * @cur: a pointer to an array of xmlChar
9860 *
9861 * parse an XML in-memory document and build a tree.
9862 * In the case the document is not Well Formed, a tree is built anyway
9863 *
9864 * Returns the resulting document tree
9865 */
9866
9867xmlDocPtr
9868xmlRecoverDoc(xmlChar *cur) {
9869 return(xmlSAXParseDoc(NULL, cur, 1));
9870}
9871
9872/**
9873 * xmlParseFile:
9874 * @filename: the filename
9875 *
9876 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9877 * compressed document is provided by default if found at compile-time.
9878 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009879 * Returns the resulting document tree if the file was wellformed,
9880 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009881 */
9882
9883xmlDocPtr
9884xmlParseFile(const char *filename) {
9885 return(xmlSAXParseFile(NULL, filename, 0));
9886}
9887
9888/**
9889 * xmlRecoverFile:
9890 * @filename: the filename
9891 *
9892 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9893 * compressed document is provided by default if found at compile-time.
9894 * In the case the document is not Well Formed, a tree is built anyway
9895 *
9896 * Returns the resulting document tree
9897 */
9898
9899xmlDocPtr
9900xmlRecoverFile(const char *filename) {
9901 return(xmlSAXParseFile(NULL, filename, 1));
9902}
9903
9904
9905/**
9906 * xmlSetupParserForBuffer:
9907 * @ctxt: an XML parser context
9908 * @buffer: a xmlChar * buffer
9909 * @filename: a file name
9910 *
9911 * Setup the parser context to parse a new buffer; Clears any prior
9912 * contents from the parser context. The buffer parameter must not be
9913 * NULL, but the filename parameter can be
9914 */
9915void
9916xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9917 const char* filename)
9918{
9919 xmlParserInputPtr input;
9920
9921 input = xmlNewInputStream(ctxt);
9922 if (input == NULL) {
9923 perror("malloc");
9924 xmlFree(ctxt);
9925 return;
9926 }
9927
9928 xmlClearParserCtxt(ctxt);
9929 if (filename != NULL)
9930 input->filename = xmlMemStrdup(filename);
9931 input->base = buffer;
9932 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009933 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009934 inputPush(ctxt, input);
9935}
9936
9937/**
9938 * xmlSAXUserParseFile:
9939 * @sax: a SAX handler
9940 * @user_data: The user data returned on SAX callbacks
9941 * @filename: a file name
9942 *
9943 * parse an XML file and call the given SAX handler routines.
9944 * Automatic support for ZLIB/Compress compressed document is provided
9945 *
9946 * Returns 0 in case of success or a error number otherwise
9947 */
9948int
9949xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9950 const char *filename) {
9951 int ret = 0;
9952 xmlParserCtxtPtr ctxt;
9953
9954 ctxt = xmlCreateFileParserCtxt(filename);
9955 if (ctxt == NULL) return -1;
9956 if (ctxt->sax != &xmlDefaultSAXHandler)
9957 xmlFree(ctxt->sax);
9958 ctxt->sax = sax;
9959 if (user_data != NULL)
9960 ctxt->userData = user_data;
9961
9962 xmlParseDocument(ctxt);
9963
9964 if (ctxt->wellFormed)
9965 ret = 0;
9966 else {
9967 if (ctxt->errNo != 0)
9968 ret = ctxt->errNo;
9969 else
9970 ret = -1;
9971 }
9972 if (sax != NULL)
9973 ctxt->sax = NULL;
9974 xmlFreeParserCtxt(ctxt);
9975
9976 return ret;
9977}
9978
9979/************************************************************************
9980 * *
9981 * Front ends when parsing from memory *
9982 * *
9983 ************************************************************************/
9984
9985/**
9986 * xmlCreateMemoryParserCtxt:
9987 * @buffer: a pointer to a char array
9988 * @size: the size of the array
9989 *
9990 * Create a parser context for an XML in-memory document.
9991 *
9992 * Returns the new parser context or NULL
9993 */
9994xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009995xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009996 xmlParserCtxtPtr ctxt;
9997 xmlParserInputPtr input;
9998 xmlParserInputBufferPtr buf;
9999
10000 if (buffer == NULL)
10001 return(NULL);
10002 if (size <= 0)
10003 return(NULL);
10004
10005 ctxt = xmlNewParserCtxt();
10006 if (ctxt == NULL)
10007 return(NULL);
10008
10009 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10010 if (buf == NULL) return(NULL);
10011
10012 input = xmlNewInputStream(ctxt);
10013 if (input == NULL) {
10014 xmlFreeParserCtxt(ctxt);
10015 return(NULL);
10016 }
10017
10018 input->filename = NULL;
10019 input->buf = buf;
10020 input->base = input->buf->buffer->content;
10021 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010022 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010023
10024 inputPush(ctxt, input);
10025 return(ctxt);
10026}
10027
10028/**
10029 * xmlSAXParseMemory:
10030 * @sax: the SAX handler block
10031 * @buffer: an pointer to a char array
10032 * @size: the size of the array
10033 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10034 * documents
10035 *
10036 * parse an XML in-memory block and use the given SAX function block
10037 * to handle the parsing callback. If sax is NULL, fallback to the default
10038 * DOM tree building routines.
10039 *
10040 * Returns the resulting document tree
10041 */
10042xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010043xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10044 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010045 xmlDocPtr ret;
10046 xmlParserCtxtPtr ctxt;
10047
10048 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10049 if (ctxt == NULL) return(NULL);
10050 if (sax != NULL) {
10051 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010052 }
10053
10054 xmlParseDocument(ctxt);
10055
10056 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10057 else {
10058 ret = NULL;
10059 xmlFreeDoc(ctxt->myDoc);
10060 ctxt->myDoc = NULL;
10061 }
10062 if (sax != NULL)
10063 ctxt->sax = NULL;
10064 xmlFreeParserCtxt(ctxt);
10065
10066 return(ret);
10067}
10068
10069/**
10070 * xmlParseMemory:
10071 * @buffer: an pointer to a char array
10072 * @size: the size of the array
10073 *
10074 * parse an XML in-memory block and build a tree.
10075 *
10076 * Returns the resulting document tree
10077 */
10078
Daniel Veillard50822cb2001-07-26 20:05:51 +000010079xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010080 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10081}
10082
10083/**
10084 * xmlRecoverMemory:
10085 * @buffer: an pointer to a char array
10086 * @size: the size of the array
10087 *
10088 * parse an XML in-memory block and build a tree.
10089 * In the case the document is not Well Formed, a tree is built anyway
10090 *
10091 * Returns the resulting document tree
10092 */
10093
Daniel Veillard50822cb2001-07-26 20:05:51 +000010094xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010095 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10096}
10097
10098/**
10099 * xmlSAXUserParseMemory:
10100 * @sax: a SAX handler
10101 * @user_data: The user data returned on SAX callbacks
10102 * @buffer: an in-memory XML document input
10103 * @size: the length of the XML document in bytes
10104 *
10105 * A better SAX parsing routine.
10106 * parse an XML in-memory buffer and call the given SAX handler routines.
10107 *
10108 * Returns 0 in case of success or a error number otherwise
10109 */
10110int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010111 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010112 int ret = 0;
10113 xmlParserCtxtPtr ctxt;
10114 xmlSAXHandlerPtr oldsax = NULL;
10115
10116 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10117 if (ctxt == NULL) return -1;
10118 if (sax != NULL) {
10119 oldsax = ctxt->sax;
10120 ctxt->sax = sax;
10121 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010122 if (user_data != NULL)
10123 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010124
10125 xmlParseDocument(ctxt);
10126
10127 if (ctxt->wellFormed)
10128 ret = 0;
10129 else {
10130 if (ctxt->errNo != 0)
10131 ret = ctxt->errNo;
10132 else
10133 ret = -1;
10134 }
10135 if (sax != NULL) {
10136 ctxt->sax = oldsax;
10137 }
10138 xmlFreeParserCtxt(ctxt);
10139
10140 return ret;
10141}
10142
10143/**
10144 * xmlCreateDocParserCtxt:
10145 * @cur: a pointer to an array of xmlChar
10146 *
10147 * Creates a parser context for an XML in-memory document.
10148 *
10149 * Returns the new parser context or NULL
10150 */
10151xmlParserCtxtPtr
10152xmlCreateDocParserCtxt(xmlChar *cur) {
10153 int len;
10154
10155 if (cur == NULL)
10156 return(NULL);
10157 len = xmlStrlen(cur);
10158 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10159}
10160
10161/**
10162 * xmlSAXParseDoc:
10163 * @sax: the SAX handler block
10164 * @cur: a pointer to an array of xmlChar
10165 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10166 * documents
10167 *
10168 * parse an XML in-memory document and build a tree.
10169 * It use the given SAX function block to handle the parsing callback.
10170 * If sax is NULL, fallback to the default DOM tree building routines.
10171 *
10172 * Returns the resulting document tree
10173 */
10174
10175xmlDocPtr
10176xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10177 xmlDocPtr ret;
10178 xmlParserCtxtPtr ctxt;
10179
10180 if (cur == NULL) return(NULL);
10181
10182
10183 ctxt = xmlCreateDocParserCtxt(cur);
10184 if (ctxt == NULL) return(NULL);
10185 if (sax != NULL) {
10186 ctxt->sax = sax;
10187 ctxt->userData = NULL;
10188 }
10189
10190 xmlParseDocument(ctxt);
10191 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10192 else {
10193 ret = NULL;
10194 xmlFreeDoc(ctxt->myDoc);
10195 ctxt->myDoc = NULL;
10196 }
10197 if (sax != NULL)
10198 ctxt->sax = NULL;
10199 xmlFreeParserCtxt(ctxt);
10200
10201 return(ret);
10202}
10203
10204/**
10205 * xmlParseDoc:
10206 * @cur: a pointer to an array of xmlChar
10207 *
10208 * parse an XML in-memory document and build a tree.
10209 *
10210 * Returns the resulting document tree
10211 */
10212
10213xmlDocPtr
10214xmlParseDoc(xmlChar *cur) {
10215 return(xmlSAXParseDoc(NULL, cur, 0));
10216}
10217
10218
10219/************************************************************************
10220 * *
10221 * Miscellaneous *
10222 * *
10223 ************************************************************************/
10224
10225#ifdef LIBXML_XPATH_ENABLED
10226#include <libxml/xpath.h>
10227#endif
10228
10229static int xmlParserInitialized = 0;
10230
10231/**
10232 * xmlInitParser:
10233 *
10234 * Initialization function for the XML parser.
10235 * This is not reentrant. Call once before processing in case of
10236 * use in multithreaded programs.
10237 */
10238
10239void
10240xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010241 if (xmlParserInitialized != 0)
10242 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010243
Daniel Veillard6f350292001-10-14 09:56:15 +000010244 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010245 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010246 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010247 xmlInitCharEncodingHandlers();
10248 xmlInitializePredefinedEntities();
10249 xmlDefaultSAXHandlerInit();
10250 xmlRegisterDefaultInputCallbacks();
10251 xmlRegisterDefaultOutputCallbacks();
10252#ifdef LIBXML_HTML_ENABLED
10253 htmlInitAutoClose();
10254 htmlDefaultSAXHandlerInit();
10255#endif
10256#ifdef LIBXML_XPATH_ENABLED
10257 xmlXPathInit();
10258#endif
10259 xmlParserInitialized = 1;
10260}
10261
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * The char encoding handlers, the predefined entities, the catalog
 * (only when LIBXML_CATALOG_ENABLED is defined) and the threads module
 * are cleaned up, in that order.
 */

void
xmlCleanupParser(void) {
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlCleanupThreads();
    /* Clear the flag so that the next xmlInitParser() call does the work again */
    xmlParserInitialized = 0;
}