blob: 6510a49c8bb80530f92c73c61beafc3bd45e1bf4 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
44#include <libxml/tree.h>
45#include <libxml/parser.h>
46#include <libxml/parserInternals.h>
47#include <libxml/valid.h>
48#include <libxml/entities.h>
49#include <libxml/xmlerror.h>
50#include <libxml/encoding.h>
51#include <libxml/xmlIO.h>
52#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000053#ifdef LIBXML_CATALOG_ENABLED
54#include <libxml/catalog.h>
55#endif
Owen Taylor3473f882001-02-23 17:55:21 +000056
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizes used for the dynamically grown string buffers of this file:
 * XML_PARSER_BIG_BUFFER_SIZE is the initial allocation of the translation
 * buffer in xmlStringDecodeEntities(), XML_PARSER_BUFFER_SIZE is the
 * headroom kept free at the end of such a buffer before it is grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 *
 * xmlParserDebugEntities: when non-zero, the entity handling code traces
 * input pushes/pops and entity reference decoding through xmlGenericError().
 */
int xmlParserDebugEntities = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000084
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
/*
 * Forward declarations of routines that are used in this file before
 * their definition.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000104
105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
/*
 * Forward declaration: called by xmlStringDecodeEntities() to resolve the
 * '&name;' references found while decoding a string.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions working on the
 * array ctxt->name##Tab (capacity ctxt->name##Max, current depth
 * ctxt->name##Nr); ctxt->name always mirrors the top element.
 *
 *   name##Push: stores @value on top of the stack, doubling the array when
 *               it is full, and returns the index the value was stored at.
 *               When the array cannot be grown an error is reported, the
 *               stack (table pointer, capacity and depth) is left exactly
 *               as it was and 0 is returned.
 *   name##Pop:  removes and returns the top element, or returns 0 when the
 *               stack is empty. ctxt->name is updated to the new top
 *               element, or NULL when the stack becomes empty.
 */

#define PUSH_AND_POP(scope, type, name) \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
    if (ctxt->name##Nr >= ctxt->name##Max) { \
        /* keep the old table reachable if the reallocation fails */ \
        type *tmp; \
        tmp = (type *) xmlRealloc(ctxt->name##Tab, \
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0])); \
        if (tmp == NULL) { \
            xmlGenericError(xmlGenericErrorContext, \
                    "realloc failed !\n"); \
            return(0); \
        } \
        ctxt->name##Tab = tmp; \
        ctxt->name##Max *= 2; \
    } \
    ctxt->name##Tab[ctxt->name##Nr] = value; \
    ctxt->name = value; \
    return(ctxt->name##Nr++); \
} \
scope type name##Pop(xmlParserCtxtPtr ctxt) { \
    type ret; \
    if (ctxt->name##Nr <= 0) return(0); \
    ctxt->name##Nr--; \
    if (ctxt->name##Nr > 0) \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
    else \
        ctxt->name = NULL; \
    ret = ctxt->name##Tab[ctxt->name##Nr]; \
    ctxt->name##Tab[ctxt->name##Nr] = 0; \
    return(ret); \
}
146
/**
 * inputPop:
 * @ctxt: an XML parser context
 *
 * Pops the top parser input from the input stack
 *
 * Returns the input just removed, or 0 (NULL) if the stack was empty
 */
/**
 * inputPush:
 * @ctxt:  an XML parser context
 * @input:  the parser input
 *
 * Pushes a new parser input on top of the input stack
 *
 * Returns the index at which the input was stored; 0 is also returned
 *         when the stack could not be grown
 */
/**
 * namePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack
 *
 * Returns the name just removed, or 0 (NULL) if the stack was empty
 */
/**
 * namePush:
 * @ctxt:  an XML parser context
 * @name:  the element name
 *
 * Pushes a new element name on top of the name stack
 *
 * Returns the index at which the name was stored; 0 is also returned
 *         when the stack could not be grown
 */
/**
 * nodePop:
 * @ctxt: an XML parser context
 *
 * Pops the top element node from the node stack
 *
 * Returns the node just removed, or 0 (NULL) if the stack was empty
 */
/**
 * nodePush:
 * @ctxt:  an XML parser context
 * @node:  the element node
 *
 * Pushes a new element node on top of the node stack
 *
 * Returns the index at which the node was stored; 0 is also returned
 *         when the stack could not be grown
 */
/*
 * Those macros actually generate the functions: each line below expands
 * to the <name>Push() and <name>Pop() pair documented above.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
198
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000199static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000200 if (ctxt->spaceNr >= ctxt->spaceMax) {
201 ctxt->spaceMax *= 2;
202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
204 if (ctxt->spaceTab == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "realloc failed !\n");
207 return(0);
208 }
209 }
210 ctxt->spaceTab[ctxt->spaceNr] = val;
211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
212 return(ctxt->spaceNr++);
213}
214
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000215static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000216 int ret;
217 if (ctxt->spaceNr <= 0) return(0);
218 ctxt->spaceNr--;
219 if (ctxt->spaceNr > 0)
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
221 else
222 ctxt->space = NULL;
223 ret = ctxt->spaceTab[ctxt->spaceNr];
224 ctxt->spaceTab[ctxt->spaceNr] = -1;
225 return(ret);
226}
227
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar of the input buffer.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     returns the current xmlChar of the input buffer, or -1 when a
 *           token is pending in ctxt->token.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar and refill the input when its end
 *           is reached.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK and GROW expand to a bare "if (...) { ... }" statement,
 * NEXT1 to a brace block and COPY_BUF to an unbraced if/else. Their
 * statement form is kept as is because existing code invokes them both
 * with and without a trailing semicolon; never use them as the unbraced
 * body of an if that has an else branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 { \
    ctxt->input->cur++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }

/* the macro arguments are parenthesized so that expressions can be passed */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000314
315/**
316 * xmlSkipBlankChars:
317 * @ctxt: the XML parser context
318 *
319 * skip all blanks character found at that point in the input streams.
320 * It pops up finished entities in the process if allowable at that point.
321 *
322 * Returns the number of space chars skipped
323 */
324
325int
326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000327 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000328
Daniel Veillard02141ea2001-04-30 11:46:40 +0000329 if (ctxt->token != 0) {
330 if (!IS_BLANK(ctxt->token))
331 return(0);
332 ctxt->token = 0;
333 res++;
334 }
Owen Taylor3473f882001-02-23 17:55:21 +0000335 /*
336 * It's Okay to use CUR/NEXT here since all the blanks are on
337 * the ASCII range.
338 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
340 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000343 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 cur = ctxt->input->cur;
345 while (IS_BLANK(*cur)) {
346 if (*cur == '\n') {
347 ctxt->input->line++; ctxt->input->col = 1;
348 }
349 cur++;
350 res++;
351 if (*cur == 0) {
352 ctxt->input->cur = cur;
353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
354 cur = ctxt->input->cur;
355 }
356 }
357 ctxt->input->cur = cur;
358 } else {
359 int cur;
360 do {
361 cur = CUR;
362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
363 NEXT;
364 cur = CUR;
365 res++;
366 }
367 while ((cur == 0) && (ctxt->inputNr > 1) &&
368 (ctxt->instate != XML_PARSER_COMMENT)) {
369 xmlPopInput(ctxt);
370 cur = CUR;
371 }
372 /*
373 * Need to handle support of entities branching here
374 */
375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
377 }
Owen Taylor3473f882001-02-23 17:55:21 +0000378 return(res);
379}
380
381/************************************************************************
382 * *
383 * Commodity functions to handle entities *
384 * *
385 ************************************************************************/
386
387/**
388 * xmlPopInput:
389 * @ctxt: an XML parser context
390 *
391 * xmlPopInput: the current input pointed by ctxt->input came to an end
392 * pop it and return the next char.
393 *
394 * Returns the current xmlChar in the parser context
395 */
396xmlChar
397xmlPopInput(xmlParserCtxtPtr ctxt) {
398 if (ctxt->inputNr == 1) return(0); /* End of main Input */
399 if (xmlParserDebugEntities)
400 xmlGenericError(xmlGenericErrorContext,
401 "Popping input %d\n", ctxt->inputNr);
402 xmlFreeInputStream(inputPop(ctxt));
403 if ((*ctxt->input->cur == 0) &&
404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
405 return(xmlPopInput(ctxt));
406 return(CUR);
407}
408
409/**
410 * xmlPushInput:
411 * @ctxt: an XML parser context
412 * @input: an XML parser input fragment (entity, XML fragment ...).
413 *
414 * xmlPushInput: switch to a new input stream which is stacked on top
415 * of the previous one(s).
416 */
417void
418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
419 if (input == NULL) return;
420
421 if (xmlParserDebugEntities) {
422 if ((ctxt->input != NULL) && (ctxt->input->filename))
423 xmlGenericError(xmlGenericErrorContext,
424 "%s(%d): ", ctxt->input->filename,
425 ctxt->input->line);
426 xmlGenericError(xmlGenericErrorContext,
427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
428 }
429 inputPush(ctxt, input);
430 GROW;
431}
432
433/**
434 * xmlParseCharRef:
435 * @ctxt: an XML parser context
436 *
437 * parse Reference declarations
438 *
439 * [66] CharRef ::= '&#' [0-9]+ ';' |
440 * '&#x' [0-9a-fA-F]+ ';'
441 *
442 * [ WFC: Legal Character ]
443 * Characters referred to using character references must match the
444 * production for Char.
445 *
446 * Returns the value parsed (as an int), 0 in case of error
447 */
448int
449xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000450 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 int count = 0;
452
453 if (ctxt->token != 0) {
454 val = ctxt->token;
455 ctxt->token = 0;
456 return(val);
457 }
458 /*
459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
460 */
461 if ((RAW == '&') && (NXT(1) == '#') &&
462 (NXT(2) == 'x')) {
463 SKIP(3);
464 GROW;
465 while (RAW != ';') { /* loop blocked by count */
466 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
467 val = val * 16 + (CUR - '0');
468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
469 val = val * 16 + (CUR - 'a') + 10;
470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
471 val = val * 16 + (CUR - 'A') + 10;
472 else {
473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
475 ctxt->sax->error(ctxt->userData,
476 "xmlParseCharRef: invalid hexadecimal value\n");
477 ctxt->wellFormed = 0;
478 ctxt->disableSAX = 1;
479 val = 0;
480 break;
481 }
482 NEXT;
483 count++;
484 }
485 if (RAW == ';') {
486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
487 ctxt->nbChars ++;
488 ctxt->input->cur++;
489 }
490 } else if ((RAW == '&') && (NXT(1) == '#')) {
491 SKIP(2);
492 GROW;
493 while (RAW != ';') { /* loop blocked by count */
494 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
495 val = val * 10 + (CUR - '0');
496 else {
497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
499 ctxt->sax->error(ctxt->userData,
500 "xmlParseCharRef: invalid decimal value\n");
501 ctxt->wellFormed = 0;
502 ctxt->disableSAX = 1;
503 val = 0;
504 break;
505 }
506 NEXT;
507 count++;
508 }
509 if (RAW == ';') {
510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
511 ctxt->nbChars ++;
512 ctxt->input->cur++;
513 }
514 } else {
515 ctxt->errNo = XML_ERR_INVALID_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseCharRef: invalid value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 }
522
523 /*
524 * [ WFC: Legal Character ]
525 * Characters referred to using character references must match the
526 * production for Char.
527 */
528 if (IS_CHAR(val)) {
529 return(val);
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHAR;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
534 val);
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538 return(0);
539}
540
541/**
542 * xmlParseStringCharRef:
543 * @ctxt: an XML parser context
544 * @str: a pointer to an index in the string
545 *
546 * parse Reference declarations, variant parsing from a string rather
547 * than an an input flow.
548 *
549 * [66] CharRef ::= '&#' [0-9]+ ';' |
550 * '&#x' [0-9a-fA-F]+ ';'
551 *
552 * [ WFC: Legal Character ]
553 * Characters referred to using character references must match the
554 * production for Char.
555 *
556 * Returns the value parsed (as an int), 0 in case of error, str will be
557 * updated to the current value of the index
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static int
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
561 const xmlChar *ptr;
562 xmlChar cur;
563 int val = 0;
564
565 if ((str == NULL) || (*str == NULL)) return(0);
566 ptr = *str;
567 cur = *ptr;
568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
569 ptr += 3;
570 cur = *ptr;
571 while (cur != ';') { /* Non input consuming loop */
572 if ((cur >= '0') && (cur <= '9'))
573 val = val * 16 + (cur - '0');
574 else if ((cur >= 'a') && (cur <= 'f'))
575 val = val * 16 + (cur - 'a') + 10;
576 else if ((cur >= 'A') && (cur <= 'F'))
577 val = val * 16 + (cur - 'A') + 10;
578 else {
579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
581 ctxt->sax->error(ctxt->userData,
582 "xmlParseStringCharRef: invalid hexadecimal value\n");
583 ctxt->wellFormed = 0;
584 ctxt->disableSAX = 1;
585 val = 0;
586 break;
587 }
588 ptr++;
589 cur = *ptr;
590 }
591 if (cur == ';')
592 ptr++;
593 } else if ((cur == '&') && (ptr[1] == '#')){
594 ptr += 2;
595 cur = *ptr;
596 while (cur != ';') { /* Non input consuming loops */
597 if ((cur >= '0') && (cur <= '9'))
598 val = val * 10 + (cur - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseStringCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 val = 0;
607 break;
608 }
609 ptr++;
610 cur = *ptr;
611 }
612 if (cur == ';')
613 ptr++;
614 } else {
615 ctxt->errNo = XML_ERR_INVALID_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseCharRef: invalid value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 return(0);
622 }
623 *str = ptr;
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
636 "CharRef: invalid xmlChar value %d\n", val);
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 }
640 return(0);
641}
642
643/**
644 * xmlParserHandlePEReference:
645 * @ctxt: the parser context
646 *
647 * [69] PEReference ::= '%' Name ';'
648 *
649 * [ WFC: No Recursion ]
650 * A parsed entity must not contain a recursive
651 * reference to itself, either directly or indirectly.
652 *
653 * [ WFC: Entity Declared ]
654 * In a document without any DTD, a document with only an internal DTD
655 * subset which contains no parameter entity references, or a document
656 * with "standalone='yes'", ... ... The declaration of a parameter
657 * entity must precede any reference to it...
658 *
659 * [ VC: Entity Declared ]
660 * In a document with an external subset or external parameter entities
661 * with "standalone='no'", ... ... The declaration of a parameter entity
662 * must precede any reference to it...
663 *
664 * [ WFC: In DTD ]
665 * Parameter-entity references may only appear in the DTD.
666 * NOTE: misleading but this is handled.
667 *
668 * A PEReference may have been detected in the current input stream
669 * the handling is done accordingly to
670 * http://www.w3.org/TR/REC-xml#entproc
671 * i.e.
672 * - Included in literal in entity values
673 * - Included as Paraemeter Entity reference within DTDs
674 */
675void
676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
677 xmlChar *name;
678 xmlEntityPtr entity = NULL;
679 xmlParserInputPtr input;
680
681 if (ctxt->token != 0) {
682 return;
683 }
684 if (RAW != '%') return;
685 switch(ctxt->instate) {
686 case XML_PARSER_CDATA_SECTION:
687 return;
688 case XML_PARSER_COMMENT:
689 return;
690 case XML_PARSER_START_TAG:
691 return;
692 case XML_PARSER_END_TAG:
693 return;
694 case XML_PARSER_EOF:
695 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
698 ctxt->wellFormed = 0;
699 ctxt->disableSAX = 1;
700 return;
701 case XML_PARSER_PROLOG:
702 case XML_PARSER_START:
703 case XML_PARSER_MISC:
704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
707 ctxt->wellFormed = 0;
708 ctxt->disableSAX = 1;
709 return;
710 case XML_PARSER_ENTITY_DECL:
711 case XML_PARSER_CONTENT:
712 case XML_PARSER_ATTRIBUTE_VALUE:
713 case XML_PARSER_PI:
714 case XML_PARSER_SYSTEM_LITERAL:
715 /* we just ignore it there */
716 return;
717 case XML_PARSER_EPILOG:
718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
721 ctxt->wellFormed = 0;
722 ctxt->disableSAX = 1;
723 return;
724 case XML_PARSER_ENTITY_VALUE:
725 /*
726 * NOTE: in the case of entity values, we don't do the
727 * substitution here since we need the literal
728 * entity value to be able to save the internal
729 * subset of the document.
730 * This will be handled by xmlStringDecodeEntities
731 */
732 return;
733 case XML_PARSER_DTD:
734 /*
735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
736 * In the internal DTD subset, parameter-entity references
737 * can occur only where markup declarations can occur, not
738 * within markup declarations.
739 * In that case this is handled in xmlParseMarkupDecl
740 */
741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
742 return;
743 break;
744 case XML_PARSER_IGNORE:
745 return;
746 }
747
748 NEXT;
749 name = xmlParseName(ctxt);
750 if (xmlParserDebugEntities)
751 xmlGenericError(xmlGenericErrorContext,
752 "PE Reference: %s\n", name);
753 if (name == NULL) {
754 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
757 ctxt->wellFormed = 0;
758 ctxt->disableSAX = 1;
759 } else {
760 if (RAW == ';') {
761 NEXT;
762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
764 if (entity == NULL) {
765
766 /*
767 * [ WFC: Entity Declared ]
768 * In a document without any DTD, a document with only an
769 * internal DTD subset which contains no parameter entity
770 * references, or a document with "standalone='yes'", ...
771 * ... The declaration of a parameter entity must precede
772 * any reference to it...
773 */
774 if ((ctxt->standalone == 1) ||
775 ((ctxt->hasExternalSubset == 0) &&
776 (ctxt->hasPErefs == 0))) {
777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
778 ctxt->sax->error(ctxt->userData,
779 "PEReference: %%%s; not found\n", name);
780 ctxt->wellFormed = 0;
781 ctxt->disableSAX = 1;
782 } else {
783 /*
784 * [ VC: Entity Declared ]
785 * In a document with an external subset or external
786 * parameter entities with "standalone='no'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((!ctxt->disableSAX) &&
791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
792 ctxt->vctxt.error(ctxt->vctxt.userData,
793 "PEReference: %%%s; not found\n", name);
794 } else if ((!ctxt->disableSAX) &&
795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
796 ctxt->sax->warning(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->valid = 0;
799 }
800 } else {
801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000803 xmlChar start[4];
804 xmlCharEncoding enc;
805
Owen Taylor3473f882001-02-23 17:55:21 +0000806 /*
807 * handle the extra spaces added before and after
808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
809 * this is done independantly.
810 */
811 input = xmlNewEntityInputStream(ctxt, entity);
812 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000813
814 /*
815 * Get the 4 first bytes and decode the charset
816 * if enc != XML_CHAR_ENCODING_NONE
817 * plug some encoding conversion routines.
818 */
819 GROW
820 start[0] = RAW;
821 start[1] = NXT(1);
822 start[2] = NXT(2);
823 start[3] = NXT(3);
824 enc = xmlDetectCharEncoding(start, 4);
825 if (enc != XML_CHAR_ENCODING_NONE) {
826 xmlSwitchEncoding(ctxt, enc);
827 }
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
830 (RAW == '<') && (NXT(1) == '?') &&
831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
833 xmlParseTextDecl(ctxt);
834 }
835 if (ctxt->token == 0)
836 ctxt->token = ' ';
837 } else {
838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlHandlePEReference: %s is not a parameter entity\n",
841 name);
842 ctxt->wellFormed = 0;
843 ctxt->disableSAX = 1;
844 }
845 }
846 } else {
847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData,
850 "xmlHandlePEReference: expecting ';'\n");
851 ctxt->wellFormed = 0;
852 ctxt->disableSAX = 1;
853 }
854 xmlFree(name);
855 }
856}
857
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of the xmlChar buffer named
 * @buffer, whose current size must be held in a variable named
 * <buffer>_size in the same scope. It can only be used inside a function
 * returning a pointer: when the reallocation fails the error is reported
 * with perror(), the old buffer is released and the enclosing function
 * returns NULL. The buffer is reallocated through a temporary so that the
 * old block is not lost (leaked) on failure and <buffer>_size is only
 * updated once the larger block has really been obtained.
 * NOTE(review): releasing the old block assumes the enclosing function is
 * the only owner of @buffer - confirm for every user of this macro.
 */
#define growBuffer(buffer) { \
    xmlChar *buffer##_tmp; \
    buffer##_tmp = (xmlChar *) \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar)); \
    if (buffer##_tmp == NULL) { \
        perror("realloc failed"); \
        xmlFree(buffer); \
        return(NULL); \
    } \
    buffer = buffer##_tmp; \
    buffer##_size *= 2; \
}
870
871/**
872 * xmlStringDecodeEntities:
873 * @ctxt: the parser context
874 * @str: the input string
875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
876 * @end: an end marker xmlChar, 0 if none
877 * @end2: an end marker xmlChar, 0 if none
878 * @end3: an end marker xmlChar, 0 if none
879 *
880 * Takes a entity string content and process to do the adequate subtitutions.
881 *
882 * [67] Reference ::= EntityRef | CharRef
883 *
884 * [69] PEReference ::= '%' Name ';'
885 *
886 * Returns A newly allocated string with the substitution done. The caller
887 * must deallocate it !
888 */
889xmlChar *
890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
891 xmlChar end, xmlChar end2, xmlChar end3) {
892 xmlChar *buffer = NULL;
893 int buffer_size = 0;
894
895 xmlChar *current = NULL;
896 xmlEntityPtr ent;
897 int c,l;
898 int nbchars = 0;
899
900 if (str == NULL)
901 return(NULL);
902
903 if (ctxt->depth > 40) {
904 ctxt->errNo = XML_ERR_ENTITY_LOOP;
905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
906 ctxt->sax->error(ctxt->userData,
907 "Detected entity reference loop\n");
908 ctxt->wellFormed = 0;
909 ctxt->disableSAX = 1;
910 return(NULL);
911 }
912
913 /*
914 * allocate a translation buffer.
915 */
916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
918 if (buffer == NULL) {
919 perror("xmlDecodeEntities: malloc failed");
920 return(NULL);
921 }
922
923 /*
924 * Ok loop until we reach one of the ending char or a size limit.
925 * we are operating on already parsed values.
926 */
927 c = CUR_SCHAR(str, l);
928 while ((c != 0) && (c != end) && /* non input consuming loop */
929 (c != end2) && (c != end3)) {
930
931 if (c == 0) break;
932 if ((c == '&') && (str[1] == '#')) {
933 int val = xmlParseStringCharRef(ctxt, &str);
934 if (val != 0) {
935 COPY_BUF(0,buffer,nbchars,val);
936 }
937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
938 if (xmlParserDebugEntities)
939 xmlGenericError(xmlGenericErrorContext,
940 "String decoding Entity Reference: %.30s\n",
941 str);
942 ent = xmlParseStringEntityRef(ctxt, &str);
943 if ((ent != NULL) &&
944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
945 if (ent->content != NULL) {
946 COPY_BUF(0,buffer,nbchars,ent->content[0]);
947 } else {
948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
949 ctxt->sax->error(ctxt->userData,
950 "internal error entity has no content\n");
951 }
952 } else if ((ent != NULL) && (ent->content != NULL)) {
953 xmlChar *rep;
954
955 ctxt->depth++;
956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
957 0, 0, 0);
958 ctxt->depth--;
959 if (rep != NULL) {
960 current = rep;
961 while (*current != 0) { /* non input consuming loop */
962 buffer[nbchars++] = *current++;
963 if (nbchars >
964 buffer_size - XML_PARSER_BUFFER_SIZE) {
965 growBuffer(buffer);
966 }
967 }
968 xmlFree(rep);
969 }
970 } else if (ent != NULL) {
971 int i = xmlStrlen(ent->name);
972 const xmlChar *cur = ent->name;
973
974 buffer[nbchars++] = '&';
975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
976 growBuffer(buffer);
977 }
978 for (;i > 0;i--)
979 buffer[nbchars++] = *cur++;
980 buffer[nbchars++] = ';';
981 }
982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
983 if (xmlParserDebugEntities)
984 xmlGenericError(xmlGenericErrorContext,
985 "String decoding PE Reference: %.30s\n", str);
986 ent = xmlParseStringPEReference(ctxt, &str);
987 if (ent != NULL) {
988 xmlChar *rep;
989
990 ctxt->depth++;
991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
992 0, 0, 0);
993 ctxt->depth--;
994 if (rep != NULL) {
995 current = rep;
996 while (*current != 0) { /* non input consuming loop */
997 buffer[nbchars++] = *current++;
998 if (nbchars >
999 buffer_size - XML_PARSER_BUFFER_SIZE) {
1000 growBuffer(buffer);
1001 }
1002 }
1003 xmlFree(rep);
1004 }
1005 }
1006 } else {
1007 COPY_BUF(l,buffer,nbchars,c);
1008 str += l;
1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1010 growBuffer(buffer);
1011 }
1012 }
1013 c = CUR_SCHAR(str, l);
1014 }
1015 buffer[nbchars++] = 0;
1016 return(buffer);
1017}
1018
1019
1020/************************************************************************
1021 * *
1022 * Commodity functions to handle xmlChars *
1023 * *
1024 ************************************************************************/
1025
1026/**
1027 * xmlStrndup:
1028 * @cur: the input xmlChar *
1029 * @len: the len of @cur
1030 *
1031 * a strndup for array of xmlChar's
1032 *
1033 * Returns a new xmlChar * or NULL
1034 */
1035xmlChar *
1036xmlStrndup(const xmlChar *cur, int len) {
1037 xmlChar *ret;
1038
1039 if ((cur == NULL) || (len < 0)) return(NULL);
1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1041 if (ret == NULL) {
1042 xmlGenericError(xmlGenericErrorContext,
1043 "malloc of %ld byte failed\n",
1044 (len + 1) * (long)sizeof(xmlChar));
1045 return(NULL);
1046 }
1047 memcpy(ret, cur, len * sizeof(xmlChar));
1048 ret[len] = 0;
1049 return(ret);
1050}
1051
1052/**
1053 * xmlStrdup:
1054 * @cur: the input xmlChar *
1055 *
1056 * a strdup for array of xmlChar's. Since they are supposed to be
1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1058 * a termination mark of '0'.
1059 *
1060 * Returns a new xmlChar * or NULL
1061 */
1062xmlChar *
1063xmlStrdup(const xmlChar *cur) {
1064 const xmlChar *p = cur;
1065
1066 if (cur == NULL) return(NULL);
1067 while (*p != 0) p++; /* non input consuming */
1068 return(xmlStrndup(cur, p - cur));
1069}
1070
1071/**
1072 * xmlCharStrndup:
1073 * @cur: the input char *
1074 * @len: the len of @cur
1075 *
1076 * a strndup for char's to xmlChar's
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080
1081xmlChar *
1082xmlCharStrndup(const char *cur, int len) {
1083 int i;
1084 xmlChar *ret;
1085
1086 if ((cur == NULL) || (len < 0)) return(NULL);
1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1088 if (ret == NULL) {
1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1090 (len + 1) * (long)sizeof(xmlChar));
1091 return(NULL);
1092 }
1093 for (i = 0;i < len;i++)
1094 ret[i] = (xmlChar) cur[i];
1095 ret[len] = 0;
1096 return(ret);
1097}
1098
1099/**
1100 * xmlCharStrdup:
1101 * @cur: the input char *
1102 * @len: the len of @cur
1103 *
1104 * a strdup for char's to xmlChar's
1105 *
1106 * Returns a new xmlChar * or NULL
1107 */
1108
1109xmlChar *
1110xmlCharStrdup(const char *cur) {
1111 const char *p = cur;
1112
1113 if (cur == NULL) return(NULL);
1114 while (*p != '\0') p++; /* non input consuming */
1115 return(xmlCharStrndup(cur, p - cur));
1116}
1117
1118/**
1119 * xmlStrcmp:
1120 * @str1: the first xmlChar *
1121 * @str2: the second xmlChar *
1122 *
1123 * a strcmp for xmlChar's
1124 *
1125 * Returns the integer result of the comparison
1126 */
1127
1128int
1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1130 register int tmp;
1131
1132 if (str1 == str2) return(0);
1133 if (str1 == NULL) return(-1);
1134 if (str2 == NULL) return(1);
1135 do {
1136 tmp = *str1++ - *str2;
1137 if (tmp != 0) return(tmp);
1138 } while (*str2++ != 0);
1139 return 0;
1140}
1141
1142/**
1143 * xmlStrEqual:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * Check if both string are equal of have same content
1148 * Should be a bit more readable and faster than xmlStrEqual()
1149 *
1150 * Returns 1 if they are equal, 0 if they are different
1151 */
1152
1153int
1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1155 if (str1 == str2) return(1);
1156 if (str1 == NULL) return(0);
1157 if (str2 == NULL) return(0);
1158 do {
1159 if (*str1++ != *str2) return(0);
1160 } while (*str2++);
1161 return(1);
1162}
1163
1164/**
1165 * xmlStrncmp:
1166 * @str1: the first xmlChar *
1167 * @str2: the second xmlChar *
1168 * @len: the max comparison length
1169 *
1170 * a strncmp for xmlChar's
1171 *
1172 * Returns the integer result of the comparison
1173 */
1174
1175int
1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1177 register int tmp;
1178
1179 if (len <= 0) return(0);
1180 if (str1 == str2) return(0);
1181 if (str1 == NULL) return(-1);
1182 if (str2 == NULL) return(1);
1183 do {
1184 tmp = *str1++ - *str2;
1185 if (tmp != 0 || --len == 0) return(tmp);
1186 } while (*str2++ != 0);
1187 return 0;
1188}
1189
1190static xmlChar casemap[256] = {
1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1209 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1223};
1224
1225/**
1226 * xmlStrcasecmp:
1227 * @str1: the first xmlChar *
1228 * @str2: the second xmlChar *
1229 *
1230 * a strcasecmp for xmlChar's
1231 *
1232 * Returns the integer result of the comparison
1233 */
1234
1235int
1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1237 register int tmp;
1238
1239 if (str1 == str2) return(0);
1240 if (str1 == NULL) return(-1);
1241 if (str2 == NULL) return(1);
1242 do {
1243 tmp = casemap[*str1++] - casemap[*str2];
1244 if (tmp != 0) return(tmp);
1245 } while (*str2++ != 0);
1246 return 0;
1247}
1248
1249/**
1250 * xmlStrncasecmp:
1251 * @str1: the first xmlChar *
1252 * @str2: the second xmlChar *
1253 * @len: the max comparison length
1254 *
1255 * a strncasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1262 register int tmp;
1263
1264 if (len <= 0) return(0);
1265 if (str1 == str2) return(0);
1266 if (str1 == NULL) return(-1);
1267 if (str2 == NULL) return(1);
1268 do {
1269 tmp = casemap[*str1++] - casemap[*str2];
1270 if (tmp != 0 || --len == 0) return(tmp);
1271 } while (*str2++ != 0);
1272 return 0;
1273}
1274
1275/**
1276 * xmlStrchr:
1277 * @str: the xmlChar * array
1278 * @val: the xmlChar to search
1279 *
1280 * a strchr for xmlChar's
1281 *
1282 * Returns the xmlChar * for the first occurence or NULL.
1283 */
1284
1285const xmlChar *
1286xmlStrchr(const xmlChar *str, xmlChar val) {
1287 if (str == NULL) return(NULL);
1288 while (*str != 0) { /* non input consuming */
1289 if (*str == val) return((xmlChar *) str);
1290 str++;
1291 }
1292 return(NULL);
1293}
1294
1295/**
1296 * xmlStrstr:
1297 * @str: the xmlChar * array (haystack)
1298 * @val: the xmlChar to search (needle)
1299 *
1300 * a strstr for xmlChar's
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001306xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 int n;
1308
1309 if (str == NULL) return(NULL);
1310 if (val == NULL) return(NULL);
1311 n = xmlStrlen(val);
1312
1313 if (n == 0) return(str);
1314 while (*str != 0) { /* non input consuming */
1315 if (*str == *val) {
1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1317 }
1318 str++;
1319 }
1320 return(NULL);
1321}
1322
1323/**
1324 * xmlStrcasestr:
1325 * @str: the xmlChar * array (haystack)
1326 * @val: the xmlChar to search (needle)
1327 *
1328 * a case-ignoring strstr for xmlChar's
1329 *
1330 * Returns the xmlChar * for the first occurence or NULL.
1331 */
1332
1333const xmlChar *
1334xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1335 int n;
1336
1337 if (str == NULL) return(NULL);
1338 if (val == NULL) return(NULL);
1339 n = xmlStrlen(val);
1340
1341 if (n == 0) return(str);
1342 while (*str != 0) { /* non input consuming */
1343 if (casemap[*str] == casemap[*val])
1344 if (!xmlStrncasecmp(str, val, n)) return(str);
1345 str++;
1346 }
1347 return(NULL);
1348}
1349
1350/**
1351 * xmlStrsub:
1352 * @str: the xmlChar * array (haystack)
1353 * @start: the index of the first char (zero based)
1354 * @len: the length of the substring
1355 *
1356 * Extract a substring of a given string
1357 *
1358 * Returns the xmlChar * for the first occurence or NULL.
1359 */
1360
1361xmlChar *
1362xmlStrsub(const xmlChar *str, int start, int len) {
1363 int i;
1364
1365 if (str == NULL) return(NULL);
1366 if (start < 0) return(NULL);
1367 if (len < 0) return(NULL);
1368
1369 for (i = 0;i < start;i++) {
1370 if (*str == 0) return(NULL);
1371 str++;
1372 }
1373 if (*str == 0) return(NULL);
1374 return(xmlStrndup(str, len));
1375}
1376
1377/**
1378 * xmlStrlen:
1379 * @str: the xmlChar * array
1380 *
1381 * length of a xmlChar's string
1382 *
1383 * Returns the number of xmlChar contained in the ARRAY.
1384 */
1385
1386int
1387xmlStrlen(const xmlChar *str) {
1388 int len = 0;
1389
1390 if (str == NULL) return(0);
1391 while (*str != 0) { /* non input consuming */
1392 str++;
1393 len++;
1394 }
1395 return(len);
1396}
1397
1398/**
1399 * xmlStrncat:
1400 * @cur: the original xmlChar * array
1401 * @add: the xmlChar * array added
1402 * @len: the length of @add
1403 *
1404 * a strncat for array of xmlChar's, it will extend cur with the len
1405 * first bytes of @add.
1406 *
1407 * Returns a new xmlChar *, the original @cur is reallocated if needed
1408 * and should not be freed
1409 */
1410
1411xmlChar *
1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1413 int size;
1414 xmlChar *ret;
1415
1416 if ((add == NULL) || (len == 0))
1417 return(cur);
1418 if (cur == NULL)
1419 return(xmlStrndup(add, len));
1420
1421 size = xmlStrlen(cur);
1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1423 if (ret == NULL) {
1424 xmlGenericError(xmlGenericErrorContext,
1425 "xmlStrncat: realloc of %ld byte failed\n",
1426 (size + len + 1) * (long)sizeof(xmlChar));
1427 return(cur);
1428 }
1429 memcpy(&ret[size], add, len * sizeof(xmlChar));
1430 ret[size + len] = 0;
1431 return(ret);
1432}
1433
1434/**
1435 * xmlStrcat:
1436 * @cur: the original xmlChar * array
1437 * @add: the xmlChar * array added
1438 *
1439 * a strcat for array of xmlChar's. Since they are supposed to be
1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1441 * a termination mark of '0'.
1442 *
1443 * Returns a new xmlChar * containing the concatenated string.
1444 */
1445xmlChar *
1446xmlStrcat(xmlChar *cur, const xmlChar *add) {
1447 const xmlChar *p = add;
1448
1449 if (add == NULL) return(cur);
1450 if (cur == NULL)
1451 return(xmlStrdup(add));
1452
1453 while (*p != 0) p++; /* non input consuming */
1454 return(xmlStrncat(cur, add, p - add));
1455}
1456
1457/************************************************************************
1458 * *
1459 * Commodity functions, cleanup needed ? *
1460 * *
1461 ************************************************************************/
1462
1463/**
1464 * areBlanks:
1465 * @ctxt: an XML parser context
1466 * @str: a xmlChar *
1467 * @len: the size of @str
1468 *
1469 * Is this a sequence of blank chars that one can ignore ?
1470 *
1471 * Returns 1 if ignorable 0 otherwise.
1472 */
1473
1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1475 int i, ret;
1476 xmlNodePtr lastChild;
1477
Daniel Veillard05c13a22001-09-09 08:38:09 +00001478 /*
1479 * Don't spend time trying to differentiate them, the same callback is
1480 * used !
1481 */
1482 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001483 return(0);
1484
Owen Taylor3473f882001-02-23 17:55:21 +00001485 /*
1486 * Check for xml:space value.
1487 */
1488 if (*(ctxt->space) == 1)
1489 return(0);
1490
1491 /*
1492 * Check that the string is made of blanks
1493 */
1494 for (i = 0;i < len;i++)
1495 if (!(IS_BLANK(str[i]))) return(0);
1496
1497 /*
1498 * Look if the element is mixed content in the Dtd if available
1499 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001500 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001501 if (ctxt->myDoc != NULL) {
1502 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1503 if (ret == 0) return(1);
1504 if (ret == 1) return(0);
1505 }
1506
1507 /*
1508 * Otherwise, heuristic :-\
1509 */
Owen Taylor3473f882001-02-23 17:55:21 +00001510 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001511 if ((ctxt->node->children == NULL) &&
1512 (RAW == '<') && (NXT(1) == '/')) return(0);
1513
1514 lastChild = xmlGetLastChild(ctxt->node);
1515 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001516 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1517 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 } else if (xmlNodeIsText(lastChild))
1519 return(0);
1520 else if ((ctxt->node->children != NULL) &&
1521 (xmlNodeIsText(ctxt->node->children)))
1522 return(0);
1523 return(1);
1524}
1525
1526/*
1527 * Forward definition for recusive behaviour.
1528 */
1529void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1530void xmlParseReference(xmlParserCtxtPtr ctxt);
1531
1532/************************************************************************
1533 * *
1534 * Extra stuff for namespace support *
1535 * Relates to http://www.w3.org/TR/WD-xml-names *
1536 * *
1537 ************************************************************************/
1538
1539/**
1540 * xmlSplitQName:
1541 * @ctxt: an XML parser context
1542 * @name: an XML parser context
1543 * @prefix: a xmlChar **
1544 *
1545 * parse an UTF8 encoded XML qualified name string
1546 *
1547 * [NS 5] QName ::= (Prefix ':')? LocalPart
1548 *
1549 * [NS 6] Prefix ::= NCName
1550 *
1551 * [NS 7] LocalPart ::= NCName
1552 *
1553 * Returns the local part, and prefix is updated
1554 * to get the Prefix if any.
1555 */
1556
1557xmlChar *
1558xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1559 xmlChar buf[XML_MAX_NAMELEN + 5];
1560 xmlChar *buffer = NULL;
1561 int len = 0;
1562 int max = XML_MAX_NAMELEN;
1563 xmlChar *ret = NULL;
1564 const xmlChar *cur = name;
1565 int c;
1566
1567 *prefix = NULL;
1568
1569#ifndef XML_XML_NAMESPACE
1570 /* xml: prefix is not really a namespace */
1571 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1572 (cur[2] == 'l') && (cur[3] == ':'))
1573 return(xmlStrdup(name));
1574#endif
1575
1576 /* nasty but valid */
1577 if (cur[0] == ':')
1578 return(xmlStrdup(name));
1579
1580 c = *cur++;
1581 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1582 buf[len++] = c;
1583 c = *cur++;
1584 }
1585 if (len >= max) {
1586 /*
1587 * Okay someone managed to make a huge name, so he's ready to pay
1588 * for the processing speed.
1589 */
1590 max = len * 2;
1591
1592 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1593 if (buffer == NULL) {
1594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595 ctxt->sax->error(ctxt->userData,
1596 "xmlSplitQName: out of memory\n");
1597 return(NULL);
1598 }
1599 memcpy(buffer, buf, len);
1600 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1601 if (len + 10 > max) {
1602 max *= 2;
1603 buffer = (xmlChar *) xmlRealloc(buffer,
1604 max * sizeof(xmlChar));
1605 if (buffer == NULL) {
1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607 ctxt->sax->error(ctxt->userData,
1608 "xmlSplitQName: out of memory\n");
1609 return(NULL);
1610 }
1611 }
1612 buffer[len++] = c;
1613 c = *cur++;
1614 }
1615 buffer[len] = 0;
1616 }
1617
1618 if (buffer == NULL)
1619 ret = xmlStrndup(buf, len);
1620 else {
1621 ret = buffer;
1622 buffer = NULL;
1623 max = XML_MAX_NAMELEN;
1624 }
1625
1626
1627 if (c == ':') {
1628 c = *cur++;
1629 if (c == 0) return(ret);
1630 *prefix = ret;
1631 len = 0;
1632
1633 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1634 buf[len++] = c;
1635 c = *cur++;
1636 }
1637 if (len >= max) {
1638 /*
1639 * Okay someone managed to make a huge name, so he's ready to pay
1640 * for the processing speed.
1641 */
1642 max = len * 2;
1643
1644 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1645 if (buffer == NULL) {
1646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1647 ctxt->sax->error(ctxt->userData,
1648 "xmlSplitQName: out of memory\n");
1649 return(NULL);
1650 }
1651 memcpy(buffer, buf, len);
1652 while (c != 0) { /* tested bigname2.xml */
1653 if (len + 10 > max) {
1654 max *= 2;
1655 buffer = (xmlChar *) xmlRealloc(buffer,
1656 max * sizeof(xmlChar));
1657 if (buffer == NULL) {
1658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1659 ctxt->sax->error(ctxt->userData,
1660 "xmlSplitQName: out of memory\n");
1661 return(NULL);
1662 }
1663 }
1664 buffer[len++] = c;
1665 c = *cur++;
1666 }
1667 buffer[len] = 0;
1668 }
1669
1670 if (buffer == NULL)
1671 ret = xmlStrndup(buf, len);
1672 else {
1673 ret = buffer;
1674 }
1675 }
1676
1677 return(ret);
1678}
1679
1680/************************************************************************
1681 * *
1682 * The parser itself *
1683 * Relates to http://www.w3.org/TR/REC-xml *
1684 * *
1685 ************************************************************************/
1686
Daniel Veillard76d66f42001-05-16 21:05:17 +00001687static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001688/**
1689 * xmlParseName:
1690 * @ctxt: an XML parser context
1691 *
1692 * parse an XML name.
1693 *
1694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1695 * CombiningChar | Extender
1696 *
1697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1698 *
1699 * [6] Names ::= Name (S Name)*
1700 *
1701 * Returns the Name parsed or NULL
1702 */
1703
1704xmlChar *
1705xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001706 const xmlChar *in;
1707 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001708 int count = 0;
1709
1710 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001711
1712 /*
1713 * Accelerator for simple ASCII names
1714 */
1715 in = ctxt->input->cur;
1716 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1717 ((*in >= 0x41) && (*in <= 0x5A)) ||
1718 (*in == '_') || (*in == ':')) {
1719 in++;
1720 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1721 ((*in >= 0x41) && (*in <= 0x5A)) ||
1722 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001723 (*in == '_') || (*in == '-') ||
1724 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001725 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001726 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001727 count = in - ctxt->input->cur;
1728 ret = xmlStrndup(ctxt->input->cur, count);
1729 ctxt->input->cur = in;
1730 return(ret);
1731 }
1732 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001733 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001734}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001737xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1738 xmlChar buf[XML_MAX_NAMELEN + 5];
1739 int len = 0, l;
1740 int c;
1741 int count = 0;
1742
1743 /*
1744 * Handler for more complex cases
1745 */
1746 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001747 c = CUR_CHAR(l);
1748 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1749 (!IS_LETTER(c) && (c != '_') &&
1750 (c != ':'))) {
1751 return(NULL);
1752 }
1753
1754 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1755 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1756 (c == '.') || (c == '-') ||
1757 (c == '_') || (c == ':') ||
1758 (IS_COMBINING(c)) ||
1759 (IS_EXTENDER(c)))) {
1760 if (count++ > 100) {
1761 count = 0;
1762 GROW;
1763 }
1764 COPY_BUF(l,buf,len,c);
1765 NEXTL(l);
1766 c = CUR_CHAR(l);
1767 if (len >= XML_MAX_NAMELEN) {
1768 /*
1769 * Okay someone managed to make a huge name, so he's ready to pay
1770 * for the processing speed.
1771 */
1772 xmlChar *buffer;
1773 int max = len * 2;
1774
1775 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1776 if (buffer == NULL) {
1777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1778 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001779 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001780 return(NULL);
1781 }
1782 memcpy(buffer, buf, len);
1783 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1784 (c == '.') || (c == '-') ||
1785 (c == '_') || (c == ':') ||
1786 (IS_COMBINING(c)) ||
1787 (IS_EXTENDER(c))) {
1788 if (count++ > 100) {
1789 count = 0;
1790 GROW;
1791 }
1792 if (len + 10 > max) {
1793 max *= 2;
1794 buffer = (xmlChar *) xmlRealloc(buffer,
1795 max * sizeof(xmlChar));
1796 if (buffer == NULL) {
1797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001799 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001800 return(NULL);
1801 }
1802 }
1803 COPY_BUF(l,buffer,len,c);
1804 NEXTL(l);
1805 c = CUR_CHAR(l);
1806 }
1807 buffer[len] = 0;
1808 return(buffer);
1809 }
1810 }
1811 return(xmlStrndup(buf, len));
1812}
1813
1814/**
1815 * xmlParseStringName:
1816 * @ctxt: an XML parser context
1817 * @str: a pointer to the string pointer (IN/OUT)
1818 *
1819 * parse an XML name.
1820 *
1821 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1822 * CombiningChar | Extender
1823 *
1824 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1825 *
1826 * [6] Names ::= Name (S Name)*
1827 *
1828 * Returns the Name parsed or NULL. The str pointer
1829 * is updated to the current location in the string.
1830 */
1831
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001832static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001833xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1834 xmlChar buf[XML_MAX_NAMELEN + 5];
1835 const xmlChar *cur = *str;
1836 int len = 0, l;
1837 int c;
1838
1839 c = CUR_SCHAR(cur, l);
1840 if (!IS_LETTER(c) && (c != '_') &&
1841 (c != ':')) {
1842 return(NULL);
1843 }
1844
1845 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1846 (c == '.') || (c == '-') ||
1847 (c == '_') || (c == ':') ||
1848 (IS_COMBINING(c)) ||
1849 (IS_EXTENDER(c))) {
1850 COPY_BUF(l,buf,len,c);
1851 cur += l;
1852 c = CUR_SCHAR(cur, l);
1853 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1854 /*
1855 * Okay someone managed to make a huge name, so he's ready to pay
1856 * for the processing speed.
1857 */
1858 xmlChar *buffer;
1859 int max = len * 2;
1860
1861 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1862 if (buffer == NULL) {
1863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1864 ctxt->sax->error(ctxt->userData,
1865 "xmlParseStringName: out of memory\n");
1866 return(NULL);
1867 }
1868 memcpy(buffer, buf, len);
1869 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1870 (c == '.') || (c == '-') ||
1871 (c == '_') || (c == ':') ||
1872 (IS_COMBINING(c)) ||
1873 (IS_EXTENDER(c))) {
1874 if (len + 10 > max) {
1875 max *= 2;
1876 buffer = (xmlChar *) xmlRealloc(buffer,
1877 max * sizeof(xmlChar));
1878 if (buffer == NULL) {
1879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1880 ctxt->sax->error(ctxt->userData,
1881 "xmlParseStringName: out of memory\n");
1882 return(NULL);
1883 }
1884 }
1885 COPY_BUF(l,buffer,len,c);
1886 cur += l;
1887 c = CUR_SCHAR(cur, l);
1888 }
1889 buffer[len] = 0;
1890 *str = cur;
1891 return(buffer);
1892 }
1893 }
1894 *str = cur;
1895 return(xmlStrndup(buf, len));
1896}
1897
1898/**
1899 * xmlParseNmtoken:
1900 * @ctxt: an XML parser context
1901 *
1902 * parse an XML Nmtoken.
1903 *
1904 * [7] Nmtoken ::= (NameChar)+
1905 *
1906 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1907 *
1908 * Returns the Nmtoken parsed or NULL
1909 */
1910
1911xmlChar *
1912xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1913 xmlChar buf[XML_MAX_NAMELEN + 5];
1914 int len = 0, l;
1915 int c;
1916 int count = 0;
1917
1918 GROW;
1919 c = CUR_CHAR(l);
1920
1921 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1922 (c == '.') || (c == '-') ||
1923 (c == '_') || (c == ':') ||
1924 (IS_COMBINING(c)) ||
1925 (IS_EXTENDER(c))) {
1926 if (count++ > 100) {
1927 count = 0;
1928 GROW;
1929 }
1930 COPY_BUF(l,buf,len,c);
1931 NEXTL(l);
1932 c = CUR_CHAR(l);
1933 if (len >= XML_MAX_NAMELEN) {
1934 /*
1935 * Okay someone managed to make a huge token, so he's ready to pay
1936 * for the processing speed.
1937 */
1938 xmlChar *buffer;
1939 int max = len * 2;
1940
1941 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1942 if (buffer == NULL) {
1943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1944 ctxt->sax->error(ctxt->userData,
1945 "xmlParseNmtoken: out of memory\n");
1946 return(NULL);
1947 }
1948 memcpy(buffer, buf, len);
1949 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1950 (c == '.') || (c == '-') ||
1951 (c == '_') || (c == ':') ||
1952 (IS_COMBINING(c)) ||
1953 (IS_EXTENDER(c))) {
1954 if (count++ > 100) {
1955 count = 0;
1956 GROW;
1957 }
1958 if (len + 10 > max) {
1959 max *= 2;
1960 buffer = (xmlChar *) xmlRealloc(buffer,
1961 max * sizeof(xmlChar));
1962 if (buffer == NULL) {
1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001965 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return(NULL);
1967 }
1968 }
1969 COPY_BUF(l,buffer,len,c);
1970 NEXTL(l);
1971 c = CUR_CHAR(l);
1972 }
1973 buffer[len] = 0;
1974 return(buffer);
1975 }
1976 }
1977 if (len == 0)
1978 return(NULL);
1979 return(xmlStrndup(buf, len));
1980}
1981
1982/**
1983 * xmlParseEntityValue:
1984 * @ctxt: an XML parser context
1985 * @orig: if non-NULL store a copy of the original entity value
1986 *
1987 * parse a value for ENTITY declarations
1988 *
1989 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1990 * "'" ([^%&'] | PEReference | Reference)* "'"
1991 *
1992 * Returns the EntityValue parsed with reference substitued or NULL
1993 */
1994
1995xmlChar *
1996xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1997 xmlChar *buf = NULL;
1998 int len = 0;
1999 int size = XML_PARSER_BUFFER_SIZE;
2000 int c, l;
2001 xmlChar stop;
2002 xmlChar *ret = NULL;
2003 const xmlChar *cur = NULL;
2004 xmlParserInputPtr input;
2005
2006 if (RAW == '"') stop = '"';
2007 else if (RAW == '\'') stop = '\'';
2008 else {
2009 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2011 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2012 ctxt->wellFormed = 0;
2013 ctxt->disableSAX = 1;
2014 return(NULL);
2015 }
2016 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2017 if (buf == NULL) {
2018 xmlGenericError(xmlGenericErrorContext,
2019 "malloc of %d byte failed\n", size);
2020 return(NULL);
2021 }
2022
2023 /*
2024 * The content of the entity definition is copied in a buffer.
2025 */
2026
2027 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2028 input = ctxt->input;
2029 GROW;
2030 NEXT;
2031 c = CUR_CHAR(l);
2032 /*
2033 * NOTE: 4.4.5 Included in Literal
2034 * When a parameter entity reference appears in a literal entity
2035 * value, ... a single or double quote character in the replacement
2036 * text is always treated as a normal data character and will not
2037 * terminate the literal.
2038 * In practice it means we stop the loop only when back at parsing
2039 * the initial entity and the quote is found
2040 */
2041 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2042 (ctxt->input != input))) {
2043 if (len + 5 >= size) {
2044 size *= 2;
2045 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2046 if (buf == NULL) {
2047 xmlGenericError(xmlGenericErrorContext,
2048 "realloc of %d byte failed\n", size);
2049 return(NULL);
2050 }
2051 }
2052 COPY_BUF(l,buf,len,c);
2053 NEXTL(l);
2054 /*
2055 * Pop-up of finished entities.
2056 */
2057 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2058 xmlPopInput(ctxt);
2059
2060 GROW;
2061 c = CUR_CHAR(l);
2062 if (c == 0) {
2063 GROW;
2064 c = CUR_CHAR(l);
2065 }
2066 }
2067 buf[len] = 0;
2068
2069 /*
2070 * Raise problem w.r.t. '&' and '%' being used in non-entities
2071 * reference constructs. Note Charref will be handled in
2072 * xmlStringDecodeEntities()
2073 */
2074 cur = buf;
2075 while (*cur != 0) { /* non input consuming */
2076 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2077 xmlChar *name;
2078 xmlChar tmp = *cur;
2079
2080 cur++;
2081 name = xmlParseStringName(ctxt, &cur);
2082 if ((name == NULL) || (*cur != ';')) {
2083 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2085 ctxt->sax->error(ctxt->userData,
2086 "EntityValue: '%c' forbidden except for entities references\n",
2087 tmp);
2088 ctxt->wellFormed = 0;
2089 ctxt->disableSAX = 1;
2090 }
2091 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2092 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2094 ctxt->sax->error(ctxt->userData,
2095 "EntityValue: PEReferences forbidden in internal subset\n",
2096 tmp);
2097 ctxt->wellFormed = 0;
2098 ctxt->disableSAX = 1;
2099 }
2100 if (name != NULL)
2101 xmlFree(name);
2102 }
2103 cur++;
2104 }
2105
2106 /*
2107 * Then PEReference entities are substituted.
2108 */
2109 if (c != stop) {
2110 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2112 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2113 ctxt->wellFormed = 0;
2114 ctxt->disableSAX = 1;
2115 xmlFree(buf);
2116 } else {
2117 NEXT;
2118 /*
2119 * NOTE: 4.4.7 Bypassed
2120 * When a general entity reference appears in the EntityValue in
2121 * an entity declaration, it is bypassed and left as is.
2122 * so XML_SUBSTITUTE_REF is not set here.
2123 */
2124 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2125 0, 0, 0);
2126 if (orig != NULL)
2127 *orig = buf;
2128 else
2129 xmlFree(buf);
2130 }
2131
2132 return(ret);
2133}
2134
2135/**
2136 * xmlParseAttValue:
2137 * @ctxt: an XML parser context
2138 *
2139 * parse a value for an attribute
2140 * Note: the parser won't do substitution of entities here, this
2141 * will be handled later in xmlStringGetNodeList
2142 *
2143 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2144 * "'" ([^<&'] | Reference)* "'"
2145 *
2146 * 3.3.3 Attribute-Value Normalization:
2147 * Before the value of an attribute is passed to the application or
2148 * checked for validity, the XML processor must normalize it as follows:
2149 * - a character reference is processed by appending the referenced
2150 * character to the attribute value
2151 * - an entity reference is processed by recursively processing the
2152 * replacement text of the entity
2153 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2154 * appending #x20 to the normalized value, except that only a single
2155 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2156 * parsed entity or the literal entity value of an internal parsed entity
2157 * - other characters are processed by appending them to the normalized value
2158 * If the declared value is not CDATA, then the XML processor must further
2159 * process the normalized attribute value by discarding any leading and
2160 * trailing space (#x20) characters, and by replacing sequences of space
2161 * (#x20) characters by a single space (#x20) character.
2162 * All attributes for which no declaration has been read should be treated
2163 * by a non-validating parser as if declared CDATA.
2164 *
2165 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2166 */
2167
2168xmlChar *
2169xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2170 xmlChar limit = 0;
2171 xmlChar *buf = NULL;
2172 int len = 0;
2173 int buf_size = 0;
2174 int c, l;
2175 xmlChar *current = NULL;
2176 xmlEntityPtr ent;
2177
2178
2179 SHRINK;
2180 if (NXT(0) == '"') {
2181 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2182 limit = '"';
2183 NEXT;
2184 } else if (NXT(0) == '\'') {
2185 limit = '\'';
2186 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2187 NEXT;
2188 } else {
2189 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2191 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2192 ctxt->wellFormed = 0;
2193 ctxt->disableSAX = 1;
2194 return(NULL);
2195 }
2196
2197 /*
2198 * allocate a translation buffer.
2199 */
2200 buf_size = XML_PARSER_BUFFER_SIZE;
2201 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2202 if (buf == NULL) {
2203 perror("xmlParseAttValue: malloc failed");
2204 return(NULL);
2205 }
2206
2207 /*
2208 * Ok loop until we reach one of the ending char or a size limit.
2209 */
2210 c = CUR_CHAR(l);
2211 while (((NXT(0) != limit) && /* checked */
2212 (c != '<')) || (ctxt->token != 0)) {
2213 if (c == 0) break;
2214 if (ctxt->token == '&') {
2215 /*
2216 * The reparsing will be done in xmlStringGetNodeList()
2217 * called by the attribute() function in SAX.c
2218 */
2219 static xmlChar buffer[6] = "&#38;";
2220
2221 if (len > buf_size - 10) {
2222 growBuffer(buf);
2223 }
2224 current = &buffer[0];
2225 while (*current != 0) { /* non input consuming */
2226 buf[len++] = *current++;
2227 }
2228 ctxt->token = 0;
2229 } else if (c == '&') {
2230 if (NXT(1) == '#') {
2231 int val = xmlParseCharRef(ctxt);
2232 if (val == '&') {
2233 /*
2234 * The reparsing will be done in xmlStringGetNodeList()
2235 * called by the attribute() function in SAX.c
2236 */
2237 static xmlChar buffer[6] = "&#38;";
2238
2239 if (len > buf_size - 10) {
2240 growBuffer(buf);
2241 }
2242 current = &buffer[0];
2243 while (*current != 0) { /* non input consuming */
2244 buf[len++] = *current++;
2245 }
2246 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002247 if (len > buf_size - 10) {
2248 growBuffer(buf);
2249 }
Owen Taylor3473f882001-02-23 17:55:21 +00002250 len += xmlCopyChar(0, &buf[len], val);
2251 }
2252 } else {
2253 ent = xmlParseEntityRef(ctxt);
2254 if ((ent != NULL) &&
2255 (ctxt->replaceEntities != 0)) {
2256 xmlChar *rep;
2257
2258 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2259 rep = xmlStringDecodeEntities(ctxt, ent->content,
2260 XML_SUBSTITUTE_REF, 0, 0, 0);
2261 if (rep != NULL) {
2262 current = rep;
2263 while (*current != 0) { /* non input consuming */
2264 buf[len++] = *current++;
2265 if (len > buf_size - 10) {
2266 growBuffer(buf);
2267 }
2268 }
2269 xmlFree(rep);
2270 }
2271 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002272 if (len > buf_size - 10) {
2273 growBuffer(buf);
2274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 if (ent->content != NULL)
2276 buf[len++] = ent->content[0];
2277 }
2278 } else if (ent != NULL) {
2279 int i = xmlStrlen(ent->name);
2280 const xmlChar *cur = ent->name;
2281
2282 /*
2283 * This may look absurd but is needed to detect
2284 * entities problems
2285 */
2286 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2287 (ent->content != NULL)) {
2288 xmlChar *rep;
2289 rep = xmlStringDecodeEntities(ctxt, ent->content,
2290 XML_SUBSTITUTE_REF, 0, 0, 0);
2291 if (rep != NULL)
2292 xmlFree(rep);
2293 }
2294
2295 /*
2296 * Just output the reference
2297 */
2298 buf[len++] = '&';
2299 if (len > buf_size - i - 10) {
2300 growBuffer(buf);
2301 }
2302 for (;i > 0;i--)
2303 buf[len++] = *cur++;
2304 buf[len++] = ';';
2305 }
2306 }
2307 } else {
2308 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2309 COPY_BUF(l,buf,len,0x20);
2310 if (len > buf_size - 10) {
2311 growBuffer(buf);
2312 }
2313 } else {
2314 COPY_BUF(l,buf,len,c);
2315 if (len > buf_size - 10) {
2316 growBuffer(buf);
2317 }
2318 }
2319 NEXTL(l);
2320 }
2321 GROW;
2322 c = CUR_CHAR(l);
2323 }
2324 buf[len++] = 0;
2325 if (RAW == '<') {
2326 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2328 ctxt->sax->error(ctxt->userData,
2329 "Unescaped '<' not allowed in attributes values\n");
2330 ctxt->wellFormed = 0;
2331 ctxt->disableSAX = 1;
2332 } else if (RAW != limit) {
2333 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2335 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2336 ctxt->wellFormed = 0;
2337 ctxt->disableSAX = 1;
2338 } else
2339 NEXT;
2340 return(buf);
2341}
2342
2343/**
2344 * xmlParseSystemLiteral:
2345 * @ctxt: an XML parser context
2346 *
2347 * parse an XML Literal
2348 *
2349 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2350 *
2351 * Returns the SystemLiteral parsed or NULL
2352 */
2353
2354xmlChar *
2355xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2356 xmlChar *buf = NULL;
2357 int len = 0;
2358 int size = XML_PARSER_BUFFER_SIZE;
2359 int cur, l;
2360 xmlChar stop;
2361 int state = ctxt->instate;
2362 int count = 0;
2363
2364 SHRINK;
2365 if (RAW == '"') {
2366 NEXT;
2367 stop = '"';
2368 } else if (RAW == '\'') {
2369 NEXT;
2370 stop = '\'';
2371 } else {
2372 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2374 ctxt->sax->error(ctxt->userData,
2375 "SystemLiteral \" or ' expected\n");
2376 ctxt->wellFormed = 0;
2377 ctxt->disableSAX = 1;
2378 return(NULL);
2379 }
2380
2381 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2382 if (buf == NULL) {
2383 xmlGenericError(xmlGenericErrorContext,
2384 "malloc of %d byte failed\n", size);
2385 return(NULL);
2386 }
2387 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2388 cur = CUR_CHAR(l);
2389 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2390 if (len + 5 >= size) {
2391 size *= 2;
2392 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2393 if (buf == NULL) {
2394 xmlGenericError(xmlGenericErrorContext,
2395 "realloc of %d byte failed\n", size);
2396 ctxt->instate = (xmlParserInputState) state;
2397 return(NULL);
2398 }
2399 }
2400 count++;
2401 if (count > 50) {
2402 GROW;
2403 count = 0;
2404 }
2405 COPY_BUF(l,buf,len,cur);
2406 NEXTL(l);
2407 cur = CUR_CHAR(l);
2408 if (cur == 0) {
2409 GROW;
2410 SHRINK;
2411 cur = CUR_CHAR(l);
2412 }
2413 }
2414 buf[len] = 0;
2415 ctxt->instate = (xmlParserInputState) state;
2416 if (!IS_CHAR(cur)) {
2417 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2419 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2420 ctxt->wellFormed = 0;
2421 ctxt->disableSAX = 1;
2422 } else {
2423 NEXT;
2424 }
2425 return(buf);
2426}
2427
2428/**
2429 * xmlParsePubidLiteral:
2430 * @ctxt: an XML parser context
2431 *
2432 * parse an XML public literal
2433 *
2434 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2435 *
2436 * Returns the PubidLiteral parsed or NULL.
2437 */
2438
2439xmlChar *
2440xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2441 xmlChar *buf = NULL;
2442 int len = 0;
2443 int size = XML_PARSER_BUFFER_SIZE;
2444 xmlChar cur;
2445 xmlChar stop;
2446 int count = 0;
2447
2448 SHRINK;
2449 if (RAW == '"') {
2450 NEXT;
2451 stop = '"';
2452 } else if (RAW == '\'') {
2453 NEXT;
2454 stop = '\'';
2455 } else {
2456 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2458 ctxt->sax->error(ctxt->userData,
2459 "SystemLiteral \" or ' expected\n");
2460 ctxt->wellFormed = 0;
2461 ctxt->disableSAX = 1;
2462 return(NULL);
2463 }
2464 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2465 if (buf == NULL) {
2466 xmlGenericError(xmlGenericErrorContext,
2467 "malloc of %d byte failed\n", size);
2468 return(NULL);
2469 }
2470 cur = CUR;
2471 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2472 if (len + 1 >= size) {
2473 size *= 2;
2474 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2475 if (buf == NULL) {
2476 xmlGenericError(xmlGenericErrorContext,
2477 "realloc of %d byte failed\n", size);
2478 return(NULL);
2479 }
2480 }
2481 buf[len++] = cur;
2482 count++;
2483 if (count > 50) {
2484 GROW;
2485 count = 0;
2486 }
2487 NEXT;
2488 cur = CUR;
2489 if (cur == 0) {
2490 GROW;
2491 SHRINK;
2492 cur = CUR;
2493 }
2494 }
2495 buf[len] = 0;
2496 if (cur != stop) {
2497 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2499 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2500 ctxt->wellFormed = 0;
2501 ctxt->disableSAX = 1;
2502 } else {
2503 NEXT;
2504 }
2505 return(buf);
2506}
2507
Daniel Veillard48b2f892001-02-25 16:11:03 +00002508void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002509/**
2510 * xmlParseCharData:
2511 * @ctxt: an XML parser context
2512 * @cdata: int indicating whether we are within a CDATA section
2513 *
2514 * parse a CharData section.
2515 * if we are within a CDATA section ']]>' marks an end of section.
2516 *
2517 * The right angle bracket (>) may be represented using the string "&gt;",
2518 * and must, for compatibility, be escaped using "&gt;" or a character
2519 * reference when it appears in the string "]]>" in content, when that
2520 * string is not marking the end of a CDATA section.
2521 *
2522 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2523 */
2524
2525void
2526xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002527 const xmlChar *in;
2528 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002529 int line = ctxt->input->line;
2530 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002531
2532 SHRINK;
2533 GROW;
2534 /*
2535 * Accelerated common case where input don't need to be
2536 * modified before passing it to the handler.
2537 */
2538 if ((ctxt->token == 0) && (!cdata)) {
2539 in = ctxt->input->cur;
2540 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002541get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002542 while (((*in >= 0x20) && (*in != '<') &&
2543 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2544 in++;
2545 if (*in == 0xA) {
2546 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002547 in++;
2548 while (*in == 0xA) {
2549 ctxt->input->line++;
2550 in++;
2551 }
2552 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002553 }
2554 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002555 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002556 if (IS_BLANK(*ctxt->input->cur)) {
2557 const xmlChar *tmp = ctxt->input->cur;
2558 ctxt->input->cur = in;
2559 if (areBlanks(ctxt, tmp, nbchar)) {
2560 if (ctxt->sax->ignorableWhitespace != NULL)
2561 ctxt->sax->ignorableWhitespace(ctxt->userData,
2562 tmp, nbchar);
2563 } else {
2564 if (ctxt->sax->characters != NULL)
2565 ctxt->sax->characters(ctxt->userData,
2566 tmp, nbchar);
2567 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002568 line = ctxt->input->line;
2569 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002570 } else {
2571 if (ctxt->sax->characters != NULL)
2572 ctxt->sax->characters(ctxt->userData,
2573 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002574 line = ctxt->input->line;
2575 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002576 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002577 }
2578 ctxt->input->cur = in;
2579 if (*in == 0xD) {
2580 in++;
2581 if (*in == 0xA) {
2582 ctxt->input->cur = in;
2583 in++;
2584 ctxt->input->line++;
2585 continue; /* while */
2586 }
2587 in--;
2588 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002589 if (*in == '<') {
2590 return;
2591 }
2592 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002593 return;
2594 }
2595 SHRINK;
2596 GROW;
2597 in = ctxt->input->cur;
2598 } while ((*in >= 0x20) && (*in <= 0x7F));
2599 nbchar = 0;
2600 }
Daniel Veillard50582112001-03-26 22:52:16 +00002601 ctxt->input->line = line;
2602 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002603 xmlParseCharDataComplex(ctxt, cdata);
2604}
2605
2606void
2607xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002608 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2609 int nbchar = 0;
2610 int cur, l;
2611 int count = 0;
2612
2613 SHRINK;
2614 GROW;
2615 cur = CUR_CHAR(l);
2616 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2617 ((cur != '&') || (ctxt->token == '&')) &&
2618 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2619 if ((cur == ']') && (NXT(1) == ']') &&
2620 (NXT(2) == '>')) {
2621 if (cdata) break;
2622 else {
2623 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2625 ctxt->sax->error(ctxt->userData,
2626 "Sequence ']]>' not allowed in content\n");
2627 /* Should this be relaxed ??? I see a "must here */
2628 ctxt->wellFormed = 0;
2629 ctxt->disableSAX = 1;
2630 }
2631 }
2632 COPY_BUF(l,buf,nbchar,cur);
2633 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2634 /*
2635 * Ok the segment is to be consumed as chars.
2636 */
2637 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2638 if (areBlanks(ctxt, buf, nbchar)) {
2639 if (ctxt->sax->ignorableWhitespace != NULL)
2640 ctxt->sax->ignorableWhitespace(ctxt->userData,
2641 buf, nbchar);
2642 } else {
2643 if (ctxt->sax->characters != NULL)
2644 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2645 }
2646 }
2647 nbchar = 0;
2648 }
2649 count++;
2650 if (count > 50) {
2651 GROW;
2652 count = 0;
2653 }
2654 NEXTL(l);
2655 cur = CUR_CHAR(l);
2656 }
2657 if (nbchar != 0) {
2658 /*
2659 * Ok the segment is to be consumed as chars.
2660 */
2661 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2662 if (areBlanks(ctxt, buf, nbchar)) {
2663 if (ctxt->sax->ignorableWhitespace != NULL)
2664 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2665 } else {
2666 if (ctxt->sax->characters != NULL)
2667 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2668 }
2669 }
2670 }
2671}
2672
2673/**
2674 * xmlParseExternalID:
2675 * @ctxt: an XML parser context
2676 * @publicID: a xmlChar** receiving PubidLiteral
2677 * @strict: indicate whether we should restrict parsing to only
2678 * production [75], see NOTE below
2679 *
2680 * Parse an External ID or a Public ID
2681 *
2682 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2683 * 'PUBLIC' S PubidLiteral S SystemLiteral
2684 *
2685 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2686 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2687 *
2688 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2689 *
2690 * Returns the function returns SystemLiteral and in the second
2691 * case publicID receives PubidLiteral, is strict is off
2692 * it is possible to return NULL and have publicID set.
2693 */
2694
2695xmlChar *
2696xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2697 xmlChar *URI = NULL;
2698
2699 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002700
2701 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2703 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2704 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2705 SKIP(6);
2706 if (!IS_BLANK(CUR)) {
2707 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt->userData,
2710 "Space required after 'SYSTEM'\n");
2711 ctxt->wellFormed = 0;
2712 ctxt->disableSAX = 1;
2713 }
2714 SKIP_BLANKS;
2715 URI = xmlParseSystemLiteral(ctxt);
2716 if (URI == NULL) {
2717 ctxt->errNo = XML_ERR_URI_REQUIRED;
2718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2719 ctxt->sax->error(ctxt->userData,
2720 "xmlParseExternalID: SYSTEM, no URI\n");
2721 ctxt->wellFormed = 0;
2722 ctxt->disableSAX = 1;
2723 }
2724 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2725 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2726 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2727 SKIP(6);
2728 if (!IS_BLANK(CUR)) {
2729 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData,
2732 "Space required after 'PUBLIC'\n");
2733 ctxt->wellFormed = 0;
2734 ctxt->disableSAX = 1;
2735 }
2736 SKIP_BLANKS;
2737 *publicID = xmlParsePubidLiteral(ctxt);
2738 if (*publicID == NULL) {
2739 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2741 ctxt->sax->error(ctxt->userData,
2742 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2743 ctxt->wellFormed = 0;
2744 ctxt->disableSAX = 1;
2745 }
2746 if (strict) {
2747 /*
2748 * We don't handle [83] so "S SystemLiteral" is required.
2749 */
2750 if (!IS_BLANK(CUR)) {
2751 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2753 ctxt->sax->error(ctxt->userData,
2754 "Space required after the Public Identifier\n");
2755 ctxt->wellFormed = 0;
2756 ctxt->disableSAX = 1;
2757 }
2758 } else {
2759 /*
2760 * We handle [83] so we return immediately, if
2761 * "S SystemLiteral" is not detected. From a purely parsing
2762 * point of view that's a nice mess.
2763 */
2764 const xmlChar *ptr;
2765 GROW;
2766
2767 ptr = CUR_PTR;
2768 if (!IS_BLANK(*ptr)) return(NULL);
2769
2770 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2771 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2772 }
2773 SKIP_BLANKS;
2774 URI = xmlParseSystemLiteral(ctxt);
2775 if (URI == NULL) {
2776 ctxt->errNo = XML_ERR_URI_REQUIRED;
2777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2778 ctxt->sax->error(ctxt->userData,
2779 "xmlParseExternalID: PUBLIC, no URI\n");
2780 ctxt->wellFormed = 0;
2781 ctxt->disableSAX = 1;
2782 }
2783 }
2784 return(URI);
2785}
2786
2787/**
2788 * xmlParseComment:
2789 * @ctxt: an XML parser context
2790 *
2791 * Skip an XML (SGML) comment <!-- .... -->
2792 * The spec says that "For compatibility, the string "--" (double-hyphen)
2793 * must not occur within comments. "
2794 *
2795 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2796 */
2797void
2798xmlParseComment(xmlParserCtxtPtr ctxt) {
2799 xmlChar *buf = NULL;
2800 int len;
2801 int size = XML_PARSER_BUFFER_SIZE;
2802 int q, ql;
2803 int r, rl;
2804 int cur, l;
2805 xmlParserInputState state;
2806 xmlParserInputPtr input = ctxt->input;
2807 int count = 0;
2808
2809 /*
2810 * Check that there is a comment right here.
2811 */
2812 if ((RAW != '<') || (NXT(1) != '!') ||
2813 (NXT(2) != '-') || (NXT(3) != '-')) return;
2814
2815 state = ctxt->instate;
2816 ctxt->instate = XML_PARSER_COMMENT;
2817 SHRINK;
2818 SKIP(4);
2819 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2820 if (buf == NULL) {
2821 xmlGenericError(xmlGenericErrorContext,
2822 "malloc of %d byte failed\n", size);
2823 ctxt->instate = state;
2824 return;
2825 }
2826 q = CUR_CHAR(ql);
2827 NEXTL(ql);
2828 r = CUR_CHAR(rl);
2829 NEXTL(rl);
2830 cur = CUR_CHAR(l);
2831 len = 0;
2832 while (IS_CHAR(cur) && /* checked */
2833 ((cur != '>') ||
2834 (r != '-') || (q != '-'))) {
2835 if ((r == '-') && (q == '-') && (len > 1)) {
2836 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2838 ctxt->sax->error(ctxt->userData,
2839 "Comment must not contain '--' (double-hyphen)`\n");
2840 ctxt->wellFormed = 0;
2841 ctxt->disableSAX = 1;
2842 }
2843 if (len + 5 >= size) {
2844 size *= 2;
2845 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2846 if (buf == NULL) {
2847 xmlGenericError(xmlGenericErrorContext,
2848 "realloc of %d byte failed\n", size);
2849 ctxt->instate = state;
2850 return;
2851 }
2852 }
2853 COPY_BUF(ql,buf,len,q);
2854 q = r;
2855 ql = rl;
2856 r = cur;
2857 rl = l;
2858
2859 count++;
2860 if (count > 50) {
2861 GROW;
2862 count = 0;
2863 }
2864 NEXTL(l);
2865 cur = CUR_CHAR(l);
2866 if (cur == 0) {
2867 SHRINK;
2868 GROW;
2869 cur = CUR_CHAR(l);
2870 }
2871 }
2872 buf[len] = 0;
2873 if (!IS_CHAR(cur)) {
2874 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2876 ctxt->sax->error(ctxt->userData,
2877 "Comment not terminated \n<!--%.50s\n", buf);
2878 ctxt->wellFormed = 0;
2879 ctxt->disableSAX = 1;
2880 xmlFree(buf);
2881 } else {
2882 if (input != ctxt->input) {
2883 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2885 ctxt->sax->error(ctxt->userData,
2886"Comment doesn't start and stop in the same entity\n");
2887 ctxt->wellFormed = 0;
2888 ctxt->disableSAX = 1;
2889 }
2890 NEXT;
2891 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2892 (!ctxt->disableSAX))
2893 ctxt->sax->comment(ctxt->userData, buf);
2894 xmlFree(buf);
2895 }
2896 ctxt->instate = state;
2897}
2898
2899/**
2900 * xmlParsePITarget:
2901 * @ctxt: an XML parser context
2902 *
2903 * parse the name of a PI
2904 *
2905 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2906 *
2907 * Returns the PITarget name or NULL
2908 */
2909
2910xmlChar *
2911xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2912 xmlChar *name;
2913
2914 name = xmlParseName(ctxt);
2915 if ((name != NULL) &&
2916 ((name[0] == 'x') || (name[0] == 'X')) &&
2917 ((name[1] == 'm') || (name[1] == 'M')) &&
2918 ((name[2] == 'l') || (name[2] == 'L'))) {
2919 int i;
2920 if ((name[0] == 'x') && (name[1] == 'm') &&
2921 (name[2] == 'l') && (name[3] == 0)) {
2922 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2924 ctxt->sax->error(ctxt->userData,
2925 "XML declaration allowed only at the start of the document\n");
2926 ctxt->wellFormed = 0;
2927 ctxt->disableSAX = 1;
2928 return(name);
2929 } else if (name[3] == 0) {
2930 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2932 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2933 ctxt->wellFormed = 0;
2934 ctxt->disableSAX = 1;
2935 return(name);
2936 }
2937 for (i = 0;;i++) {
2938 if (xmlW3CPIs[i] == NULL) break;
2939 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2940 return(name);
2941 }
2942 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2943 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2944 ctxt->sax->warning(ctxt->userData,
2945 "xmlParsePItarget: invalid name prefix 'xml'\n");
2946 }
2947 }
2948 return(name);
2949}
2950
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A syntax error raises a warning, except when the "catalog" keyword is
 * not followed by '=': that case is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect the pseudo attribute name. */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* Then the quoted value. */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks are allowed after the closing quote. */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3013
Owen Taylor3473f882001-02-23 17:55:21 +00003014/**
3015 * xmlParsePI:
3016 * @ctxt: an XML parser context
3017 *
3018 * parse an XML Processing Instruction.
3019 *
3020 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3021 *
3022 * The processing is transfered to SAX once parsed.
3023 */
3024
3025void
3026xmlParsePI(xmlParserCtxtPtr ctxt) {
3027 xmlChar *buf = NULL;
3028 int len = 0;
3029 int size = XML_PARSER_BUFFER_SIZE;
3030 int cur, l;
3031 xmlChar *target;
3032 xmlParserInputState state;
3033 int count = 0;
3034
3035 if ((RAW == '<') && (NXT(1) == '?')) {
3036 xmlParserInputPtr input = ctxt->input;
3037 state = ctxt->instate;
3038 ctxt->instate = XML_PARSER_PI;
3039 /*
3040 * this is a Processing Instruction.
3041 */
3042 SKIP(2);
3043 SHRINK;
3044
3045 /*
3046 * Parse the target name and check for special support like
3047 * namespace.
3048 */
3049 target = xmlParsePITarget(ctxt);
3050 if (target != NULL) {
3051 if ((RAW == '?') && (NXT(1) == '>')) {
3052 if (input != ctxt->input) {
3053 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3055 ctxt->sax->error(ctxt->userData,
3056 "PI declaration doesn't start and stop in the same entity\n");
3057 ctxt->wellFormed = 0;
3058 ctxt->disableSAX = 1;
3059 }
3060 SKIP(2);
3061
3062 /*
3063 * SAX: PI detected.
3064 */
3065 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3066 (ctxt->sax->processingInstruction != NULL))
3067 ctxt->sax->processingInstruction(ctxt->userData,
3068 target, NULL);
3069 ctxt->instate = state;
3070 xmlFree(target);
3071 return;
3072 }
3073 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3074 if (buf == NULL) {
3075 xmlGenericError(xmlGenericErrorContext,
3076 "malloc of %d byte failed\n", size);
3077 ctxt->instate = state;
3078 return;
3079 }
3080 cur = CUR;
3081 if (!IS_BLANK(cur)) {
3082 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3084 ctxt->sax->error(ctxt->userData,
3085 "xmlParsePI: PI %s space expected\n", target);
3086 ctxt->wellFormed = 0;
3087 ctxt->disableSAX = 1;
3088 }
3089 SKIP_BLANKS;
3090 cur = CUR_CHAR(l);
3091 while (IS_CHAR(cur) && /* checked */
3092 ((cur != '?') || (NXT(1) != '>'))) {
3093 if (len + 5 >= size) {
3094 size *= 2;
3095 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3096 if (buf == NULL) {
3097 xmlGenericError(xmlGenericErrorContext,
3098 "realloc of %d byte failed\n", size);
3099 ctxt->instate = state;
3100 return;
3101 }
3102 }
3103 count++;
3104 if (count > 50) {
3105 GROW;
3106 count = 0;
3107 }
3108 COPY_BUF(l,buf,len,cur);
3109 NEXTL(l);
3110 cur = CUR_CHAR(l);
3111 if (cur == 0) {
3112 SHRINK;
3113 GROW;
3114 cur = CUR_CHAR(l);
3115 }
3116 }
3117 buf[len] = 0;
3118 if (cur != '?') {
3119 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3121 ctxt->sax->error(ctxt->userData,
3122 "xmlParsePI: PI %s never end ...\n", target);
3123 ctxt->wellFormed = 0;
3124 ctxt->disableSAX = 1;
3125 } else {
3126 if (input != ctxt->input) {
3127 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "PI declaration doesn't start and stop in the same entity\n");
3131 ctxt->wellFormed = 0;
3132 ctxt->disableSAX = 1;
3133 }
3134 SKIP(2);
3135
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003136#ifdef LIBXML_CATALOG_ENABLED
3137 if (((state == XML_PARSER_MISC) ||
3138 (state == XML_PARSER_START)) &&
3139 (xmlStrEqual(target, XML_CATALOG_PI))) {
3140 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3141 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3142 (allow == XML_CATA_ALLOW_ALL))
3143 xmlParseCatalogPI(ctxt, buf);
3144 }
3145#endif
3146
3147
Owen Taylor3473f882001-02-23 17:55:21 +00003148 /*
3149 * SAX: PI detected.
3150 */
3151 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3152 (ctxt->sax->processingInstruction != NULL))
3153 ctxt->sax->processingInstruction(ctxt->userData,
3154 target, buf);
3155 }
3156 xmlFree(buf);
3157 xmlFree(target);
3158 } else {
3159 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161 ctxt->sax->error(ctxt->userData,
3162 "xmlParsePI : no target name\n");
3163 ctxt->wellFormed = 0;
3164 ctxt->disableSAX = 1;
3165 }
3166 ctxt->instate = state;
3167 }
3168}
3169
3170/**
3171 * xmlParseNotationDecl:
3172 * @ctxt: an XML parser context
3173 *
3174 * parse a notation declaration
3175 *
3176 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3177 *
3178 * Hence there is actually 3 choices:
3179 * 'PUBLIC' S PubidLiteral
3180 * 'PUBLIC' S PubidLiteral S SystemLiteral
3181 * and 'SYSTEM' S SystemLiteral
3182 *
3183 * See the NOTE on xmlParseExternalID().
3184 */
3185
3186void
3187xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3188 xmlChar *name;
3189 xmlChar *Pubid;
3190 xmlChar *Systemid;
3191
3192 if ((RAW == '<') && (NXT(1) == '!') &&
3193 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3194 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3195 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3196 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3197 xmlParserInputPtr input = ctxt->input;
3198 SHRINK;
3199 SKIP(10);
3200 if (!IS_BLANK(CUR)) {
3201 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "Space required after '<!NOTATION'\n");
3205 ctxt->wellFormed = 0;
3206 ctxt->disableSAX = 1;
3207 return;
3208 }
3209 SKIP_BLANKS;
3210
Daniel Veillard76d66f42001-05-16 21:05:17 +00003211 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003212 if (name == NULL) {
3213 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3215 ctxt->sax->error(ctxt->userData,
3216 "NOTATION: Name expected here\n");
3217 ctxt->wellFormed = 0;
3218 ctxt->disableSAX = 1;
3219 return;
3220 }
3221 if (!IS_BLANK(CUR)) {
3222 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3224 ctxt->sax->error(ctxt->userData,
3225 "Space required after the NOTATION name'\n");
3226 ctxt->wellFormed = 0;
3227 ctxt->disableSAX = 1;
3228 return;
3229 }
3230 SKIP_BLANKS;
3231
3232 /*
3233 * Parse the IDs.
3234 */
3235 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3236 SKIP_BLANKS;
3237
3238 if (RAW == '>') {
3239 if (input != ctxt->input) {
3240 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3242 ctxt->sax->error(ctxt->userData,
3243"Notation declaration doesn't start and stop in the same entity\n");
3244 ctxt->wellFormed = 0;
3245 ctxt->disableSAX = 1;
3246 }
3247 NEXT;
3248 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3249 (ctxt->sax->notationDecl != NULL))
3250 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3251 } else {
3252 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255 "'>' required to close NOTATION declaration\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 }
3259 xmlFree(name);
3260 if (Systemid != NULL) xmlFree(Systemid);
3261 if (Pubid != NULL) xmlFree(Pubid);
3262 }
3263}
3264
3265/**
3266 * xmlParseEntityDecl:
3267 * @ctxt: an XML parser context
3268 *
3269 * parse <!ENTITY declarations
3270 *
3271 * [70] EntityDecl ::= GEDecl | PEDecl
3272 *
3273 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3274 *
3275 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3276 *
3277 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3278 *
3279 * [74] PEDef ::= EntityValue | ExternalID
3280 *
3281 * [76] NDataDecl ::= S 'NDATA' S Name
3282 *
3283 * [ VC: Notation Declared ]
3284 * The Name must match the declared name of a notation.
3285 */
3286
3287void
3288xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3289 xmlChar *name = NULL;
3290 xmlChar *value = NULL;
3291 xmlChar *URI = NULL, *literal = NULL;
3292 xmlChar *ndata = NULL;
3293 int isParameter = 0;
3294 xmlChar *orig = NULL;
3295
3296 GROW;
3297 if ((RAW == '<') && (NXT(1) == '!') &&
3298 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3299 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3300 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3301 xmlParserInputPtr input = ctxt->input;
3302 ctxt->instate = XML_PARSER_ENTITY_DECL;
3303 SHRINK;
3304 SKIP(8);
3305 if (!IS_BLANK(CUR)) {
3306 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3308 ctxt->sax->error(ctxt->userData,
3309 "Space required after '<!ENTITY'\n");
3310 ctxt->wellFormed = 0;
3311 ctxt->disableSAX = 1;
3312 }
3313 SKIP_BLANKS;
3314
3315 if (RAW == '%') {
3316 NEXT;
3317 if (!IS_BLANK(CUR)) {
3318 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3320 ctxt->sax->error(ctxt->userData,
3321 "Space required after '%'\n");
3322 ctxt->wellFormed = 0;
3323 ctxt->disableSAX = 1;
3324 }
3325 SKIP_BLANKS;
3326 isParameter = 1;
3327 }
3328
Daniel Veillard76d66f42001-05-16 21:05:17 +00003329 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003330 if (name == NULL) {
3331 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3334 ctxt->wellFormed = 0;
3335 ctxt->disableSAX = 1;
3336 return;
3337 }
3338 if (!IS_BLANK(CUR)) {
3339 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3341 ctxt->sax->error(ctxt->userData,
3342 "Space required after the entity name\n");
3343 ctxt->wellFormed = 0;
3344 ctxt->disableSAX = 1;
3345 }
3346 SKIP_BLANKS;
3347
3348 /*
3349 * handle the various case of definitions...
3350 */
3351 if (isParameter) {
3352 if ((RAW == '"') || (RAW == '\'')) {
3353 value = xmlParseEntityValue(ctxt, &orig);
3354 if (value) {
3355 if ((ctxt->sax != NULL) &&
3356 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3357 ctxt->sax->entityDecl(ctxt->userData, name,
3358 XML_INTERNAL_PARAMETER_ENTITY,
3359 NULL, NULL, value);
3360 }
3361 } else {
3362 URI = xmlParseExternalID(ctxt, &literal, 1);
3363 if ((URI == NULL) && (literal == NULL)) {
3364 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366 ctxt->sax->error(ctxt->userData,
3367 "Entity value required\n");
3368 ctxt->wellFormed = 0;
3369 ctxt->disableSAX = 1;
3370 }
3371 if (URI) {
3372 xmlURIPtr uri;
3373
3374 uri = xmlParseURI((const char *) URI);
3375 if (uri == NULL) {
3376 ctxt->errNo = XML_ERR_INVALID_URI;
3377 if ((ctxt->sax != NULL) &&
3378 (!ctxt->disableSAX) &&
3379 (ctxt->sax->error != NULL))
3380 ctxt->sax->error(ctxt->userData,
3381 "Invalid URI: %s\n", URI);
3382 ctxt->wellFormed = 0;
3383 } else {
3384 if (uri->fragment != NULL) {
3385 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3386 if ((ctxt->sax != NULL) &&
3387 (!ctxt->disableSAX) &&
3388 (ctxt->sax->error != NULL))
3389 ctxt->sax->error(ctxt->userData,
3390 "Fragment not allowed: %s\n", URI);
3391 ctxt->wellFormed = 0;
3392 } else {
3393 if ((ctxt->sax != NULL) &&
3394 (!ctxt->disableSAX) &&
3395 (ctxt->sax->entityDecl != NULL))
3396 ctxt->sax->entityDecl(ctxt->userData, name,
3397 XML_EXTERNAL_PARAMETER_ENTITY,
3398 literal, URI, NULL);
3399 }
3400 xmlFreeURI(uri);
3401 }
3402 }
3403 }
3404 } else {
3405 if ((RAW == '"') || (RAW == '\'')) {
3406 value = xmlParseEntityValue(ctxt, &orig);
3407 if ((ctxt->sax != NULL) &&
3408 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3409 ctxt->sax->entityDecl(ctxt->userData, name,
3410 XML_INTERNAL_GENERAL_ENTITY,
3411 NULL, NULL, value);
3412 } else {
3413 URI = xmlParseExternalID(ctxt, &literal, 1);
3414 if ((URI == NULL) && (literal == NULL)) {
3415 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "Entity value required\n");
3419 ctxt->wellFormed = 0;
3420 ctxt->disableSAX = 1;
3421 }
3422 if (URI) {
3423 xmlURIPtr uri;
3424
3425 uri = xmlParseURI((const char *)URI);
3426 if (uri == NULL) {
3427 ctxt->errNo = XML_ERR_INVALID_URI;
3428 if ((ctxt->sax != NULL) &&
3429 (!ctxt->disableSAX) &&
3430 (ctxt->sax->error != NULL))
3431 ctxt->sax->error(ctxt->userData,
3432 "Invalid URI: %s\n", URI);
3433 ctxt->wellFormed = 0;
3434 } else {
3435 if (uri->fragment != NULL) {
3436 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3437 if ((ctxt->sax != NULL) &&
3438 (!ctxt->disableSAX) &&
3439 (ctxt->sax->error != NULL))
3440 ctxt->sax->error(ctxt->userData,
3441 "Fragment not allowed: %s\n", URI);
3442 ctxt->wellFormed = 0;
3443 }
3444 xmlFreeURI(uri);
3445 }
3446 }
3447 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3448 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3450 ctxt->sax->error(ctxt->userData,
3451 "Space required before 'NDATA'\n");
3452 ctxt->wellFormed = 0;
3453 ctxt->disableSAX = 1;
3454 }
3455 SKIP_BLANKS;
3456 if ((RAW == 'N') && (NXT(1) == 'D') &&
3457 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3458 (NXT(4) == 'A')) {
3459 SKIP(5);
3460 if (!IS_BLANK(CUR)) {
3461 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3463 ctxt->sax->error(ctxt->userData,
3464 "Space required after 'NDATA'\n");
3465 ctxt->wellFormed = 0;
3466 ctxt->disableSAX = 1;
3467 }
3468 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003469 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3471 (ctxt->sax->unparsedEntityDecl != NULL))
3472 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3473 literal, URI, ndata);
3474 } else {
3475 if ((ctxt->sax != NULL) &&
3476 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3477 ctxt->sax->entityDecl(ctxt->userData, name,
3478 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3479 literal, URI, NULL);
3480 }
3481 }
3482 }
3483 SKIP_BLANKS;
3484 if (RAW != '>') {
3485 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3487 ctxt->sax->error(ctxt->userData,
3488 "xmlParseEntityDecl: entity %s not terminated\n", name);
3489 ctxt->wellFormed = 0;
3490 ctxt->disableSAX = 1;
3491 } else {
3492 if (input != ctxt->input) {
3493 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496"Entity declaration doesn't start and stop in the same entity\n");
3497 ctxt->wellFormed = 0;
3498 ctxt->disableSAX = 1;
3499 }
3500 NEXT;
3501 }
3502 if (orig != NULL) {
3503 /*
3504 * Ugly mechanism to save the raw entity value.
3505 */
3506 xmlEntityPtr cur = NULL;
3507
3508 if (isParameter) {
3509 if ((ctxt->sax != NULL) &&
3510 (ctxt->sax->getParameterEntity != NULL))
3511 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3512 } else {
3513 if ((ctxt->sax != NULL) &&
3514 (ctxt->sax->getEntity != NULL))
3515 cur = ctxt->sax->getEntity(ctxt->userData, name);
3516 }
3517 if (cur != NULL) {
3518 if (cur->orig != NULL)
3519 xmlFree(orig);
3520 else
3521 cur->orig = orig;
3522 } else
3523 xmlFree(orig);
3524 }
3525 if (name != NULL) xmlFree(name);
3526 if (value != NULL) xmlFree(value);
3527 if (URI != NULL) xmlFree(URI);
3528 if (literal != NULL) xmlFree(literal);
3529 if (ndata != NULL) xmlFree(ndata);
3530 }
3531}
3532
3533/**
3534 * xmlParseDefaultDecl:
3535 * @ctxt: an XML parser context
3536 * @value: Receive a possible fixed default value for the attribute
3537 *
3538 * Parse an attribute default declaration
3539 *
3540 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3541 *
3542 * [ VC: Required Attribute ]
3543 * if the default declaration is the keyword #REQUIRED, then the
3544 * attribute must be specified for all elements of the type in the
3545 * attribute-list declaration.
3546 *
3547 * [ VC: Attribute Default Legal ]
3548 * The declared default value must meet the lexical constraints of
3549 * the declared attribute type c.f. xmlValidateAttributeDecl()
3550 *
3551 * [ VC: Fixed Attribute Default ]
3552 * if an attribute has a default value declared with the #FIXED
3553 * keyword, instances of that attribute must match the default value.
3554 *
3555 * [ WFC: No < in Attribute Values ]
3556 * handled in xmlParseAttValue()
3557 *
3558 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3559 * or XML_ATTRIBUTE_FIXED.
3560 */
3561
3562int
3563xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3564 int val;
3565 xmlChar *ret;
3566
3567 *value = NULL;
3568 if ((RAW == '#') && (NXT(1) == 'R') &&
3569 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3570 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3571 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3572 (NXT(8) == 'D')) {
3573 SKIP(9);
3574 return(XML_ATTRIBUTE_REQUIRED);
3575 }
3576 if ((RAW == '#') && (NXT(1) == 'I') &&
3577 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3578 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3579 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3580 SKIP(8);
3581 return(XML_ATTRIBUTE_IMPLIED);
3582 }
3583 val = XML_ATTRIBUTE_NONE;
3584 if ((RAW == '#') && (NXT(1) == 'F') &&
3585 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3586 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3587 SKIP(6);
3588 val = XML_ATTRIBUTE_FIXED;
3589 if (!IS_BLANK(CUR)) {
3590 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3592 ctxt->sax->error(ctxt->userData,
3593 "Space required after '#FIXED'\n");
3594 ctxt->wellFormed = 0;
3595 ctxt->disableSAX = 1;
3596 }
3597 SKIP_BLANKS;
3598 }
3599 ret = xmlParseAttValue(ctxt);
3600 ctxt->instate = XML_PARSER_DTD;
3601 if (ret == NULL) {
3602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3603 ctxt->sax->error(ctxt->userData,
3604 "Attribute default value declaration error\n");
3605 ctxt->wellFormed = 0;
3606 ctxt->disableSAX = 1;
3607 } else
3608 *value = ret;
3609 return(val);
3610}
3611
3612/**
3613 * xmlParseNotationType:
3614 * @ctxt: an XML parser context
3615 *
3616 * parse an Notation attribute type.
3617 *
3618 * Note: the leading 'NOTATION' S part has already being parsed...
3619 *
3620 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3621 *
3622 * [ VC: Notation Attributes ]
3623 * Values of this type must match one of the notation names included
3624 * in the declaration; all notation names in the declaration must be declared.
3625 *
3626 * Returns: the notation attribute tree built while parsing
3627 */
3628
3629xmlEnumerationPtr
3630xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3631 xmlChar *name;
3632 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3633
3634 if (RAW != '(') {
3635 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "'(' required to start 'NOTATION'\n");
3639 ctxt->wellFormed = 0;
3640 ctxt->disableSAX = 1;
3641 return(NULL);
3642 }
3643 SHRINK;
3644 do {
3645 NEXT;
3646 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003647 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (name == NULL) {
3649 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3651 ctxt->sax->error(ctxt->userData,
3652 "Name expected in NOTATION declaration\n");
3653 ctxt->wellFormed = 0;
3654 ctxt->disableSAX = 1;
3655 return(ret);
3656 }
3657 cur = xmlCreateEnumeration(name);
3658 xmlFree(name);
3659 if (cur == NULL) return(ret);
3660 if (last == NULL) ret = last = cur;
3661 else {
3662 last->next = cur;
3663 last = cur;
3664 }
3665 SKIP_BLANKS;
3666 } while (RAW == '|');
3667 if (RAW != ')') {
3668 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3670 ctxt->sax->error(ctxt->userData,
3671 "')' required to finish NOTATION declaration\n");
3672 ctxt->wellFormed = 0;
3673 ctxt->disableSAX = 1;
3674 if ((last != NULL) && (last != ret))
3675 xmlFreeEnumeration(last);
3676 return(ret);
3677 }
3678 NEXT;
3679 return(ret);
3680}
3681
3682/**
3683 * xmlParseEnumerationType:
3684 * @ctxt: an XML parser context
3685 *
3686 * parse an Enumeration attribute type.
3687 *
3688 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3689 *
3690 * [ VC: Enumeration ]
3691 * Values of this type must match one of the Nmtoken tokens in
3692 * the declaration
3693 *
3694 * Returns: the enumeration attribute tree built while parsing
3695 */
3696
3697xmlEnumerationPtr
3698xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3699 xmlChar *name;
3700 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3701
3702 if (RAW != '(') {
3703 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3705 ctxt->sax->error(ctxt->userData,
3706 "'(' required to start ATTLIST enumeration\n");
3707 ctxt->wellFormed = 0;
3708 ctxt->disableSAX = 1;
3709 return(NULL);
3710 }
3711 SHRINK;
3712 do {
3713 NEXT;
3714 SKIP_BLANKS;
3715 name = xmlParseNmtoken(ctxt);
3716 if (name == NULL) {
3717 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3719 ctxt->sax->error(ctxt->userData,
3720 "NmToken expected in ATTLIST enumeration\n");
3721 ctxt->wellFormed = 0;
3722 ctxt->disableSAX = 1;
3723 return(ret);
3724 }
3725 cur = xmlCreateEnumeration(name);
3726 xmlFree(name);
3727 if (cur == NULL) return(ret);
3728 if (last == NULL) ret = last = cur;
3729 else {
3730 last->next = cur;
3731 last = cur;
3732 }
3733 SKIP_BLANKS;
3734 } while (RAW == '|');
3735 if (RAW != ')') {
3736 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt->userData,
3739 "')' required to finish ATTLIST enumeration\n");
3740 ctxt->wellFormed = 0;
3741 ctxt->disableSAX = 1;
3742 return(ret);
3743 }
3744 NEXT;
3745 return(ret);
3746}
3747
3748/**
3749 * xmlParseEnumeratedType:
3750 * @ctxt: an XML parser context
3751 * @tree: the enumeration tree built while parsing
3752 *
3753 * parse an Enumerated attribute type.
3754 *
3755 * [57] EnumeratedType ::= NotationType | Enumeration
3756 *
3757 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3758 *
3759 *
3760 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3761 */
3762
3763int
3764xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3765 if ((RAW == 'N') && (NXT(1) == 'O') &&
3766 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3767 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3768 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3769 SKIP(8);
3770 if (!IS_BLANK(CUR)) {
3771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3773 ctxt->sax->error(ctxt->userData,
3774 "Space required after 'NOTATION'\n");
3775 ctxt->wellFormed = 0;
3776 ctxt->disableSAX = 1;
3777 return(0);
3778 }
3779 SKIP_BLANKS;
3780 *tree = xmlParseNotationType(ctxt);
3781 if (*tree == NULL) return(0);
3782 return(XML_ATTRIBUTE_NOTATION);
3783 }
3784 *tree = xmlParseEnumerationType(ctxt);
3785 if (*tree == NULL) return(0);
3786 return(XML_ATTRIBUTE_ENUMERATION);
3787}
3788
3789/**
3790 * xmlParseAttributeType:
3791 * @ctxt: an XML parser context
3792 * @tree: the enumeration tree built while parsing
3793 *
3794 * parse the Attribute list def for an element
3795 *
3796 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3797 *
3798 * [55] StringType ::= 'CDATA'
3799 *
3800 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3801 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3802 *
3803 * Validity constraints for attribute values syntax are checked in
3804 * xmlValidateAttributeValue()
3805 *
3806 * [ VC: ID ]
3807 * Values of type ID must match the Name production. A name must not
3808 * appear more than once in an XML document as a value of this type;
3809 * i.e., ID values must uniquely identify the elements which bear them.
3810 *
3811 * [ VC: One ID per Element Type ]
3812 * No element type may have more than one ID attribute specified.
3813 *
3814 * [ VC: ID Attribute Default ]
3815 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3816 *
3817 * [ VC: IDREF ]
3818 * Values of type IDREF must match the Name production, and values
3819 * of type IDREFS must match Names; each IDREF Name must match the value
3820 * of an ID attribute on some element in the XML document; i.e. IDREF
3821 * values must match the value of some ID attribute.
3822 *
3823 * [ VC: Entity Name ]
3824 * Values of type ENTITY must match the Name production, values
3825 * of type ENTITIES must match Names; each Entity Name must match the
3826 * name of an unparsed entity declared in the DTD.
3827 *
3828 * [ VC: Name Token ]
3829 * Values of type NMTOKEN must match the Nmtoken production; values
3830 * of type NMTOKENS must match Nmtokens.
3831 *
3832 * Returns the attribute type
3833 */
3834int
3835xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3836 SHRINK;
3837 if ((RAW == 'C') && (NXT(1) == 'D') &&
3838 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3839 (NXT(4) == 'A')) {
3840 SKIP(5);
3841 return(XML_ATTRIBUTE_CDATA);
3842 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3843 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3844 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3845 SKIP(6);
3846 return(XML_ATTRIBUTE_IDREFS);
3847 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3848 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3849 (NXT(4) == 'F')) {
3850 SKIP(5);
3851 return(XML_ATTRIBUTE_IDREF);
3852 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3853 SKIP(2);
3854 return(XML_ATTRIBUTE_ID);
3855 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3856 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3857 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3858 SKIP(6);
3859 return(XML_ATTRIBUTE_ENTITY);
3860 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3861 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3862 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3863 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3864 SKIP(8);
3865 return(XML_ATTRIBUTE_ENTITIES);
3866 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3867 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3868 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3869 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3870 SKIP(8);
3871 return(XML_ATTRIBUTE_NMTOKENS);
3872 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3873 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3874 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3875 (NXT(6) == 'N')) {
3876 SKIP(7);
3877 return(XML_ATTRIBUTE_NMTOKEN);
3878 }
3879 return(xmlParseEnumeratedType(ctxt, tree));
3880}
3881
3882/**
3883 * xmlParseAttributeListDecl:
3884 * @ctxt: an XML parser context
3885 *
3886 * : parse the Attribute list def for an element
3887 *
3888 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3889 *
3890 * [53] AttDef ::= S Name S AttType S DefaultDecl
3891 *
3892 */
3893void
3894xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3895 xmlChar *elemName;
3896 xmlChar *attrName;
3897 xmlEnumerationPtr tree;
3898
3899 if ((RAW == '<') && (NXT(1) == '!') &&
3900 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3901 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3902 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3903 (NXT(8) == 'T')) {
3904 xmlParserInputPtr input = ctxt->input;
3905
3906 SKIP(9);
3907 if (!IS_BLANK(CUR)) {
3908 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3910 ctxt->sax->error(ctxt->userData,
3911 "Space required after '<!ATTLIST'\n");
3912 ctxt->wellFormed = 0;
3913 ctxt->disableSAX = 1;
3914 }
3915 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003916 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003917 if (elemName == NULL) {
3918 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3920 ctxt->sax->error(ctxt->userData,
3921 "ATTLIST: no name for Element\n");
3922 ctxt->wellFormed = 0;
3923 ctxt->disableSAX = 1;
3924 return;
3925 }
3926 SKIP_BLANKS;
3927 GROW;
3928 while (RAW != '>') {
3929 const xmlChar *check = CUR_PTR;
3930 int type;
3931 int def;
3932 xmlChar *defaultValue = NULL;
3933
3934 GROW;
3935 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003936 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003937 if (attrName == NULL) {
3938 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3940 ctxt->sax->error(ctxt->userData,
3941 "ATTLIST: no name for Attribute\n");
3942 ctxt->wellFormed = 0;
3943 ctxt->disableSAX = 1;
3944 break;
3945 }
3946 GROW;
3947 if (!IS_BLANK(CUR)) {
3948 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3950 ctxt->sax->error(ctxt->userData,
3951 "Space required after the attribute name\n");
3952 ctxt->wellFormed = 0;
3953 ctxt->disableSAX = 1;
3954 if (attrName != NULL)
3955 xmlFree(attrName);
3956 if (defaultValue != NULL)
3957 xmlFree(defaultValue);
3958 break;
3959 }
3960 SKIP_BLANKS;
3961
3962 type = xmlParseAttributeType(ctxt, &tree);
3963 if (type <= 0) {
3964 if (attrName != NULL)
3965 xmlFree(attrName);
3966 if (defaultValue != NULL)
3967 xmlFree(defaultValue);
3968 break;
3969 }
3970
3971 GROW;
3972 if (!IS_BLANK(CUR)) {
3973 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3975 ctxt->sax->error(ctxt->userData,
3976 "Space required after the attribute type\n");
3977 ctxt->wellFormed = 0;
3978 ctxt->disableSAX = 1;
3979 if (attrName != NULL)
3980 xmlFree(attrName);
3981 if (defaultValue != NULL)
3982 xmlFree(defaultValue);
3983 if (tree != NULL)
3984 xmlFreeEnumeration(tree);
3985 break;
3986 }
3987 SKIP_BLANKS;
3988
3989 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3990 if (def <= 0) {
3991 if (attrName != NULL)
3992 xmlFree(attrName);
3993 if (defaultValue != NULL)
3994 xmlFree(defaultValue);
3995 if (tree != NULL)
3996 xmlFreeEnumeration(tree);
3997 break;
3998 }
3999
4000 GROW;
4001 if (RAW != '>') {
4002 if (!IS_BLANK(CUR)) {
4003 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4005 ctxt->sax->error(ctxt->userData,
4006 "Space required after the attribute default value\n");
4007 ctxt->wellFormed = 0;
4008 ctxt->disableSAX = 1;
4009 if (attrName != NULL)
4010 xmlFree(attrName);
4011 if (defaultValue != NULL)
4012 xmlFree(defaultValue);
4013 if (tree != NULL)
4014 xmlFreeEnumeration(tree);
4015 break;
4016 }
4017 SKIP_BLANKS;
4018 }
4019 if (check == CUR_PTR) {
4020 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4022 ctxt->sax->error(ctxt->userData,
4023 "xmlParseAttributeListDecl: detected internal error\n");
4024 if (attrName != NULL)
4025 xmlFree(attrName);
4026 if (defaultValue != NULL)
4027 xmlFree(defaultValue);
4028 if (tree != NULL)
4029 xmlFreeEnumeration(tree);
4030 break;
4031 }
4032 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4033 (ctxt->sax->attributeDecl != NULL))
4034 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4035 type, def, defaultValue, tree);
4036 if (attrName != NULL)
4037 xmlFree(attrName);
4038 if (defaultValue != NULL)
4039 xmlFree(defaultValue);
4040 GROW;
4041 }
4042 if (RAW == '>') {
4043 if (input != ctxt->input) {
4044 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4046 ctxt->sax->error(ctxt->userData,
4047"Attribute list declaration doesn't start and stop in the same entity\n");
4048 ctxt->wellFormed = 0;
4049 ctxt->disableSAX = 1;
4050 }
4051 NEXT;
4052 }
4053
4054 xmlFree(elemName);
4055 }
4056}
4057
4058/**
4059 * xmlParseElementMixedContentDecl:
4060 * @ctxt: an XML parser context
4061 *
4062 * parse the declaration for a Mixed Element content
4063 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4064 *
4065 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4066 * '(' S? '#PCDATA' S? ')'
4067 *
4068 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4069 *
4070 * [ VC: No Duplicate Types ]
4071 * The same name must not appear more than once in a single
4072 * mixed-content declaration.
4073 *
4074 * returns: the list of the xmlElementContentPtr describing the element choices
4075 */
4076xmlElementContentPtr
4077xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4078 xmlElementContentPtr ret = NULL, cur = NULL, n;
4079 xmlChar *elem = NULL;
4080
4081 GROW;
4082 if ((RAW == '#') && (NXT(1) == 'P') &&
4083 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4084 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4085 (NXT(6) == 'A')) {
4086 SKIP(7);
4087 SKIP_BLANKS;
4088 SHRINK;
4089 if (RAW == ')') {
4090 ctxt->entity = ctxt->input;
4091 NEXT;
4092 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4093 if (RAW == '*') {
4094 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4095 NEXT;
4096 }
4097 return(ret);
4098 }
4099 if ((RAW == '(') || (RAW == '|')) {
4100 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4101 if (ret == NULL) return(NULL);
4102 }
4103 while (RAW == '|') {
4104 NEXT;
4105 if (elem == NULL) {
4106 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4107 if (ret == NULL) return(NULL);
4108 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004109 if (cur != NULL)
4110 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004111 cur = ret;
4112 } else {
4113 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4114 if (n == NULL) return(NULL);
4115 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004116 if (n->c1 != NULL)
4117 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004118 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004119 if (n != NULL)
4120 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004121 cur = n;
4122 xmlFree(elem);
4123 }
4124 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004125 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004126 if (elem == NULL) {
4127 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4129 ctxt->sax->error(ctxt->userData,
4130 "xmlParseElementMixedContentDecl : Name expected\n");
4131 ctxt->wellFormed = 0;
4132 ctxt->disableSAX = 1;
4133 xmlFreeElementContent(cur);
4134 return(NULL);
4135 }
4136 SKIP_BLANKS;
4137 GROW;
4138 }
4139 if ((RAW == ')') && (NXT(1) == '*')) {
4140 if (elem != NULL) {
4141 cur->c2 = xmlNewElementContent(elem,
4142 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004143 if (cur->c2 != NULL)
4144 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004145 xmlFree(elem);
4146 }
4147 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4148 ctxt->entity = ctxt->input;
4149 SKIP(2);
4150 } else {
4151 if (elem != NULL) xmlFree(elem);
4152 xmlFreeElementContent(ret);
4153 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4155 ctxt->sax->error(ctxt->userData,
4156 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4157 ctxt->wellFormed = 0;
4158 ctxt->disableSAX = 1;
4159 return(NULL);
4160 }
4161
4162 } else {
4163 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165 ctxt->sax->error(ctxt->userData,
4166 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4167 ctxt->wellFormed = 0;
4168 ctxt->disableSAX = 1;
4169 }
4170 return(ret);
4171}
4172
4173/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004174 * xmlParseElementChildrenContentD:
4175 * @ctxt: an XML parser context
4176 *
4177 * VMS version of xmlParseElementChildrenContentDecl()
4178 *
4179 * Returns the tree of xmlElementContentPtr describing the element
4180 * hierarchy.
4181 */
4182/**
Owen Taylor3473f882001-02-23 17:55:21 +00004183 * xmlParseElementChildrenContentDecl:
4184 * @ctxt: an XML parser context
4185 *
 * parse the declaration for the children content model of an element
4187 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4188 *
4189 *
4190 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4191 *
4192 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4193 *
4194 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4195 *
4196 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4197 *
4198 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4199 * TODO Parameter-entity replacement text must be properly nested
4200 * with parenthetized groups. That is to say, if either of the
4201 * opening or closing parentheses in a choice, seq, or Mixed
4202 * construct is contained in the replacement text for a parameter
4203 * entity, both must be contained in the same replacement text. For
4204 * interoperability, if a parameter-entity reference appears in a
4205 * choice, seq, or Mixed construct, its replacement text should not
4206 * be empty, and neither the first nor last non-blank character of
4207 * the replacement text should be a connector (| or ,).
4208 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004209 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004210 * hierarchy.
4211 */
4212xmlElementContentPtr
4213#ifdef VMS
4214xmlParseElementChildrenContentD
4215#else
4216xmlParseElementChildrenContentDecl
4217#endif
4218(xmlParserCtxtPtr ctxt) {
4219 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4220 xmlChar *elem;
4221 xmlChar type = 0;
4222
4223 SKIP_BLANKS;
4224 GROW;
4225 if (RAW == '(') {
4226 /* Recurse on first child */
4227 NEXT;
4228 SKIP_BLANKS;
4229 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4230 SKIP_BLANKS;
4231 GROW;
4232 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004233 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004234 if (elem == NULL) {
4235 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4237 ctxt->sax->error(ctxt->userData,
4238 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4239 ctxt->wellFormed = 0;
4240 ctxt->disableSAX = 1;
4241 return(NULL);
4242 }
4243 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4244 GROW;
4245 if (RAW == '?') {
4246 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4247 NEXT;
4248 } else if (RAW == '*') {
4249 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4250 NEXT;
4251 } else if (RAW == '+') {
4252 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4253 NEXT;
4254 } else {
4255 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4256 }
4257 xmlFree(elem);
4258 GROW;
4259 }
4260 SKIP_BLANKS;
4261 SHRINK;
4262 while (RAW != ')') {
4263 /*
4264 * Each loop we parse one separator and one element.
4265 */
4266 if (RAW == ',') {
4267 if (type == 0) type = CUR;
4268
4269 /*
4270 * Detect "Name | Name , Name" error
4271 */
4272 else if (type != CUR) {
4273 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4275 ctxt->sax->error(ctxt->userData,
4276 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4277 type);
4278 ctxt->wellFormed = 0;
4279 ctxt->disableSAX = 1;
4280 if ((op != NULL) && (op != ret))
4281 xmlFreeElementContent(op);
4282 if ((last != NULL) && (last != ret) &&
4283 (last != ret->c1) && (last != ret->c2))
4284 xmlFreeElementContent(last);
4285 if (ret != NULL)
4286 xmlFreeElementContent(ret);
4287 return(NULL);
4288 }
4289 NEXT;
4290
4291 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4292 if (op == NULL) {
4293 xmlFreeElementContent(ret);
4294 return(NULL);
4295 }
4296 if (last == NULL) {
4297 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004298 if (ret != NULL)
4299 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004300 ret = cur = op;
4301 } else {
4302 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004303 if (op != NULL)
4304 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004305 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004306 if (last != NULL)
4307 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 cur =op;
4309 last = NULL;
4310 }
4311 } else if (RAW == '|') {
4312 if (type == 0) type = CUR;
4313
4314 /*
4315 * Detect "Name , Name | Name" error
4316 */
4317 else if (type != CUR) {
4318 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4320 ctxt->sax->error(ctxt->userData,
4321 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4322 type);
4323 ctxt->wellFormed = 0;
4324 ctxt->disableSAX = 1;
4325 if ((op != NULL) && (op != ret) && (op != last))
4326 xmlFreeElementContent(op);
4327 if ((last != NULL) && (last != ret) &&
4328 (last != ret->c1) && (last != ret->c2))
4329 xmlFreeElementContent(last);
4330 if (ret != NULL)
4331 xmlFreeElementContent(ret);
4332 return(NULL);
4333 }
4334 NEXT;
4335
4336 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4337 if (op == NULL) {
4338 if ((op != NULL) && (op != ret))
4339 xmlFreeElementContent(op);
4340 if ((last != NULL) && (last != ret) &&
4341 (last != ret->c1) && (last != ret->c2))
4342 xmlFreeElementContent(last);
4343 if (ret != NULL)
4344 xmlFreeElementContent(ret);
4345 return(NULL);
4346 }
4347 if (last == NULL) {
4348 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004349 if (ret != NULL)
4350 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004351 ret = cur = op;
4352 } else {
4353 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004354 if (op != NULL)
4355 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004357 if (last != NULL)
4358 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 cur =op;
4360 last = NULL;
4361 }
4362 } else {
4363 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4365 ctxt->sax->error(ctxt->userData,
4366 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4367 ctxt->wellFormed = 0;
4368 ctxt->disableSAX = 1;
4369 if ((op != NULL) && (op != ret))
4370 xmlFreeElementContent(op);
4371 if ((last != NULL) && (last != ret) &&
4372 (last != ret->c1) && (last != ret->c2))
4373 xmlFreeElementContent(last);
4374 if (ret != NULL)
4375 xmlFreeElementContent(ret);
4376 return(NULL);
4377 }
4378 GROW;
4379 SKIP_BLANKS;
4380 GROW;
4381 if (RAW == '(') {
4382 /* Recurse on second child */
4383 NEXT;
4384 SKIP_BLANKS;
4385 last = xmlParseElementChildrenContentDecl(ctxt);
4386 SKIP_BLANKS;
4387 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004388 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004389 if (elem == NULL) {
4390 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4392 ctxt->sax->error(ctxt->userData,
4393 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4394 ctxt->wellFormed = 0;
4395 ctxt->disableSAX = 1;
4396 if ((op != NULL) && (op != ret))
4397 xmlFreeElementContent(op);
4398 if ((last != NULL) && (last != ret) &&
4399 (last != ret->c1) && (last != ret->c2))
4400 xmlFreeElementContent(last);
4401 if (ret != NULL)
4402 xmlFreeElementContent(ret);
4403 return(NULL);
4404 }
4405 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4406 xmlFree(elem);
4407 if (RAW == '?') {
4408 last->ocur = XML_ELEMENT_CONTENT_OPT;
4409 NEXT;
4410 } else if (RAW == '*') {
4411 last->ocur = XML_ELEMENT_CONTENT_MULT;
4412 NEXT;
4413 } else if (RAW == '+') {
4414 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4415 NEXT;
4416 } else {
4417 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4418 }
4419 }
4420 SKIP_BLANKS;
4421 GROW;
4422 }
4423 if ((cur != NULL) && (last != NULL)) {
4424 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004425 if (last != NULL)
4426 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 }
4428 ctxt->entity = ctxt->input;
4429 NEXT;
4430 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004431 if (ret != NULL)
4432 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004433 NEXT;
4434 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004435 if (ret != NULL)
4436 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 NEXT;
4438 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004439 if (ret != NULL)
4440 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004441 NEXT;
4442 }
4443 return(ret);
4444}
4445
4446/**
4447 * xmlParseElementContentDecl:
4448 * @ctxt: an XML parser context
4449 * @name: the name of the element being defined.
4450 * @result: the Element Content pointer will be stored here if any
4451 *
4452 * parse the declaration for an Element content either Mixed or Children,
4453 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4454 *
4455 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4456 *
4457 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4458 */
4459
4460int
4461xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4462 xmlElementContentPtr *result) {
4463
4464 xmlElementContentPtr tree = NULL;
4465 xmlParserInputPtr input = ctxt->input;
4466 int res;
4467
4468 *result = NULL;
4469
4470 if (RAW != '(') {
4471 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004474 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004475 ctxt->wellFormed = 0;
4476 ctxt->disableSAX = 1;
4477 return(-1);
4478 }
4479 NEXT;
4480 GROW;
4481 SKIP_BLANKS;
4482 if ((RAW == '#') && (NXT(1) == 'P') &&
4483 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4484 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4485 (NXT(6) == 'A')) {
4486 tree = xmlParseElementMixedContentDecl(ctxt);
4487 res = XML_ELEMENT_TYPE_MIXED;
4488 } else {
4489 tree = xmlParseElementChildrenContentDecl(ctxt);
4490 res = XML_ELEMENT_TYPE_ELEMENT;
4491 }
4492 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4493 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4495 ctxt->sax->error(ctxt->userData,
4496"Element content declaration doesn't start and stop in the same entity\n");
4497 ctxt->wellFormed = 0;
4498 ctxt->disableSAX = 1;
4499 }
4500 SKIP_BLANKS;
4501 *result = tree;
4502 return(res);
4503}
4504
4505/**
4506 * xmlParseElementDecl:
4507 * @ctxt: an XML parser context
4508 *
4509 * parse an Element declaration.
4510 *
4511 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4512 *
4513 * [ VC: Unique Element Type Declaration ]
4514 * No element type may be declared more than once
4515 *
4516 * Returns the type of the element, or -1 in case of error
4517 */
4518int
4519xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4520 xmlChar *name;
4521 int ret = -1;
4522 xmlElementContentPtr content = NULL;
4523
4524 GROW;
4525 if ((RAW == '<') && (NXT(1) == '!') &&
4526 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4527 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4528 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4529 (NXT(8) == 'T')) {
4530 xmlParserInputPtr input = ctxt->input;
4531
4532 SKIP(9);
4533 if (!IS_BLANK(CUR)) {
4534 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4536 ctxt->sax->error(ctxt->userData,
4537 "Space required after 'ELEMENT'\n");
4538 ctxt->wellFormed = 0;
4539 ctxt->disableSAX = 1;
4540 }
4541 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004542 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004543 if (name == NULL) {
4544 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4546 ctxt->sax->error(ctxt->userData,
4547 "xmlParseElementDecl: no name for Element\n");
4548 ctxt->wellFormed = 0;
4549 ctxt->disableSAX = 1;
4550 return(-1);
4551 }
4552 while ((RAW == 0) && (ctxt->inputNr > 1))
4553 xmlPopInput(ctxt);
4554 if (!IS_BLANK(CUR)) {
4555 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4557 ctxt->sax->error(ctxt->userData,
4558 "Space required after the element name\n");
4559 ctxt->wellFormed = 0;
4560 ctxt->disableSAX = 1;
4561 }
4562 SKIP_BLANKS;
4563 if ((RAW == 'E') && (NXT(1) == 'M') &&
4564 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4565 (NXT(4) == 'Y')) {
4566 SKIP(5);
4567 /*
4568 * Element must always be empty.
4569 */
4570 ret = XML_ELEMENT_TYPE_EMPTY;
4571 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4572 (NXT(2) == 'Y')) {
4573 SKIP(3);
4574 /*
4575 * Element is a generic container.
4576 */
4577 ret = XML_ELEMENT_TYPE_ANY;
4578 } else if (RAW == '(') {
4579 ret = xmlParseElementContentDecl(ctxt, name, &content);
4580 } else {
4581 /*
4582 * [ WFC: PEs in Internal Subset ] error handling.
4583 */
4584 if ((RAW == '%') && (ctxt->external == 0) &&
4585 (ctxt->inputNr == 1)) {
4586 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4588 ctxt->sax->error(ctxt->userData,
4589 "PEReference: forbidden within markup decl in internal subset\n");
4590 } else {
4591 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4593 ctxt->sax->error(ctxt->userData,
4594 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4595 }
4596 ctxt->wellFormed = 0;
4597 ctxt->disableSAX = 1;
4598 if (name != NULL) xmlFree(name);
4599 return(-1);
4600 }
4601
4602 SKIP_BLANKS;
4603 /*
4604 * Pop-up of finished entities.
4605 */
4606 while ((RAW == 0) && (ctxt->inputNr > 1))
4607 xmlPopInput(ctxt);
4608 SKIP_BLANKS;
4609
4610 if (RAW != '>') {
4611 ctxt->errNo = XML_ERR_GT_REQUIRED;
4612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4613 ctxt->sax->error(ctxt->userData,
4614 "xmlParseElementDecl: expected '>' at the end\n");
4615 ctxt->wellFormed = 0;
4616 ctxt->disableSAX = 1;
4617 } else {
4618 if (input != ctxt->input) {
4619 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4621 ctxt->sax->error(ctxt->userData,
4622"Element declaration doesn't start and stop in the same entity\n");
4623 ctxt->wellFormed = 0;
4624 ctxt->disableSAX = 1;
4625 }
4626
4627 NEXT;
4628 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4629 (ctxt->sax->elementDecl != NULL))
4630 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4631 content);
4632 }
4633 if (content != NULL) {
4634 xmlFreeElementContent(content);
4635 }
4636 if (name != NULL) {
4637 xmlFree(name);
4638 }
4639 }
4640 return(ret);
4641}
4642
4643/**
Owen Taylor3473f882001-02-23 17:55:21 +00004644 * xmlParseConditionalSections
4645 * @ctxt: an XML parser context
4646 *
4647 * [61] conditionalSect ::= includeSect | ignoreSect
4648 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4649 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4650 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4651 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4652 */
4653
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004654static void
Owen Taylor3473f882001-02-23 17:55:21 +00004655xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4656 SKIP(3);
4657 SKIP_BLANKS;
4658 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4659 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4660 (NXT(6) == 'E')) {
4661 SKIP(7);
4662 SKIP_BLANKS;
4663 if (RAW != '[') {
4664 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4666 ctxt->sax->error(ctxt->userData,
4667 "XML conditional section '[' expected\n");
4668 ctxt->wellFormed = 0;
4669 ctxt->disableSAX = 1;
4670 } else {
4671 NEXT;
4672 }
4673 if (xmlParserDebugEntities) {
4674 if ((ctxt->input != NULL) && (ctxt->input->filename))
4675 xmlGenericError(xmlGenericErrorContext,
4676 "%s(%d): ", ctxt->input->filename,
4677 ctxt->input->line);
4678 xmlGenericError(xmlGenericErrorContext,
4679 "Entering INCLUDE Conditional Section\n");
4680 }
4681
4682 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4683 (NXT(2) != '>'))) {
4684 const xmlChar *check = CUR_PTR;
4685 int cons = ctxt->input->consumed;
4686 int tok = ctxt->token;
4687
4688 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4689 xmlParseConditionalSections(ctxt);
4690 } else if (IS_BLANK(CUR)) {
4691 NEXT;
4692 } else if (RAW == '%') {
4693 xmlParsePEReference(ctxt);
4694 } else
4695 xmlParseMarkupDecl(ctxt);
4696
4697 /*
4698 * Pop-up of finished entities.
4699 */
4700 while ((RAW == 0) && (ctxt->inputNr > 1))
4701 xmlPopInput(ctxt);
4702
4703 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4704 (tok == ctxt->token)) {
4705 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4707 ctxt->sax->error(ctxt->userData,
4708 "Content error in the external subset\n");
4709 ctxt->wellFormed = 0;
4710 ctxt->disableSAX = 1;
4711 break;
4712 }
4713 }
4714 if (xmlParserDebugEntities) {
4715 if ((ctxt->input != NULL) && (ctxt->input->filename))
4716 xmlGenericError(xmlGenericErrorContext,
4717 "%s(%d): ", ctxt->input->filename,
4718 ctxt->input->line);
4719 xmlGenericError(xmlGenericErrorContext,
4720 "Leaving INCLUDE Conditional Section\n");
4721 }
4722
4723 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4724 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4725 int state;
4726 int instate;
4727 int depth = 0;
4728
4729 SKIP(6);
4730 SKIP_BLANKS;
4731 if (RAW != '[') {
4732 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4734 ctxt->sax->error(ctxt->userData,
4735 "XML conditional section '[' expected\n");
4736 ctxt->wellFormed = 0;
4737 ctxt->disableSAX = 1;
4738 } else {
4739 NEXT;
4740 }
4741 if (xmlParserDebugEntities) {
4742 if ((ctxt->input != NULL) && (ctxt->input->filename))
4743 xmlGenericError(xmlGenericErrorContext,
4744 "%s(%d): ", ctxt->input->filename,
4745 ctxt->input->line);
4746 xmlGenericError(xmlGenericErrorContext,
4747 "Entering IGNORE Conditional Section\n");
4748 }
4749
4750 /*
4751 * Parse up to the end of the conditionnal section
4752 * But disable SAX event generating DTD building in the meantime
4753 */
4754 state = ctxt->disableSAX;
4755 instate = ctxt->instate;
4756 ctxt->disableSAX = 1;
4757 ctxt->instate = XML_PARSER_IGNORE;
4758
4759 while (depth >= 0) {
4760 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4761 depth++;
4762 SKIP(3);
4763 continue;
4764 }
4765 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4766 if (--depth >= 0) SKIP(3);
4767 continue;
4768 }
4769 NEXT;
4770 continue;
4771 }
4772
4773 ctxt->disableSAX = state;
4774 ctxt->instate = instate;
4775
4776 if (xmlParserDebugEntities) {
4777 if ((ctxt->input != NULL) && (ctxt->input->filename))
4778 xmlGenericError(xmlGenericErrorContext,
4779 "%s(%d): ", ctxt->input->filename,
4780 ctxt->input->line);
4781 xmlGenericError(xmlGenericErrorContext,
4782 "Leaving IGNORE Conditional Section\n");
4783 }
4784
4785 } else {
4786 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4788 ctxt->sax->error(ctxt->userData,
4789 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4790 ctxt->wellFormed = 0;
4791 ctxt->disableSAX = 1;
4792 }
4793
4794 if (RAW == 0)
4795 SHRINK;
4796
4797 if (RAW == 0) {
4798 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4800 ctxt->sax->error(ctxt->userData,
4801 "XML conditional section not closed\n");
4802 ctxt->wellFormed = 0;
4803 ctxt->disableSAX = 1;
4804 } else {
4805 SKIP(3);
4806 }
4807}
4808
4809/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004810 * xmlParseMarkupDecl:
4811 * @ctxt: an XML parser context
4812 *
4813 * parse Markup declarations
4814 *
4815 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4816 * NotationDecl | PI | Comment
4817 *
4818 * [ VC: Proper Declaration/PE Nesting ]
4819 * Parameter-entity replacement text must be properly nested with
4820 * markup declarations. That is to say, if either the first character
4821 * or the last character of a markup declaration (markupdecl above) is
4822 * contained in the replacement text for a parameter-entity reference,
4823 * both must be contained in the same replacement text.
4824 *
4825 * [ WFC: PEs in Internal Subset ]
4826 * In the internal DTD subset, parameter-entity references can occur
4827 * only where markup declarations can occur, not within markup declarations.
4828 * (This does not apply to references that occur in external parameter
4829 * entities or to the external subset.)
4830 */
4831void
4832xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4833 GROW;
4834 xmlParseElementDecl(ctxt);
4835 xmlParseAttributeListDecl(ctxt);
4836 xmlParseEntityDecl(ctxt);
4837 xmlParseNotationDecl(ctxt);
4838 xmlParsePI(ctxt);
4839 xmlParseComment(ctxt);
4840 /*
4841 * This is only for internal subset. On external entities,
4842 * the replacement is done before parsing stage
4843 */
4844 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4845 xmlParsePEReference(ctxt);
4846
4847 /*
4848 * Conditional sections are allowed from entities included
4849 * by PE References in the internal subset.
4850 */
4851 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4852 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4853 xmlParseConditionalSections(ctxt);
4854 }
4855 }
4856
4857 ctxt->instate = XML_PARSER_DTD;
4858}
4859
4860/**
4861 * xmlParseTextDecl:
4862 * @ctxt: an XML parser context
4863 *
4864 * parse an XML declaration header for external entities
4865 *
4866 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4867 *
4868 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4869 */
4870
4871void
4872xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4873 xmlChar *version;
4874
4875 /*
4876 * We know that '<?xml' is here.
4877 */
4878 if ((RAW == '<') && (NXT(1) == '?') &&
4879 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4880 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4881 SKIP(5);
4882 } else {
4883 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4885 ctxt->sax->error(ctxt->userData,
4886 "Text declaration '<?xml' required\n");
4887 ctxt->wellFormed = 0;
4888 ctxt->disableSAX = 1;
4889
4890 return;
4891 }
4892
4893 if (!IS_BLANK(CUR)) {
4894 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4896 ctxt->sax->error(ctxt->userData,
4897 "Space needed after '<?xml'\n");
4898 ctxt->wellFormed = 0;
4899 ctxt->disableSAX = 1;
4900 }
4901 SKIP_BLANKS;
4902
4903 /*
4904 * We may have the VersionInfo here.
4905 */
4906 version = xmlParseVersionInfo(ctxt);
4907 if (version == NULL)
4908 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4909 ctxt->input->version = version;
4910
4911 /*
4912 * We must have the encoding declaration
4913 */
4914 if (!IS_BLANK(CUR)) {
4915 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4917 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4918 ctxt->wellFormed = 0;
4919 ctxt->disableSAX = 1;
4920 }
4921 xmlParseEncodingDecl(ctxt);
4922 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4923 /*
4924 * The XML REC instructs us to stop parsing right here
4925 */
4926 return;
4927 }
4928
4929 SKIP_BLANKS;
4930 if ((RAW == '?') && (NXT(1) == '>')) {
4931 SKIP(2);
4932 } else if (RAW == '>') {
4933 /* Deprecated old WD ... */
4934 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4936 ctxt->sax->error(ctxt->userData,
4937 "XML declaration must end-up with '?>'\n");
4938 ctxt->wellFormed = 0;
4939 ctxt->disableSAX = 1;
4940 NEXT;
4941 } else {
4942 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "parsing XML declaration: '?>' expected\n");
4946 ctxt->wellFormed = 0;
4947 ctxt->disableSAX = 1;
4948 MOVETO_ENDTAG(CUR_PTR);
4949 NEXT;
4950 }
4951}
4952
4953/**
Owen Taylor3473f882001-02-23 17:55:21 +00004954 * xmlParseExternalSubset:
4955 * @ctxt: an XML parser context
4956 * @ExternalID: the external identifier
4957 * @SystemID: the system identifier (or URL)
4958 *
4959 * parse Markup declarations from an external subset
4960 *
4961 * [30] extSubset ::= textDecl? extSubsetDecl
4962 *
4963 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4964 */
4965void
4966xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4967 const xmlChar *SystemID) {
4968 GROW;
4969 if ((RAW == '<') && (NXT(1) == '?') &&
4970 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4971 (NXT(4) == 'l')) {
4972 xmlParseTextDecl(ctxt);
4973 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4974 /*
4975 * The XML REC instructs us to stop parsing right here
4976 */
4977 ctxt->instate = XML_PARSER_EOF;
4978 return;
4979 }
4980 }
4981 if (ctxt->myDoc == NULL) {
4982 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4983 }
4984 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4985 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4986
4987 ctxt->instate = XML_PARSER_DTD;
4988 ctxt->external = 1;
4989 while (((RAW == '<') && (NXT(1) == '?')) ||
4990 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00004991 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004992 const xmlChar *check = CUR_PTR;
4993 int cons = ctxt->input->consumed;
4994 int tok = ctxt->token;
4995
4996 GROW;
4997 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4998 xmlParseConditionalSections(ctxt);
4999 } else if (IS_BLANK(CUR)) {
5000 NEXT;
5001 } else if (RAW == '%') {
5002 xmlParsePEReference(ctxt);
5003 } else
5004 xmlParseMarkupDecl(ctxt);
5005
5006 /*
5007 * Pop-up of finished entities.
5008 */
5009 while ((RAW == 0) && (ctxt->inputNr > 1))
5010 xmlPopInput(ctxt);
5011
5012 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5013 (tok == ctxt->token)) {
5014 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5016 ctxt->sax->error(ctxt->userData,
5017 "Content error in the external subset\n");
5018 ctxt->wellFormed = 0;
5019 ctxt->disableSAX = 1;
5020 break;
5021 }
5022 }
5023
5024 if (RAW != 0) {
5025 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5027 ctxt->sax->error(ctxt->userData,
5028 "Extra content at the end of the document\n");
5029 ctxt->wellFormed = 0;
5030 ctxt->disableSAX = 1;
5031 }
5032
5033}
5034
5035/**
5036 * xmlParseReference:
5037 * @ctxt: an XML parser context
5038 *
5039 * parse and handle entity references in content, depending on the SAX
5040 * interface, this may end-up in a call to character() if this is a
5041 * CharRef, a predefined entity, if there is no reference() callback.
5042 * or if the parser was asked to switch to that mode.
5043 *
5044 * [67] Reference ::= EntityRef | CharRef
5045 */
5046void
5047xmlParseReference(xmlParserCtxtPtr ctxt) {
5048 xmlEntityPtr ent;
5049 xmlChar *val;
5050 if (RAW != '&') return;
5051
5052 if (NXT(1) == '#') {
5053 int i = 0;
5054 xmlChar out[10];
5055 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005056 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005057
5058 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5059 /*
5060 * So we are using non-UTF-8 buffers
5061 * Check that the char fit on 8bits, if not
5062 * generate a CharRef.
5063 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005064 if (value <= 0xFF) {
5065 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005066 out[1] = 0;
5067 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5068 (!ctxt->disableSAX))
5069 ctxt->sax->characters(ctxt->userData, out, 1);
5070 } else {
5071 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005072 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005073 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005074 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005075 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5076 (!ctxt->disableSAX))
5077 ctxt->sax->reference(ctxt->userData, out);
5078 }
5079 } else {
5080 /*
5081 * Just encode the value in UTF-8
5082 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005083 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005084 out[i] = 0;
5085 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5086 (!ctxt->disableSAX))
5087 ctxt->sax->characters(ctxt->userData, out, i);
5088 }
5089 } else {
5090 ent = xmlParseEntityRef(ctxt);
5091 if (ent == NULL) return;
5092 if ((ent->name != NULL) &&
5093 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5094 xmlNodePtr list = NULL;
5095 int ret;
5096
5097
5098 /*
5099 * The first reference to the entity trigger a parsing phase
5100 * where the ent->children is filled with the result from
5101 * the parsing.
5102 */
5103 if (ent->children == NULL) {
5104 xmlChar *value;
5105 value = ent->content;
5106
5107 /*
5108 * Check that this entity is well formed
5109 */
5110 if ((value != NULL) &&
5111 (value[1] == 0) && (value[0] == '<') &&
5112 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5113 /*
5114 * DONE: get definite answer on this !!!
5115 * Lots of entity decls are used to declare a single
5116 * char
5117 * <!ENTITY lt "<">
5118 * Which seems to be valid since
5119 * 2.4: The ampersand character (&) and the left angle
5120 * bracket (<) may appear in their literal form only
5121 * when used ... They are also legal within the literal
5122 * entity value of an internal entity declaration;i
5123 * see "4.3.2 Well-Formed Parsed Entities".
5124 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5125 * Looking at the OASIS test suite and James Clark
5126 * tests, this is broken. However the XML REC uses
5127 * it. Is the XML REC not well-formed ????
5128 * This is a hack to avoid this problem
5129 *
5130 * ANSWER: since lt gt amp .. are already defined,
5131 * this is a redefinition and hence the fact that the
5132 * contentis not well balanced is not a Wf error, this
5133 * is lousy but acceptable.
5134 */
5135 list = xmlNewDocText(ctxt->myDoc, value);
5136 if (list != NULL) {
5137 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5138 (ent->children == NULL)) {
5139 ent->children = list;
5140 ent->last = list;
5141 list->parent = (xmlNodePtr) ent;
5142 } else {
5143 xmlFreeNodeList(list);
5144 }
5145 } else if (list != NULL) {
5146 xmlFreeNodeList(list);
5147 }
5148 } else {
5149 /*
5150 * 4.3.2: An internal general parsed entity is well-formed
5151 * if its replacement text matches the production labeled
5152 * content.
5153 */
5154 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5155 ctxt->depth++;
5156 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5157 ctxt->sax, NULL, ctxt->depth,
5158 value, &list);
5159 ctxt->depth--;
5160 } else if (ent->etype ==
5161 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5162 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005163 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005164 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005165 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005166 ctxt->depth--;
5167 } else {
5168 ret = -1;
5169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5170 ctxt->sax->error(ctxt->userData,
5171 "Internal: invalid entity type\n");
5172 }
5173 if (ret == XML_ERR_ENTITY_LOOP) {
5174 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5176 ctxt->sax->error(ctxt->userData,
5177 "Detected entity reference loop\n");
5178 ctxt->wellFormed = 0;
5179 ctxt->disableSAX = 1;
5180 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005181 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5182 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005183 (ent->children == NULL)) {
5184 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005185 if (ctxt->replaceEntities) {
5186 /*
5187 * Prune it directly in the generated document
5188 * except for single text nodes.
5189 */
5190 if ((list->type == XML_TEXT_NODE) &&
5191 (list->next == NULL)) {
5192 list->parent = (xmlNodePtr) ent;
5193 list = NULL;
5194 } else {
5195 while (list != NULL) {
5196 list->parent = (xmlNodePtr) ctxt->node;
5197 if (list->next == NULL)
5198 ent->last = list;
5199 list = list->next;
5200 }
5201 list = ent->children;
5202 }
5203 } else {
5204 while (list != NULL) {
5205 list->parent = (xmlNodePtr) ent;
5206 if (list->next == NULL)
5207 ent->last = list;
5208 list = list->next;
5209 }
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 } else {
5212 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005213 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 } else if (ret > 0) {
5216 ctxt->errNo = ret;
5217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5218 ctxt->sax->error(ctxt->userData,
5219 "Entity value required\n");
5220 ctxt->wellFormed = 0;
5221 ctxt->disableSAX = 1;
5222 } else if (list != NULL) {
5223 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005224 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005225 }
5226 }
5227 }
5228 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5229 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5230 /*
5231 * Create a node.
5232 */
5233 ctxt->sax->reference(ctxt->userData, ent->name);
5234 return;
5235 } else if (ctxt->replaceEntities) {
5236 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5237 /*
5238 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005239 * a simple tree copy for all references except the first
5240 * In the first occurrence, list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005241 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005242 if (list == NULL) {
5243 xmlNodePtr new, cur;
5244 cur = ent->children;
5245 while (cur != NULL) {
5246 new = xmlCopyNode(cur, 1);
5247 xmlAddChild(ctxt->node, new);
5248 if (cur == ent->last)
5249 break;
5250 cur = cur->next;
5251 }
5252 } else {
5253 /*
5254 * the name change is to avoid coalescing of the
5255 * node with a possible previous text one which
5256 * would make ent->children a dangling pointer
5257 */
5258 if (ent->children->type == XML_TEXT_NODE)
5259 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5260 if ((ent->last != ent->children) &&
5261 (ent->last->type == XML_TEXT_NODE))
5262 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5263 xmlAddChildList(ctxt->node, ent->children);
5264 }
5265
Owen Taylor3473f882001-02-23 17:55:21 +00005266 /*
5267 * This is to avoid a nasty side effect, see
5268 * characters() in SAX.c
5269 */
5270 ctxt->nodemem = 0;
5271 ctxt->nodelen = 0;
5272 return;
5273 } else {
5274 /*
5275 * Probably running in SAX mode
5276 */
5277 xmlParserInputPtr input;
5278
5279 input = xmlNewEntityInputStream(ctxt, ent);
5280 xmlPushInput(ctxt, input);
5281 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5282 (RAW == '<') && (NXT(1) == '?') &&
5283 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5284 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5285 xmlParseTextDecl(ctxt);
5286 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5287 /*
5288 * The XML REC instructs us to stop parsing right here
5289 */
5290 ctxt->instate = XML_PARSER_EOF;
5291 return;
5292 }
5293 if (input->standalone == 1) {
5294 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5296 ctxt->sax->error(ctxt->userData,
5297 "external parsed entities cannot be standalone\n");
5298 ctxt->wellFormed = 0;
5299 ctxt->disableSAX = 1;
5300 }
5301 }
5302 return;
5303 }
5304 }
5305 } else {
5306 val = ent->content;
5307 if (val == NULL) return;
5308 /*
5309 * inline the entity.
5310 */
5311 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5312 (!ctxt->disableSAX))
5313 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5314 }
5315 }
5316}
5317
5318/**
5319 * xmlParseEntityRef:
5320 * @ctxt: an XML parser context
5321 *
5322 * parse ENTITY references declarations
5323 *
5324 * [68] EntityRef ::= '&' Name ';'
5325 *
5326 * [ WFC: Entity Declared ]
5327 * In a document without any DTD, a document with only an internal DTD
5328 * subset which contains no parameter entity references, or a document
5329 * with "standalone='yes'", the Name given in the entity reference
5330 * must match that in an entity declaration, except that well-formed
5331 * documents need not declare any of the following entities: amp, lt,
5332 * gt, apos, quot. The declaration of a parameter entity must precede
5333 * any reference to it. Similarly, the declaration of a general entity
5334 * must precede any reference to it which appears in a default value in an
5335 * attribute-list declaration. Note that if entities are declared in the
5336 * external subset or in external parameter entities, a non-validating
5337 * processor is not obligated to read and process their declarations;
5338 * for such documents, the rule that an entity must be declared is a
5339 * well-formedness constraint only if standalone='yes'.
5340 *
5341 * [ WFC: Parsed Entity ]
5342 * An entity reference must not contain the name of an unparsed entity
5343 *
5344 * Returns the xmlEntityPtr if found, or NULL otherwise.
5345 */
5346xmlEntityPtr
5347xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5348 xmlChar *name;
5349 xmlEntityPtr ent = NULL;
5350
5351 GROW;
5352
5353 if (RAW == '&') {
5354 NEXT;
5355 name = xmlParseName(ctxt);
5356 if (name == NULL) {
5357 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5359 ctxt->sax->error(ctxt->userData,
5360 "xmlParseEntityRef: no name\n");
5361 ctxt->wellFormed = 0;
5362 ctxt->disableSAX = 1;
5363 } else {
5364 if (RAW == ';') {
5365 NEXT;
5366 /*
5367 * Ask first SAX for entity resolution, otherwise try the
5368 * predefined set.
5369 */
5370 if (ctxt->sax != NULL) {
5371 if (ctxt->sax->getEntity != NULL)
5372 ent = ctxt->sax->getEntity(ctxt->userData, name);
5373 if (ent == NULL)
5374 ent = xmlGetPredefinedEntity(name);
5375 }
5376 /*
5377 * [ WFC: Entity Declared ]
5378 * In a document without any DTD, a document with only an
5379 * internal DTD subset which contains no parameter entity
5380 * references, or a document with "standalone='yes'", the
5381 * Name given in the entity reference must match that in an
5382 * entity declaration, except that well-formed documents
5383 * need not declare any of the following entities: amp, lt,
5384 * gt, apos, quot.
5385 * The declaration of a parameter entity must precede any
5386 * reference to it.
5387 * Similarly, the declaration of a general entity must
5388 * precede any reference to it which appears in a default
5389 * value in an attribute-list declaration. Note that if
5390 * entities are declared in the external subset or in
5391 * external parameter entities, a non-validating processor
5392 * is not obligated to read and process their declarations;
5393 * for such documents, the rule that an entity must be
5394 * declared is a well-formedness constraint only if
5395 * standalone='yes'.
5396 */
5397 if (ent == NULL) {
5398 if ((ctxt->standalone == 1) ||
5399 ((ctxt->hasExternalSubset == 0) &&
5400 (ctxt->hasPErefs == 0))) {
5401 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5403 ctxt->sax->error(ctxt->userData,
5404 "Entity '%s' not defined\n", name);
5405 ctxt->wellFormed = 0;
5406 ctxt->disableSAX = 1;
5407 } else {
5408 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005410 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005411 "Entity '%s' not defined\n", name);
5412 }
5413 }
5414
5415 /*
5416 * [ WFC: Parsed Entity ]
5417 * An entity reference must not contain the name of an
5418 * unparsed entity
5419 */
5420 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5421 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5423 ctxt->sax->error(ctxt->userData,
5424 "Entity reference to unparsed entity %s\n", name);
5425 ctxt->wellFormed = 0;
5426 ctxt->disableSAX = 1;
5427 }
5428
5429 /*
5430 * [ WFC: No External Entity References ]
5431 * Attribute values cannot contain direct or indirect
5432 * entity references to external entities.
5433 */
5434 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5435 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5436 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5438 ctxt->sax->error(ctxt->userData,
5439 "Attribute references external entity '%s'\n", name);
5440 ctxt->wellFormed = 0;
5441 ctxt->disableSAX = 1;
5442 }
5443 /*
5444 * [ WFC: No < in Attribute Values ]
5445 * The replacement text of any entity referred to directly or
5446 * indirectly in an attribute value (other than "&lt;") must
5447 * not contain a <.
5448 */
5449 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5450 (ent != NULL) &&
5451 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5452 (ent->content != NULL) &&
5453 (xmlStrchr(ent->content, '<'))) {
5454 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5456 ctxt->sax->error(ctxt->userData,
5457 "'<' in entity '%s' is not allowed in attributes values\n", name);
5458 ctxt->wellFormed = 0;
5459 ctxt->disableSAX = 1;
5460 }
5461
5462 /*
5463 * Internal check, no parameter entities here ...
5464 */
5465 else {
5466 switch (ent->etype) {
5467 case XML_INTERNAL_PARAMETER_ENTITY:
5468 case XML_EXTERNAL_PARAMETER_ENTITY:
5469 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "Attempt to reference the parameter entity '%s'\n", name);
5473 ctxt->wellFormed = 0;
5474 ctxt->disableSAX = 1;
5475 break;
5476 default:
5477 break;
5478 }
5479 }
5480
5481 /*
5482 * [ WFC: No Recursion ]
5483 * A parsed entity must not contain a recursive reference
5484 * to itself, either directly or indirectly.
5485 * Done somewhere else
5486 */
5487
5488 } else {
5489 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5491 ctxt->sax->error(ctxt->userData,
5492 "xmlParseEntityRef: expecting ';'\n");
5493 ctxt->wellFormed = 0;
5494 ctxt->disableSAX = 1;
5495 }
5496 xmlFree(name);
5497 }
5498 }
5499 return(ent);
5500}
5501
5502/**
5503 * xmlParseStringEntityRef:
5504 * @ctxt: an XML parser context
5505 * @str: a pointer to an index in the string
5506 *
5507 * parse ENTITY references declarations, but this version parses it from
5508 * a string value.
5509 *
5510 * [68] EntityRef ::= '&' Name ';'
5511 *
5512 * [ WFC: Entity Declared ]
5513 * In a document without any DTD, a document with only an internal DTD
5514 * subset which contains no parameter entity references, or a document
5515 * with "standalone='yes'", the Name given in the entity reference
5516 * must match that in an entity declaration, except that well-formed
5517 * documents need not declare any of the following entities: amp, lt,
5518 * gt, apos, quot. The declaration of a parameter entity must precede
5519 * any reference to it. Similarly, the declaration of a general entity
5520 * must precede any reference to it which appears in a default value in an
5521 * attribute-list declaration. Note that if entities are declared in the
5522 * external subset or in external parameter entities, a non-validating
5523 * processor is not obligated to read and process their declarations;
5524 * for such documents, the rule that an entity must be declared is a
5525 * well-formedness constraint only if standalone='yes'.
5526 *
5527 * [ WFC: Parsed Entity ]
5528 * An entity reference must not contain the name of an unparsed entity
5529 *
5530 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5531 * is updated to the current location in the string.
5532 */
5533xmlEntityPtr
5534xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5535 xmlChar *name;
5536 const xmlChar *ptr;
5537 xmlChar cur;
5538 xmlEntityPtr ent = NULL;
5539
5540 if ((str == NULL) || (*str == NULL))
5541 return(NULL);
5542 ptr = *str;
5543 cur = *ptr;
5544 if (cur == '&') {
5545 ptr++;
5546 cur = *ptr;
5547 name = xmlParseStringName(ctxt, &ptr);
5548 if (name == NULL) {
5549 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5551 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005552 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005553 ctxt->wellFormed = 0;
5554 ctxt->disableSAX = 1;
5555 } else {
5556 if (*ptr == ';') {
5557 ptr++;
5558 /*
5559 * Ask first SAX for entity resolution, otherwise try the
5560 * predefined set.
5561 */
5562 if (ctxt->sax != NULL) {
5563 if (ctxt->sax->getEntity != NULL)
5564 ent = ctxt->sax->getEntity(ctxt->userData, name);
5565 if (ent == NULL)
5566 ent = xmlGetPredefinedEntity(name);
5567 }
5568 /*
5569 * [ WFC: Entity Declared ]
5570 * In a document without any DTD, a document with only an
5571 * internal DTD subset which contains no parameter entity
5572 * references, or a document with "standalone='yes'", the
5573 * Name given in the entity reference must match that in an
5574 * entity declaration, except that well-formed documents
5575 * need not declare any of the following entities: amp, lt,
5576 * gt, apos, quot.
5577 * The declaration of a parameter entity must precede any
5578 * reference to it.
5579 * Similarly, the declaration of a general entity must
5580 * precede any reference to it which appears in a default
5581 * value in an attribute-list declaration. Note that if
5582 * entities are declared in the external subset or in
5583 * external parameter entities, a non-validating processor
5584 * is not obligated to read and process their declarations;
5585 * for such documents, the rule that an entity must be
5586 * declared is a well-formedness constraint only if
5587 * standalone='yes'.
5588 */
5589 if (ent == NULL) {
5590 if ((ctxt->standalone == 1) ||
5591 ((ctxt->hasExternalSubset == 0) &&
5592 (ctxt->hasPErefs == 0))) {
5593 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5595 ctxt->sax->error(ctxt->userData,
5596 "Entity '%s' not defined\n", name);
5597 ctxt->wellFormed = 0;
5598 ctxt->disableSAX = 1;
5599 } else {
5600 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5601 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5602 ctxt->sax->warning(ctxt->userData,
5603 "Entity '%s' not defined\n", name);
5604 }
5605 }
5606
5607 /*
5608 * [ WFC: Parsed Entity ]
5609 * An entity reference must not contain the name of an
5610 * unparsed entity
5611 */
5612 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5613 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5615 ctxt->sax->error(ctxt->userData,
5616 "Entity reference to unparsed entity %s\n", name);
5617 ctxt->wellFormed = 0;
5618 ctxt->disableSAX = 1;
5619 }
5620
5621 /*
5622 * [ WFC: No External Entity References ]
5623 * Attribute values cannot contain direct or indirect
5624 * entity references to external entities.
5625 */
5626 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5627 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5628 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5630 ctxt->sax->error(ctxt->userData,
5631 "Attribute references external entity '%s'\n", name);
5632 ctxt->wellFormed = 0;
5633 ctxt->disableSAX = 1;
5634 }
5635 /*
5636 * [ WFC: No < in Attribute Values ]
5637 * The replacement text of any entity referred to directly or
5638 * indirectly in an attribute value (other than "&lt;") must
5639 * not contain a <.
5640 */
5641 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5642 (ent != NULL) &&
5643 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5644 (ent->content != NULL) &&
5645 (xmlStrchr(ent->content, '<'))) {
5646 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5648 ctxt->sax->error(ctxt->userData,
5649 "'<' in entity '%s' is not allowed in attributes values\n", name);
5650 ctxt->wellFormed = 0;
5651 ctxt->disableSAX = 1;
5652 }
5653
5654 /*
5655 * Internal check, no parameter entities here ...
5656 */
5657 else {
5658 switch (ent->etype) {
5659 case XML_INTERNAL_PARAMETER_ENTITY:
5660 case XML_EXTERNAL_PARAMETER_ENTITY:
5661 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5663 ctxt->sax->error(ctxt->userData,
5664 "Attempt to reference the parameter entity '%s'\n", name);
5665 ctxt->wellFormed = 0;
5666 ctxt->disableSAX = 1;
5667 break;
5668 default:
5669 break;
5670 }
5671 }
5672
5673 /*
5674 * [ WFC: No Recursion ]
5675 * A parsed entity must not contain a recursive reference
5676 * to itself, either directly or indirectly.
5677 * Done somewhwere else
5678 */
5679
5680 } else {
5681 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5683 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005684 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005685 ctxt->wellFormed = 0;
5686 ctxt->disableSAX = 1;
5687 }
5688 xmlFree(name);
5689 }
5690 }
5691 *str = ptr;
5692 return(ent);
5693}
5694
5695/**
5696 * xmlParsePEReference:
5697 * @ctxt: an XML parser context
5698 *
5699 * parse PEReference declarations
5700 * The entity content is handled directly by pushing it's content as
5701 * a new input stream.
5702 *
5703 * [69] PEReference ::= '%' Name ';'
5704 *
5705 * [ WFC: No Recursion ]
5706 * A parsed entity must not contain a recursive
5707 * reference to itself, either directly or indirectly.
5708 *
5709 * [ WFC: Entity Declared ]
5710 * In a document without any DTD, a document with only an internal DTD
5711 * subset which contains no parameter entity references, or a document
5712 * with "standalone='yes'", ... ... The declaration of a parameter
5713 * entity must precede any reference to it...
5714 *
5715 * [ VC: Entity Declared ]
5716 * In a document with an external subset or external parameter entities
5717 * with "standalone='no'", ... ... The declaration of a parameter entity
5718 * must precede any reference to it...
5719 *
5720 * [ WFC: In DTD ]
5721 * Parameter-entity references may only appear in the DTD.
5722 * NOTE: misleading but this is handled.
5723 */
5724void
5725xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5726 xmlChar *name;
5727 xmlEntityPtr entity = NULL;
5728 xmlParserInputPtr input;
5729
5730 if (RAW == '%') {
5731 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005732 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005733 if (name == NULL) {
5734 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5736 ctxt->sax->error(ctxt->userData,
5737 "xmlParsePEReference: no name\n");
5738 ctxt->wellFormed = 0;
5739 ctxt->disableSAX = 1;
5740 } else {
5741 if (RAW == ';') {
5742 NEXT;
5743 if ((ctxt->sax != NULL) &&
5744 (ctxt->sax->getParameterEntity != NULL))
5745 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5746 name);
5747 if (entity == NULL) {
5748 /*
5749 * [ WFC: Entity Declared ]
5750 * In a document without any DTD, a document with only an
5751 * internal DTD subset which contains no parameter entity
5752 * references, or a document with "standalone='yes'", ...
5753 * ... The declaration of a parameter entity must precede
5754 * any reference to it...
5755 */
5756 if ((ctxt->standalone == 1) ||
5757 ((ctxt->hasExternalSubset == 0) &&
5758 (ctxt->hasPErefs == 0))) {
5759 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5760 if ((!ctxt->disableSAX) &&
5761 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "PEReference: %%%s; not found\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 } else {
5767 /*
5768 * [ VC: Entity Declared ]
5769 * In a document with an external subset or external
5770 * parameter entities with "standalone='no'", ...
5771 * ... The declaration of a parameter entity must precede
5772 * any reference to it...
5773 */
5774 if ((!ctxt->disableSAX) &&
5775 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5776 ctxt->sax->warning(ctxt->userData,
5777 "PEReference: %%%s; not found\n", name);
5778 ctxt->valid = 0;
5779 }
5780 } else {
5781 /*
5782 * Internal checking in case the entity quest barfed
5783 */
5784 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5785 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5786 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5787 ctxt->sax->warning(ctxt->userData,
5788 "Internal: %%%s; is not a parameter entity\n", name);
5789 } else {
5790 /*
5791 * TODO !!!
5792 * handle the extra spaces added before and after
5793 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5794 */
5795 input = xmlNewEntityInputStream(ctxt, entity);
5796 xmlPushInput(ctxt, input);
5797 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5798 (RAW == '<') && (NXT(1) == '?') &&
5799 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5800 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5801 xmlParseTextDecl(ctxt);
5802 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5803 /*
5804 * The XML REC instructs us to stop parsing
5805 * right here
5806 */
5807 ctxt->instate = XML_PARSER_EOF;
5808 xmlFree(name);
5809 return;
5810 }
5811 }
5812 if (ctxt->token == 0)
5813 ctxt->token = ' ';
5814 }
5815 }
5816 ctxt->hasPErefs = 1;
5817 } else {
5818 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5820 ctxt->sax->error(ctxt->userData,
5821 "xmlParsePEReference: expecting ';'\n");
5822 ctxt->wellFormed = 0;
5823 ctxt->disableSAX = 1;
5824 }
5825 xmlFree(name);
5826 }
5827 }
5828}
5829
5830/**
5831 * xmlParseStringPEReference:
5832 * @ctxt: an XML parser context
5833 * @str: a pointer to an index in the string
5834 *
5835 * parse PEReference declarations
5836 *
5837 * [69] PEReference ::= '%' Name ';'
5838 *
5839 * [ WFC: No Recursion ]
5840 * A parsed entity must not contain a recursive
5841 * reference to itself, either directly or indirectly.
5842 *
5843 * [ WFC: Entity Declared ]
5844 * In a document without any DTD, a document with only an internal DTD
5845 * subset which contains no parameter entity references, or a document
5846 * with "standalone='yes'", ... ... The declaration of a parameter
5847 * entity must precede any reference to it...
5848 *
5849 * [ VC: Entity Declared ]
5850 * In a document with an external subset or external parameter entities
5851 * with "standalone='no'", ... ... The declaration of a parameter entity
5852 * must precede any reference to it...
5853 *
5854 * [ WFC: In DTD ]
5855 * Parameter-entity references may only appear in the DTD.
5856 * NOTE: misleading but this is handled.
5857 *
5858 * Returns the string of the entity content.
5859 * str is updated to the current value of the index
5860 */
5861xmlEntityPtr
5862xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5863 const xmlChar *ptr;
5864 xmlChar cur;
5865 xmlChar *name;
5866 xmlEntityPtr entity = NULL;
5867
5868 if ((str == NULL) || (*str == NULL)) return(NULL);
5869 ptr = *str;
5870 cur = *ptr;
5871 if (cur == '%') {
5872 ptr++;
5873 cur = *ptr;
5874 name = xmlParseStringName(ctxt, &ptr);
5875 if (name == NULL) {
5876 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5878 ctxt->sax->error(ctxt->userData,
5879 "xmlParseStringPEReference: no name\n");
5880 ctxt->wellFormed = 0;
5881 ctxt->disableSAX = 1;
5882 } else {
5883 cur = *ptr;
5884 if (cur == ';') {
5885 ptr++;
5886 cur = *ptr;
5887 if ((ctxt->sax != NULL) &&
5888 (ctxt->sax->getParameterEntity != NULL))
5889 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5890 name);
5891 if (entity == NULL) {
5892 /*
5893 * [ WFC: Entity Declared ]
5894 * In a document without any DTD, a document with only an
5895 * internal DTD subset which contains no parameter entity
5896 * references, or a document with "standalone='yes'", ...
5897 * ... The declaration of a parameter entity must precede
5898 * any reference to it...
5899 */
5900 if ((ctxt->standalone == 1) ||
5901 ((ctxt->hasExternalSubset == 0) &&
5902 (ctxt->hasPErefs == 0))) {
5903 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5905 ctxt->sax->error(ctxt->userData,
5906 "PEReference: %%%s; not found\n", name);
5907 ctxt->wellFormed = 0;
5908 ctxt->disableSAX = 1;
5909 } else {
5910 /*
5911 * [ VC: Entity Declared ]
5912 * In a document with an external subset or external
5913 * parameter entities with "standalone='no'", ...
5914 * ... The declaration of a parameter entity must
5915 * precede any reference to it...
5916 */
5917 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5918 ctxt->sax->warning(ctxt->userData,
5919 "PEReference: %%%s; not found\n", name);
5920 ctxt->valid = 0;
5921 }
5922 } else {
5923 /*
5924 * Internal checking in case the entity quest barfed
5925 */
5926 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5927 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5928 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5929 ctxt->sax->warning(ctxt->userData,
5930 "Internal: %%%s; is not a parameter entity\n", name);
5931 }
5932 }
5933 ctxt->hasPErefs = 1;
5934 } else {
5935 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5937 ctxt->sax->error(ctxt->userData,
5938 "xmlParseStringPEReference: expecting ';'\n");
5939 ctxt->wellFormed = 0;
5940 ctxt->disableSAX = 1;
5941 }
5942 xmlFree(name);
5943 }
5944 }
5945 *str = ptr;
5946 return(entity);
5947}
5948
5949/**
5950 * xmlParseDocTypeDecl:
5951 * @ctxt: an XML parser context
5952 *
5953 * parse a DOCTYPE declaration
5954 *
5955 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5956 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5957 *
5958 * [ VC: Root Element Type ]
5959 * The Name in the document type declaration must match the element
5960 * type of the root element.
5961 */
5962
5963void
5964xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5965 xmlChar *name = NULL;
5966 xmlChar *ExternalID = NULL;
5967 xmlChar *URI = NULL;
5968
5969 /*
5970 * We know that '<!DOCTYPE' has been detected.
5971 */
5972 SKIP(9);
5973
5974 SKIP_BLANKS;
5975
5976 /*
5977 * Parse the DOCTYPE name.
5978 */
5979 name = xmlParseName(ctxt);
5980 if (name == NULL) {
5981 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5983 ctxt->sax->error(ctxt->userData,
5984 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5985 ctxt->wellFormed = 0;
5986 ctxt->disableSAX = 1;
5987 }
5988 ctxt->intSubName = name;
5989
5990 SKIP_BLANKS;
5991
5992 /*
5993 * Check for SystemID and ExternalID
5994 */
5995 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5996
5997 if ((URI != NULL) || (ExternalID != NULL)) {
5998 ctxt->hasExternalSubset = 1;
5999 }
6000 ctxt->extSubURI = URI;
6001 ctxt->extSubSystem = ExternalID;
6002
6003 SKIP_BLANKS;
6004
6005 /*
6006 * Create and update the internal subset.
6007 */
6008 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6009 (!ctxt->disableSAX))
6010 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6011
6012 /*
6013 * Is there any internal subset declarations ?
6014 * they are handled separately in xmlParseInternalSubset()
6015 */
6016 if (RAW == '[')
6017 return;
6018
6019 /*
6020 * We should be at the end of the DOCTYPE declaration.
6021 */
6022 if (RAW != '>') {
6023 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6025 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6026 ctxt->wellFormed = 0;
6027 ctxt->disableSAX = 1;
6028 }
6029 NEXT;
6030}
6031
6032/**
6033 * xmlParseInternalsubset:
6034 * @ctxt: an XML parser context
6035 *
6036 * parse the internal subset declaration
6037 *
6038 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6039 */
6040
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006041static void
Owen Taylor3473f882001-02-23 17:55:21 +00006042xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6043 /*
6044 * Is there any DTD definition ?
6045 */
6046 if (RAW == '[') {
6047 ctxt->instate = XML_PARSER_DTD;
6048 NEXT;
6049 /*
6050 * Parse the succession of Markup declarations and
6051 * PEReferences.
6052 * Subsequence (markupdecl | PEReference | S)*
6053 */
6054 while (RAW != ']') {
6055 const xmlChar *check = CUR_PTR;
6056 int cons = ctxt->input->consumed;
6057
6058 SKIP_BLANKS;
6059 xmlParseMarkupDecl(ctxt);
6060 xmlParsePEReference(ctxt);
6061
6062 /*
6063 * Pop-up of finished entities.
6064 */
6065 while ((RAW == 0) && (ctxt->inputNr > 1))
6066 xmlPopInput(ctxt);
6067
6068 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6069 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6071 ctxt->sax->error(ctxt->userData,
6072 "xmlParseInternalSubset: error detected in Markup declaration\n");
6073 ctxt->wellFormed = 0;
6074 ctxt->disableSAX = 1;
6075 break;
6076 }
6077 }
6078 if (RAW == ']') {
6079 NEXT;
6080 SKIP_BLANKS;
6081 }
6082 }
6083
6084 /*
6085 * We should be at the end of the DOCTYPE declaration.
6086 */
6087 if (RAW != '>') {
6088 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6090 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6091 ctxt->wellFormed = 0;
6092 ctxt->disableSAX = 1;
6093 }
6094 NEXT;
6095}
6096
6097/**
6098 * xmlParseAttribute:
6099 * @ctxt: an XML parser context
6100 * @value: a xmlChar ** used to store the value of the attribute
6101 *
6102 * parse an attribute
6103 *
6104 * [41] Attribute ::= Name Eq AttValue
6105 *
6106 * [ WFC: No External Entity References ]
6107 * Attribute values cannot contain direct or indirect entity references
6108 * to external entities.
6109 *
6110 * [ WFC: No < in Attribute Values ]
6111 * The replacement text of any entity referred to directly or indirectly in
6112 * an attribute value (other than "&lt;") must not contain a <.
6113 *
6114 * [ VC: Attribute Value Type ]
6115 * The attribute must have been declared; the value must be of the type
6116 * declared for it.
6117 *
6118 * [25] Eq ::= S? '=' S?
6119 *
6120 * With namespace:
6121 *
6122 * [NS 11] Attribute ::= QName Eq AttValue
6123 *
6124 * Also the case QName == xmlns:??? is handled independently as a namespace
6125 * definition.
6126 *
6127 * Returns the attribute name, and the value in *value.
6128 */
6129
6130xmlChar *
6131xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6132 xmlChar *name, *val;
6133
6134 *value = NULL;
6135 name = xmlParseName(ctxt);
6136 if (name == NULL) {
6137 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6139 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6140 ctxt->wellFormed = 0;
6141 ctxt->disableSAX = 1;
6142 return(NULL);
6143 }
6144
6145 /*
6146 * read the value
6147 */
6148 SKIP_BLANKS;
6149 if (RAW == '=') {
6150 NEXT;
6151 SKIP_BLANKS;
6152 val = xmlParseAttValue(ctxt);
6153 ctxt->instate = XML_PARSER_CONTENT;
6154 } else {
6155 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6157 ctxt->sax->error(ctxt->userData,
6158 "Specification mandate value for attribute %s\n", name);
6159 ctxt->wellFormed = 0;
6160 ctxt->disableSAX = 1;
6161 xmlFree(name);
6162 return(NULL);
6163 }
6164
6165 /*
6166 * Check that xml:lang conforms to the specification
6167 * No more registered as an error, just generate a warning now
6168 * since this was deprecated in XML second edition
6169 */
6170 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6171 if (!xmlCheckLanguageID(val)) {
6172 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6173 ctxt->sax->warning(ctxt->userData,
6174 "Malformed value for xml:lang : %s\n", val);
6175 }
6176 }
6177
6178 /*
6179 * Check that xml:space conforms to the specification
6180 */
6181 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6182 if (xmlStrEqual(val, BAD_CAST "default"))
6183 *(ctxt->space) = 0;
6184 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6185 *(ctxt->space) = 1;
6186 else {
6187 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6189 ctxt->sax->error(ctxt->userData,
6190"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6191 val);
6192 ctxt->wellFormed = 0;
6193 ctxt->disableSAX = 1;
6194 }
6195 }
6196
6197 *value = val;
6198 return(name);
6199}
6200
6201/**
6202 * xmlParseStartTag:
6203 * @ctxt: an XML parser context
6204 *
6205 * parse a start of tag either for rule element or
6206 * EmptyElement. In both cases we don't parse the tag closing chars.
6207 *
6208 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6209 *
6210 * [ WFC: Unique Att Spec ]
6211 * No attribute name may appear more than once in the same start-tag or
6212 * empty-element tag.
6213 *
6214 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6215 *
6216 * [ WFC: Unique Att Spec ]
6217 * No attribute name may appear more than once in the same start-tag or
6218 * empty-element tag.
6219 *
6220 * With namespace:
6221 *
6222 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6223 *
6224 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6225 *
6226 * Returns the element name parsed
6227 */
6228
6229xmlChar *
6230xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6231 xmlChar *name;
6232 xmlChar *attname;
6233 xmlChar *attvalue;
6234 const xmlChar **atts = NULL;
6235 int nbatts = 0;
6236 int maxatts = 0;
6237 int i;
6238
6239 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006240 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006241
6242 name = xmlParseName(ctxt);
6243 if (name == NULL) {
6244 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6246 ctxt->sax->error(ctxt->userData,
6247 "xmlParseStartTag: invalid element name\n");
6248 ctxt->wellFormed = 0;
6249 ctxt->disableSAX = 1;
6250 return(NULL);
6251 }
6252
6253 /*
6254 * Now parse the attributes, it ends up with the ending
6255 *
6256 * (S Attribute)* S?
6257 */
6258 SKIP_BLANKS;
6259 GROW;
6260
Daniel Veillard21a0f912001-02-25 19:54:14 +00006261 while ((RAW != '>') &&
6262 ((RAW != '/') || (NXT(1) != '>')) &&
6263 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006264 const xmlChar *q = CUR_PTR;
6265 int cons = ctxt->input->consumed;
6266
6267 attname = xmlParseAttribute(ctxt, &attvalue);
6268 if ((attname != NULL) && (attvalue != NULL)) {
6269 /*
6270 * [ WFC: Unique Att Spec ]
6271 * No attribute name may appear more than once in the same
6272 * start-tag or empty-element tag.
6273 */
6274 for (i = 0; i < nbatts;i += 2) {
6275 if (xmlStrEqual(atts[i], attname)) {
6276 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6278 ctxt->sax->error(ctxt->userData,
6279 "Attribute %s redefined\n",
6280 attname);
6281 ctxt->wellFormed = 0;
6282 ctxt->disableSAX = 1;
6283 xmlFree(attname);
6284 xmlFree(attvalue);
6285 goto failed;
6286 }
6287 }
6288
6289 /*
6290 * Add the pair to atts
6291 */
6292 if (atts == NULL) {
6293 maxatts = 10;
6294 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6295 if (atts == NULL) {
6296 xmlGenericError(xmlGenericErrorContext,
6297 "malloc of %ld byte failed\n",
6298 maxatts * (long)sizeof(xmlChar *));
6299 return(NULL);
6300 }
6301 } else if (nbatts + 4 > maxatts) {
6302 maxatts *= 2;
6303 atts = (const xmlChar **) xmlRealloc((void *) atts,
6304 maxatts * sizeof(xmlChar *));
6305 if (atts == NULL) {
6306 xmlGenericError(xmlGenericErrorContext,
6307 "realloc of %ld byte failed\n",
6308 maxatts * (long)sizeof(xmlChar *));
6309 return(NULL);
6310 }
6311 }
6312 atts[nbatts++] = attname;
6313 atts[nbatts++] = attvalue;
6314 atts[nbatts] = NULL;
6315 atts[nbatts + 1] = NULL;
6316 } else {
6317 if (attname != NULL)
6318 xmlFree(attname);
6319 if (attvalue != NULL)
6320 xmlFree(attvalue);
6321 }
6322
6323failed:
6324
6325 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6326 break;
6327 if (!IS_BLANK(RAW)) {
6328 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6329 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6330 ctxt->sax->error(ctxt->userData,
6331 "attributes construct error\n");
6332 ctxt->wellFormed = 0;
6333 ctxt->disableSAX = 1;
6334 }
6335 SKIP_BLANKS;
6336 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6337 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6339 ctxt->sax->error(ctxt->userData,
6340 "xmlParseStartTag: problem parsing attributes\n");
6341 ctxt->wellFormed = 0;
6342 ctxt->disableSAX = 1;
6343 break;
6344 }
6345 GROW;
6346 }
6347
6348 /*
6349 * SAX: Start of Element !
6350 */
6351 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6352 (!ctxt->disableSAX))
6353 ctxt->sax->startElement(ctxt->userData, name, atts);
6354
6355 if (atts != NULL) {
6356 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6357 xmlFree((void *) atts);
6358 }
6359 return(name);
6360}
6361
6362/**
6363 * xmlParseEndTag:
6364 * @ctxt: an XML parser context
6365 *
6366 * parse an end of tag
6367 *
6368 * [42] ETag ::= '</' Name S? '>'
6369 *
6370 * With namespace
6371 *
6372 * [NS 9] ETag ::= '</' QName S? '>'
6373 */
6374
6375void
6376xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6377 xmlChar *name;
6378 xmlChar *oldname;
6379
6380 GROW;
6381 if ((RAW != '<') || (NXT(1) != '/')) {
6382 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6384 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6385 ctxt->wellFormed = 0;
6386 ctxt->disableSAX = 1;
6387 return;
6388 }
6389 SKIP(2);
6390
6391 name = xmlParseName(ctxt);
6392
6393 /*
6394 * We should definitely be at the ending "S? '>'" part
6395 */
6396 GROW;
6397 SKIP_BLANKS;
6398 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6399 ctxt->errNo = XML_ERR_GT_REQUIRED;
6400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6401 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6402 ctxt->wellFormed = 0;
6403 ctxt->disableSAX = 1;
6404 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006405 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006406
6407 /*
6408 * [ WFC: Element Type Match ]
6409 * The Name in an element's end-tag must match the element type in the
6410 * start-tag.
6411 *
6412 */
6413 if ((name == NULL) || (ctxt->name == NULL) ||
6414 (!xmlStrEqual(name, ctxt->name))) {
6415 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6417 if ((name != NULL) && (ctxt->name != NULL)) {
6418 ctxt->sax->error(ctxt->userData,
6419 "Opening and ending tag mismatch: %s and %s\n",
6420 ctxt->name, name);
6421 } else if (ctxt->name != NULL) {
6422 ctxt->sax->error(ctxt->userData,
6423 "Ending tag eror for: %s\n", ctxt->name);
6424 } else {
6425 ctxt->sax->error(ctxt->userData,
6426 "Ending tag error: internal error ???\n");
6427 }
6428
6429 }
6430 ctxt->wellFormed = 0;
6431 ctxt->disableSAX = 1;
6432 }
6433
6434 /*
6435 * SAX: End of Tag
6436 */
6437 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6438 (!ctxt->disableSAX))
6439 ctxt->sax->endElement(ctxt->userData, name);
6440
6441 if (name != NULL)
6442 xmlFree(name);
6443 oldname = namePop(ctxt);
6444 spacePop(ctxt);
6445 if (oldname != NULL) {
6446#ifdef DEBUG_STACK
6447 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6448#endif
6449 xmlFree(oldname);
6450 }
6451 return;
6452}
6453
6454/**
6455 * xmlParseCDSect:
6456 * @ctxt: an XML parser context
6457 *
6458 * Parse escaped pure raw content.
6459 *
6460 * [18] CDSect ::= CDStart CData CDEnd
6461 *
6462 * [19] CDStart ::= '<![CDATA['
6463 *
6464 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6465 *
6466 * [21] CDEnd ::= ']]>'
6467 */
6468void
6469xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6470 xmlChar *buf = NULL;
6471 int len = 0;
6472 int size = XML_PARSER_BUFFER_SIZE;
6473 int r, rl;
6474 int s, sl;
6475 int cur, l;
6476 int count = 0;
6477
6478 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6479 (NXT(2) == '[') && (NXT(3) == 'C') &&
6480 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6481 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6482 (NXT(8) == '[')) {
6483 SKIP(9);
6484 } else
6485 return;
6486
6487 ctxt->instate = XML_PARSER_CDATA_SECTION;
6488 r = CUR_CHAR(rl);
6489 if (!IS_CHAR(r)) {
6490 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6492 ctxt->sax->error(ctxt->userData,
6493 "CData section not finished\n");
6494 ctxt->wellFormed = 0;
6495 ctxt->disableSAX = 1;
6496 ctxt->instate = XML_PARSER_CONTENT;
6497 return;
6498 }
6499 NEXTL(rl);
6500 s = CUR_CHAR(sl);
6501 if (!IS_CHAR(s)) {
6502 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6504 ctxt->sax->error(ctxt->userData,
6505 "CData section not finished\n");
6506 ctxt->wellFormed = 0;
6507 ctxt->disableSAX = 1;
6508 ctxt->instate = XML_PARSER_CONTENT;
6509 return;
6510 }
6511 NEXTL(sl);
6512 cur = CUR_CHAR(l);
6513 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6514 if (buf == NULL) {
6515 xmlGenericError(xmlGenericErrorContext,
6516 "malloc of %d byte failed\n", size);
6517 return;
6518 }
6519 while (IS_CHAR(cur) &&
6520 ((r != ']') || (s != ']') || (cur != '>'))) {
6521 if (len + 5 >= size) {
6522 size *= 2;
6523 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6524 if (buf == NULL) {
6525 xmlGenericError(xmlGenericErrorContext,
6526 "realloc of %d byte failed\n", size);
6527 return;
6528 }
6529 }
6530 COPY_BUF(rl,buf,len,r);
6531 r = s;
6532 rl = sl;
6533 s = cur;
6534 sl = l;
6535 count++;
6536 if (count > 50) {
6537 GROW;
6538 count = 0;
6539 }
6540 NEXTL(l);
6541 cur = CUR_CHAR(l);
6542 }
6543 buf[len] = 0;
6544 ctxt->instate = XML_PARSER_CONTENT;
6545 if (cur != '>') {
6546 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6548 ctxt->sax->error(ctxt->userData,
6549 "CData section not finished\n%.50s\n", buf);
6550 ctxt->wellFormed = 0;
6551 ctxt->disableSAX = 1;
6552 xmlFree(buf);
6553 return;
6554 }
6555 NEXTL(l);
6556
6557 /*
6558 * Ok the buffer is to be consumed as cdata.
6559 */
6560 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6561 if (ctxt->sax->cdataBlock != NULL)
6562 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006563 else if (ctxt->sax->characters != NULL)
6564 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
6566 xmlFree(buf);
6567}
6568
6569/**
6570 * xmlParseContent:
6571 * @ctxt: an XML parser context
6572 *
6573 * Parse a content:
6574 *
6575 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6576 */
6577
6578void
6579xmlParseContent(xmlParserCtxtPtr ctxt) {
6580 GROW;
6581 while (((RAW != 0) || (ctxt->token != 0)) &&
6582 ((RAW != '<') || (NXT(1) != '/'))) {
6583 const xmlChar *test = CUR_PTR;
6584 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006585 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006586 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006587
6588 /*
6589 * Handle possible processed charrefs.
6590 */
6591 if (ctxt->token != 0) {
6592 xmlParseCharData(ctxt, 0);
6593 }
6594 /*
6595 * First case : a Processing Instruction.
6596 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006597 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006598 xmlParsePI(ctxt);
6599 }
6600
6601 /*
6602 * Second case : a CDSection
6603 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006604 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006605 (NXT(2) == '[') && (NXT(3) == 'C') &&
6606 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6607 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6608 (NXT(8) == '[')) {
6609 xmlParseCDSect(ctxt);
6610 }
6611
6612 /*
6613 * Third case : a comment
6614 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006615 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006616 (NXT(2) == '-') && (NXT(3) == '-')) {
6617 xmlParseComment(ctxt);
6618 ctxt->instate = XML_PARSER_CONTENT;
6619 }
6620
6621 /*
6622 * Fourth case : a sub-element.
6623 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006624 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006625 xmlParseElement(ctxt);
6626 }
6627
6628 /*
6629 * Fifth case : a reference. If if has not been resolved,
6630 * parsing returns it's Name, create the node
6631 */
6632
Daniel Veillard21a0f912001-02-25 19:54:14 +00006633 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006634 xmlParseReference(ctxt);
6635 }
6636
6637 /*
6638 * Last case, text. Note that References are handled directly.
6639 */
6640 else {
6641 xmlParseCharData(ctxt, 0);
6642 }
6643
6644 GROW;
6645 /*
6646 * Pop-up of finished entities.
6647 */
6648 while ((RAW == 0) && (ctxt->inputNr > 1))
6649 xmlPopInput(ctxt);
6650 SHRINK;
6651
6652 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6653 (tok == ctxt->token)) {
6654 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6656 ctxt->sax->error(ctxt->userData,
6657 "detected an error in element content\n");
6658 ctxt->wellFormed = 0;
6659 ctxt->disableSAX = 1;
6660 ctxt->instate = XML_PARSER_EOF;
6661 break;
6662 }
6663 }
6664}
6665
6666/**
6667 * xmlParseElement:
6668 * @ctxt: an XML parser context
6669 *
6670 * parse an XML element, this is highly recursive
6671 *
6672 * [39] element ::= EmptyElemTag | STag content ETag
6673 *
6674 * [ WFC: Element Type Match ]
6675 * The Name in an element's end-tag must match the element type in the
6676 * start-tag.
6677 *
6678 * [ VC: Element Valid ]
6679 * An element is valid if there is a declaration matching elementdecl
6680 * where the Name matches the element type and one of the following holds:
6681 * - The declaration matches EMPTY and the element has no content.
6682 * - The declaration matches children and the sequence of child elements
6683 * belongs to the language generated by the regular expression in the
6684 * content model, with optional white space (characters matching the
6685 * nonterminal S) between each pair of child elements.
6686 * - The declaration matches Mixed and the content consists of character
6687 * data and child elements whose types match names in the content model.
6688 * - The declaration matches ANY, and the types of any child elements have
6689 * been declared.
6690 */
6691
6692void
6693xmlParseElement(xmlParserCtxtPtr ctxt) {
6694 const xmlChar *openTag = CUR_PTR;
6695 xmlChar *name;
6696 xmlChar *oldname;
6697 xmlParserNodeInfo node_info;
6698 xmlNodePtr ret;
6699
6700 /* Capture start position */
6701 if (ctxt->record_info) {
6702 node_info.begin_pos = ctxt->input->consumed +
6703 (CUR_PTR - ctxt->input->base);
6704 node_info.begin_line = ctxt->input->line;
6705 }
6706
6707 if (ctxt->spaceNr == 0)
6708 spacePush(ctxt, -1);
6709 else
6710 spacePush(ctxt, *ctxt->space);
6711
6712 name = xmlParseStartTag(ctxt);
6713 if (name == NULL) {
6714 spacePop(ctxt);
6715 return;
6716 }
6717 namePush(ctxt, name);
6718 ret = ctxt->node;
6719
6720 /*
6721 * [ VC: Root Element Type ]
6722 * The Name in the document type declaration must match the element
6723 * type of the root element.
6724 */
6725 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6726 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6727 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6728
6729 /*
6730 * Check for an Empty Element.
6731 */
6732 if ((RAW == '/') && (NXT(1) == '>')) {
6733 SKIP(2);
6734 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6735 (!ctxt->disableSAX))
6736 ctxt->sax->endElement(ctxt->userData, name);
6737 oldname = namePop(ctxt);
6738 spacePop(ctxt);
6739 if (oldname != NULL) {
6740#ifdef DEBUG_STACK
6741 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6742#endif
6743 xmlFree(oldname);
6744 }
6745 if ( ret != NULL && ctxt->record_info ) {
6746 node_info.end_pos = ctxt->input->consumed +
6747 (CUR_PTR - ctxt->input->base);
6748 node_info.end_line = ctxt->input->line;
6749 node_info.node = ret;
6750 xmlParserAddNodeInfo(ctxt, &node_info);
6751 }
6752 return;
6753 }
6754 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006755 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006756 } else {
6757 ctxt->errNo = XML_ERR_GT_REQUIRED;
6758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6759 ctxt->sax->error(ctxt->userData,
6760 "Couldn't find end of Start Tag\n%.30s\n",
6761 openTag);
6762 ctxt->wellFormed = 0;
6763 ctxt->disableSAX = 1;
6764
6765 /*
6766 * end of parsing of this node.
6767 */
6768 nodePop(ctxt);
6769 oldname = namePop(ctxt);
6770 spacePop(ctxt);
6771 if (oldname != NULL) {
6772#ifdef DEBUG_STACK
6773 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6774#endif
6775 xmlFree(oldname);
6776 }
6777
6778 /*
6779 * Capture end position and add node
6780 */
6781 if ( ret != NULL && ctxt->record_info ) {
6782 node_info.end_pos = ctxt->input->consumed +
6783 (CUR_PTR - ctxt->input->base);
6784 node_info.end_line = ctxt->input->line;
6785 node_info.node = ret;
6786 xmlParserAddNodeInfo(ctxt, &node_info);
6787 }
6788 return;
6789 }
6790
6791 /*
6792 * Parse the content of the element:
6793 */
6794 xmlParseContent(ctxt);
6795 if (!IS_CHAR(RAW)) {
6796 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6798 ctxt->sax->error(ctxt->userData,
6799 "Premature end of data in tag %.30s\n", openTag);
6800 ctxt->wellFormed = 0;
6801 ctxt->disableSAX = 1;
6802
6803 /*
6804 * end of parsing of this node.
6805 */
6806 nodePop(ctxt);
6807 oldname = namePop(ctxt);
6808 spacePop(ctxt);
6809 if (oldname != NULL) {
6810#ifdef DEBUG_STACK
6811 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6812#endif
6813 xmlFree(oldname);
6814 }
6815 return;
6816 }
6817
6818 /*
6819 * parse the end of tag: '</' should be here.
6820 */
6821 xmlParseEndTag(ctxt);
6822
6823 /*
6824 * Capture end position and add node
6825 */
6826 if ( ret != NULL && ctxt->record_info ) {
6827 node_info.end_pos = ctxt->input->consumed +
6828 (CUR_PTR - ctxt->input->base);
6829 node_info.end_line = ctxt->input->line;
6830 node_info.node = ret;
6831 xmlParserAddNodeInfo(ctxt, &node_info);
6832 }
6833}
6834
6835/**
6836 * xmlParseVersionNum:
6837 * @ctxt: an XML parser context
6838 *
6839 * parse the XML version value.
6840 *
6841 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6842 *
6843 * Returns the string giving the XML version number, or NULL
6844 */
6845xmlChar *
6846xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6847 xmlChar *buf = NULL;
6848 int len = 0;
6849 int size = 10;
6850 xmlChar cur;
6851
6852 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6853 if (buf == NULL) {
6854 xmlGenericError(xmlGenericErrorContext,
6855 "malloc of %d byte failed\n", size);
6856 return(NULL);
6857 }
6858 cur = CUR;
6859 while (((cur >= 'a') && (cur <= 'z')) ||
6860 ((cur >= 'A') && (cur <= 'Z')) ||
6861 ((cur >= '0') && (cur <= '9')) ||
6862 (cur == '_') || (cur == '.') ||
6863 (cur == ':') || (cur == '-')) {
6864 if (len + 1 >= size) {
6865 size *= 2;
6866 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6867 if (buf == NULL) {
6868 xmlGenericError(xmlGenericErrorContext,
6869 "realloc of %d byte failed\n", size);
6870 return(NULL);
6871 }
6872 }
6873 buf[len++] = cur;
6874 NEXT;
6875 cur=CUR;
6876 }
6877 buf[len] = 0;
6878 return(buf);
6879}
6880
6881/**
6882 * xmlParseVersionInfo:
6883 * @ctxt: an XML parser context
6884 *
6885 * parse the XML version.
6886 *
6887 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6888 *
6889 * [25] Eq ::= S? '=' S?
6890 *
6891 * Returns the version string, e.g. "1.0"
6892 */
6893
6894xmlChar *
6895xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6896 xmlChar *version = NULL;
6897 const xmlChar *q;
6898
6899 if ((RAW == 'v') && (NXT(1) == 'e') &&
6900 (NXT(2) == 'r') && (NXT(3) == 's') &&
6901 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6902 (NXT(6) == 'n')) {
6903 SKIP(7);
6904 SKIP_BLANKS;
6905 if (RAW != '=') {
6906 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6908 ctxt->sax->error(ctxt->userData,
6909 "xmlParseVersionInfo : expected '='\n");
6910 ctxt->wellFormed = 0;
6911 ctxt->disableSAX = 1;
6912 return(NULL);
6913 }
6914 NEXT;
6915 SKIP_BLANKS;
6916 if (RAW == '"') {
6917 NEXT;
6918 q = CUR_PTR;
6919 version = xmlParseVersionNum(ctxt);
6920 if (RAW != '"') {
6921 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6923 ctxt->sax->error(ctxt->userData,
6924 "String not closed\n%.50s\n", q);
6925 ctxt->wellFormed = 0;
6926 ctxt->disableSAX = 1;
6927 } else
6928 NEXT;
6929 } else if (RAW == '\''){
6930 NEXT;
6931 q = CUR_PTR;
6932 version = xmlParseVersionNum(ctxt);
6933 if (RAW != '\'') {
6934 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6936 ctxt->sax->error(ctxt->userData,
6937 "String not closed\n%.50s\n", q);
6938 ctxt->wellFormed = 0;
6939 ctxt->disableSAX = 1;
6940 } else
6941 NEXT;
6942 } else {
6943 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6945 ctxt->sax->error(ctxt->userData,
6946 "xmlParseVersionInfo : expected ' or \"\n");
6947 ctxt->wellFormed = 0;
6948 ctxt->disableSAX = 1;
6949 }
6950 }
6951 return(version);
6952}
6953
6954/**
6955 * xmlParseEncName:
6956 * @ctxt: an XML parser context
6957 *
6958 * parse the XML encoding name
6959 *
6960 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6961 *
6962 * Returns the encoding name value or NULL
6963 */
6964xmlChar *
6965xmlParseEncName(xmlParserCtxtPtr ctxt) {
6966 xmlChar *buf = NULL;
6967 int len = 0;
6968 int size = 10;
6969 xmlChar cur;
6970
6971 cur = CUR;
6972 if (((cur >= 'a') && (cur <= 'z')) ||
6973 ((cur >= 'A') && (cur <= 'Z'))) {
6974 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6975 if (buf == NULL) {
6976 xmlGenericError(xmlGenericErrorContext,
6977 "malloc of %d byte failed\n", size);
6978 return(NULL);
6979 }
6980
6981 buf[len++] = cur;
6982 NEXT;
6983 cur = CUR;
6984 while (((cur >= 'a') && (cur <= 'z')) ||
6985 ((cur >= 'A') && (cur <= 'Z')) ||
6986 ((cur >= '0') && (cur <= '9')) ||
6987 (cur == '.') || (cur == '_') ||
6988 (cur == '-')) {
6989 if (len + 1 >= size) {
6990 size *= 2;
6991 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6992 if (buf == NULL) {
6993 xmlGenericError(xmlGenericErrorContext,
6994 "realloc of %d byte failed\n", size);
6995 return(NULL);
6996 }
6997 }
6998 buf[len++] = cur;
6999 NEXT;
7000 cur = CUR;
7001 if (cur == 0) {
7002 SHRINK;
7003 GROW;
7004 cur = CUR;
7005 }
7006 }
7007 buf[len] = 0;
7008 } else {
7009 ctxt->errNo = XML_ERR_ENCODING_NAME;
7010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7011 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7012 ctxt->wellFormed = 0;
7013 ctxt->disableSAX = 1;
7014 }
7015 return(buf);
7016}
7017
7018/**
7019 * xmlParseEncodingDecl:
7020 * @ctxt: an XML parser context
7021 *
7022 * parse the XML encoding declaration
7023 *
7024 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7025 *
7026 * this setups the conversion filters.
7027 *
7028 * Returns the encoding value or NULL
7029 */
7030
7031xmlChar *
7032xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7033 xmlChar *encoding = NULL;
7034 const xmlChar *q;
7035
7036 SKIP_BLANKS;
7037 if ((RAW == 'e') && (NXT(1) == 'n') &&
7038 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7039 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7040 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7041 SKIP(8);
7042 SKIP_BLANKS;
7043 if (RAW != '=') {
7044 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7046 ctxt->sax->error(ctxt->userData,
7047 "xmlParseEncodingDecl : expected '='\n");
7048 ctxt->wellFormed = 0;
7049 ctxt->disableSAX = 1;
7050 return(NULL);
7051 }
7052 NEXT;
7053 SKIP_BLANKS;
7054 if (RAW == '"') {
7055 NEXT;
7056 q = CUR_PTR;
7057 encoding = xmlParseEncName(ctxt);
7058 if (RAW != '"') {
7059 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7061 ctxt->sax->error(ctxt->userData,
7062 "String not closed\n%.50s\n", q);
7063 ctxt->wellFormed = 0;
7064 ctxt->disableSAX = 1;
7065 } else
7066 NEXT;
7067 } else if (RAW == '\''){
7068 NEXT;
7069 q = CUR_PTR;
7070 encoding = xmlParseEncName(ctxt);
7071 if (RAW != '\'') {
7072 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7074 ctxt->sax->error(ctxt->userData,
7075 "String not closed\n%.50s\n", q);
7076 ctxt->wellFormed = 0;
7077 ctxt->disableSAX = 1;
7078 } else
7079 NEXT;
7080 } else if (RAW == '"'){
7081 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7083 ctxt->sax->error(ctxt->userData,
7084 "xmlParseEncodingDecl : expected ' or \"\n");
7085 ctxt->wellFormed = 0;
7086 ctxt->disableSAX = 1;
7087 }
7088 if (encoding != NULL) {
7089 xmlCharEncoding enc;
7090 xmlCharEncodingHandlerPtr handler;
7091
7092 if (ctxt->input->encoding != NULL)
7093 xmlFree((xmlChar *) ctxt->input->encoding);
7094 ctxt->input->encoding = encoding;
7095
7096 enc = xmlParseCharEncoding((const char *) encoding);
7097 /*
7098 * registered set of known encodings
7099 */
7100 if (enc != XML_CHAR_ENCODING_ERROR) {
7101 xmlSwitchEncoding(ctxt, enc);
7102 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7103 xmlFree(encoding);
7104 return(NULL);
7105 }
7106 } else {
7107 /*
7108 * fallback for unknown encodings
7109 */
7110 handler = xmlFindCharEncodingHandler((const char *) encoding);
7111 if (handler != NULL) {
7112 xmlSwitchToEncoding(ctxt, handler);
7113 } else {
7114 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7116 ctxt->sax->error(ctxt->userData,
7117 "Unsupported encoding %s\n", encoding);
7118 return(NULL);
7119 }
7120 }
7121 }
7122 }
7123 return(encoding);
7124}
7125
7126/**
7127 * xmlParseSDDecl:
7128 * @ctxt: an XML parser context
7129 *
7130 * parse the XML standalone declaration
7131 *
7132 * [32] SDDecl ::= S 'standalone' Eq
7133 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7134 *
7135 * [ VC: Standalone Document Declaration ]
7136 * TODO The standalone document declaration must have the value "no"
7137 * if any external markup declarations contain declarations of:
7138 * - attributes with default values, if elements to which these
7139 * attributes apply appear in the document without specifications
7140 * of values for these attributes, or
7141 * - entities (other than amp, lt, gt, apos, quot), if references
7142 * to those entities appear in the document, or
7143 * - attributes with values subject to normalization, where the
7144 * attribute appears in the document with a value which will change
7145 * as a result of normalization, or
7146 * - element types with element content, if white space occurs directly
7147 * within any instance of those types.
7148 *
7149 * Returns 1 if standalone, 0 otherwise
7150 */
7151
7152int
7153xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7154 int standalone = -1;
7155
7156 SKIP_BLANKS;
7157 if ((RAW == 's') && (NXT(1) == 't') &&
7158 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7159 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7160 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7161 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7162 SKIP(10);
7163 SKIP_BLANKS;
7164 if (RAW != '=') {
7165 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7167 ctxt->sax->error(ctxt->userData,
7168 "XML standalone declaration : expected '='\n");
7169 ctxt->wellFormed = 0;
7170 ctxt->disableSAX = 1;
7171 return(standalone);
7172 }
7173 NEXT;
7174 SKIP_BLANKS;
7175 if (RAW == '\''){
7176 NEXT;
7177 if ((RAW == 'n') && (NXT(1) == 'o')) {
7178 standalone = 0;
7179 SKIP(2);
7180 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7181 (NXT(2) == 's')) {
7182 standalone = 1;
7183 SKIP(3);
7184 } else {
7185 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7187 ctxt->sax->error(ctxt->userData,
7188 "standalone accepts only 'yes' or 'no'\n");
7189 ctxt->wellFormed = 0;
7190 ctxt->disableSAX = 1;
7191 }
7192 if (RAW != '\'') {
7193 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7195 ctxt->sax->error(ctxt->userData, "String not closed\n");
7196 ctxt->wellFormed = 0;
7197 ctxt->disableSAX = 1;
7198 } else
7199 NEXT;
7200 } else if (RAW == '"'){
7201 NEXT;
7202 if ((RAW == 'n') && (NXT(1) == 'o')) {
7203 standalone = 0;
7204 SKIP(2);
7205 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7206 (NXT(2) == 's')) {
7207 standalone = 1;
7208 SKIP(3);
7209 } else {
7210 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7212 ctxt->sax->error(ctxt->userData,
7213 "standalone accepts only 'yes' or 'no'\n");
7214 ctxt->wellFormed = 0;
7215 ctxt->disableSAX = 1;
7216 }
7217 if (RAW != '"') {
7218 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7220 ctxt->sax->error(ctxt->userData, "String not closed\n");
7221 ctxt->wellFormed = 0;
7222 ctxt->disableSAX = 1;
7223 } else
7224 NEXT;
7225 } else {
7226 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7228 ctxt->sax->error(ctxt->userData,
7229 "Standalone value not found\n");
7230 ctxt->wellFormed = 0;
7231 ctxt->disableSAX = 1;
7232 }
7233 }
7234 return(standalone);
7235}
7236
7237/**
7238 * xmlParseXMLDecl:
7239 * @ctxt: an XML parser context
7240 *
7241 * parse an XML declaration header
7242 *
7243 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7244 */
7245
7246void
7247xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7248 xmlChar *version;
7249
7250 /*
7251 * We know that '<?xml' is here.
7252 */
7253 SKIP(5);
7254
7255 if (!IS_BLANK(RAW)) {
7256 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7258 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7259 ctxt->wellFormed = 0;
7260 ctxt->disableSAX = 1;
7261 }
7262 SKIP_BLANKS;
7263
7264 /*
7265 * We should have the VersionInfo here.
7266 */
7267 version = xmlParseVersionInfo(ctxt);
7268 if (version == NULL)
7269 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillarda050d232001-09-05 15:51:05 +00007270 else if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7271 /*
7272 * TODO: Blueberry should be detected here
7273 */
7274 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7275 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7276 version);
7277 }
Owen Taylor3473f882001-02-23 17:55:21 +00007278 ctxt->version = xmlStrdup(version);
7279 xmlFree(version);
7280
7281 /*
7282 * We may have the encoding declaration
7283 */
7284 if (!IS_BLANK(RAW)) {
7285 if ((RAW == '?') && (NXT(1) == '>')) {
7286 SKIP(2);
7287 return;
7288 }
7289 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7291 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7292 ctxt->wellFormed = 0;
7293 ctxt->disableSAX = 1;
7294 }
7295 xmlParseEncodingDecl(ctxt);
7296 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7297 /*
7298 * The XML REC instructs us to stop parsing right here
7299 */
7300 return;
7301 }
7302
7303 /*
7304 * We may have the standalone status.
7305 */
7306 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7307 if ((RAW == '?') && (NXT(1) == '>')) {
7308 SKIP(2);
7309 return;
7310 }
7311 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7313 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7314 ctxt->wellFormed = 0;
7315 ctxt->disableSAX = 1;
7316 }
7317 SKIP_BLANKS;
7318 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7319
7320 SKIP_BLANKS;
7321 if ((RAW == '?') && (NXT(1) == '>')) {
7322 SKIP(2);
7323 } else if (RAW == '>') {
7324 /* Deprecated old WD ... */
7325 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7327 ctxt->sax->error(ctxt->userData,
7328 "XML declaration must end-up with '?>'\n");
7329 ctxt->wellFormed = 0;
7330 ctxt->disableSAX = 1;
7331 NEXT;
7332 } else {
7333 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7335 ctxt->sax->error(ctxt->userData,
7336 "parsing XML declaration: '?>' expected\n");
7337 ctxt->wellFormed = 0;
7338 ctxt->disableSAX = 1;
7339 MOVETO_ENDTAG(CUR_PTR);
7340 NEXT;
7341 }
7342}
7343
7344/**
7345 * xmlParseMisc:
7346 * @ctxt: an XML parser context
7347 *
7348 * parse an XML Misc* optionnal field.
7349 *
7350 * [27] Misc ::= Comment | PI | S
7351 */
7352
7353void
7354xmlParseMisc(xmlParserCtxtPtr ctxt) {
7355 while (((RAW == '<') && (NXT(1) == '?')) ||
7356 ((RAW == '<') && (NXT(1) == '!') &&
7357 (NXT(2) == '-') && (NXT(3) == '-')) ||
7358 IS_BLANK(CUR)) {
7359 if ((RAW == '<') && (NXT(1) == '?')) {
7360 xmlParsePI(ctxt);
7361 } else if (IS_BLANK(CUR)) {
7362 NEXT;
7363 } else
7364 xmlParseComment(ctxt);
7365 }
7366}
7367
7368/**
7369 * xmlParseDocument:
7370 * @ctxt: an XML parser context
7371 *
7372 * parse an XML document (and build a tree if using the standard SAX
7373 * interface).
7374 *
7375 * [1] document ::= prolog element Misc*
7376 *
7377 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7378 *
7379 * Returns 0, -1 in case of error. the parser context is augmented
7380 * as a result of the parsing.
7381 */
7382
7383int
7384xmlParseDocument(xmlParserCtxtPtr ctxt) {
7385 xmlChar start[4];
7386 xmlCharEncoding enc;
7387
7388 xmlInitParser();
7389
7390 GROW;
7391
7392 /*
7393 * SAX: beginning of the document processing.
7394 */
7395 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7396 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7397
Daniel Veillard50f34372001-08-03 12:06:36 +00007398 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007399 /*
7400 * Get the 4 first bytes and decode the charset
7401 * if enc != XML_CHAR_ENCODING_NONE
7402 * plug some encoding conversion routines.
7403 */
7404 start[0] = RAW;
7405 start[1] = NXT(1);
7406 start[2] = NXT(2);
7407 start[3] = NXT(3);
7408 enc = xmlDetectCharEncoding(start, 4);
7409 if (enc != XML_CHAR_ENCODING_NONE) {
7410 xmlSwitchEncoding(ctxt, enc);
7411 }
Owen Taylor3473f882001-02-23 17:55:21 +00007412 }
7413
7414
7415 if (CUR == 0) {
7416 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7418 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7419 ctxt->wellFormed = 0;
7420 ctxt->disableSAX = 1;
7421 }
7422
7423 /*
7424 * Check for the XMLDecl in the Prolog.
7425 */
7426 GROW;
7427 if ((RAW == '<') && (NXT(1) == '?') &&
7428 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7429 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7430
7431 /*
7432 * Note that we will switch encoding on the fly.
7433 */
7434 xmlParseXMLDecl(ctxt);
7435 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7436 /*
7437 * The XML REC instructs us to stop parsing right here
7438 */
7439 return(-1);
7440 }
7441 ctxt->standalone = ctxt->input->standalone;
7442 SKIP_BLANKS;
7443 } else {
7444 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7445 }
7446 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7447 ctxt->sax->startDocument(ctxt->userData);
7448
7449 /*
7450 * The Misc part of the Prolog
7451 */
7452 GROW;
7453 xmlParseMisc(ctxt);
7454
7455 /*
7456 * Then possibly doc type declaration(s) and more Misc
7457 * (doctypedecl Misc*)?
7458 */
7459 GROW;
7460 if ((RAW == '<') && (NXT(1) == '!') &&
7461 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7462 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7463 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7464 (NXT(8) == 'E')) {
7465
7466 ctxt->inSubset = 1;
7467 xmlParseDocTypeDecl(ctxt);
7468 if (RAW == '[') {
7469 ctxt->instate = XML_PARSER_DTD;
7470 xmlParseInternalSubset(ctxt);
7471 }
7472
7473 /*
7474 * Create and update the external subset.
7475 */
7476 ctxt->inSubset = 2;
7477 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7478 (!ctxt->disableSAX))
7479 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7480 ctxt->extSubSystem, ctxt->extSubURI);
7481 ctxt->inSubset = 0;
7482
7483
7484 ctxt->instate = XML_PARSER_PROLOG;
7485 xmlParseMisc(ctxt);
7486 }
7487
7488 /*
7489 * Time to start parsing the tree itself
7490 */
7491 GROW;
7492 if (RAW != '<') {
7493 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7495 ctxt->sax->error(ctxt->userData,
7496 "Start tag expected, '<' not found\n");
7497 ctxt->wellFormed = 0;
7498 ctxt->disableSAX = 1;
7499 ctxt->instate = XML_PARSER_EOF;
7500 } else {
7501 ctxt->instate = XML_PARSER_CONTENT;
7502 xmlParseElement(ctxt);
7503 ctxt->instate = XML_PARSER_EPILOG;
7504
7505
7506 /*
7507 * The Misc part at the end
7508 */
7509 xmlParseMisc(ctxt);
7510
7511 if (RAW != 0) {
7512 ctxt->errNo = XML_ERR_DOCUMENT_END;
7513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7514 ctxt->sax->error(ctxt->userData,
7515 "Extra content at the end of the document\n");
7516 ctxt->wellFormed = 0;
7517 ctxt->disableSAX = 1;
7518 }
7519 ctxt->instate = XML_PARSER_EOF;
7520 }
7521
7522 /*
7523 * SAX: end of the document processing.
7524 */
7525 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7526 (!ctxt->disableSAX))
7527 ctxt->sax->endDocument(ctxt->userData);
7528
7529 if (! ctxt->wellFormed) return(-1);
7530 return(0);
7531}
7532
7533/**
7534 * xmlParseExtParsedEnt:
7535 * @ctxt: an XML parser context
7536 *
7537 * parse a genreral parsed entity
7538 * An external general parsed entity is well-formed if it matches the
7539 * production labeled extParsedEnt.
7540 *
7541 * [78] extParsedEnt ::= TextDecl? content
7542 *
7543 * Returns 0, -1 in case of error. the parser context is augmented
7544 * as a result of the parsing.
7545 */
7546
7547int
7548xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7549 xmlChar start[4];
7550 xmlCharEncoding enc;
7551
7552 xmlDefaultSAXHandlerInit();
7553
7554 GROW;
7555
7556 /*
7557 * SAX: beginning of the document processing.
7558 */
7559 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7560 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7561
7562 /*
7563 * Get the 4 first bytes and decode the charset
7564 * if enc != XML_CHAR_ENCODING_NONE
7565 * plug some encoding conversion routines.
7566 */
7567 start[0] = RAW;
7568 start[1] = NXT(1);
7569 start[2] = NXT(2);
7570 start[3] = NXT(3);
7571 enc = xmlDetectCharEncoding(start, 4);
7572 if (enc != XML_CHAR_ENCODING_NONE) {
7573 xmlSwitchEncoding(ctxt, enc);
7574 }
7575
7576
7577 if (CUR == 0) {
7578 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7580 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7581 ctxt->wellFormed = 0;
7582 ctxt->disableSAX = 1;
7583 }
7584
7585 /*
7586 * Check for the XMLDecl in the Prolog.
7587 */
7588 GROW;
7589 if ((RAW == '<') && (NXT(1) == '?') &&
7590 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7591 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7592
7593 /*
7594 * Note that we will switch encoding on the fly.
7595 */
7596 xmlParseXMLDecl(ctxt);
7597 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7598 /*
7599 * The XML REC instructs us to stop parsing right here
7600 */
7601 return(-1);
7602 }
7603 SKIP_BLANKS;
7604 } else {
7605 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7606 }
7607 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7608 ctxt->sax->startDocument(ctxt->userData);
7609
7610 /*
7611 * Doing validity checking on chunk doesn't make sense
7612 */
7613 ctxt->instate = XML_PARSER_CONTENT;
7614 ctxt->validate = 0;
7615 ctxt->loadsubset = 0;
7616 ctxt->depth = 0;
7617
7618 xmlParseContent(ctxt);
7619
7620 if ((RAW == '<') && (NXT(1) == '/')) {
7621 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7623 ctxt->sax->error(ctxt->userData,
7624 "chunk is not well balanced\n");
7625 ctxt->wellFormed = 0;
7626 ctxt->disableSAX = 1;
7627 } else if (RAW != 0) {
7628 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7630 ctxt->sax->error(ctxt->userData,
7631 "extra content at the end of well balanced chunk\n");
7632 ctxt->wellFormed = 0;
7633 ctxt->disableSAX = 1;
7634 }
7635
7636 /*
7637 * SAX: end of the document processing.
7638 */
7639 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7640 (!ctxt->disableSAX))
7641 ctxt->sax->endDocument(ctxt->userData);
7642
7643 if (! ctxt->wellFormed) return(-1);
7644 return(0);
7645}
7646
7647/************************************************************************
7648 * *
7649 * Progressive parsing interfaces *
7650 * *
7651 ************************************************************************/
7652
7653/**
7654 * xmlParseLookupSequence:
7655 * @ctxt: an XML parser context
7656 * @first: the first char to lookup
7657 * @next: the next char to lookup or zero
7658 * @third: the next char to lookup or zero
7659 *
7660 * Try to find if a sequence (first, next, third) or just (first next) or
7661 * (first) is available in the input stream.
7662 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7663 * to avoid rescanning sequences of bytes, it DOES change the state of the
7664 * parser, do not use liberally.
7665 *
7666 * Returns the index to the current parsing point if the full sequence
7667 * is available, -1 otherwise.
7668 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007669static int
Owen Taylor3473f882001-02-23 17:55:21 +00007670xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7671 xmlChar next, xmlChar third) {
7672 int base, len;
7673 xmlParserInputPtr in;
7674 const xmlChar *buf;
7675
7676 in = ctxt->input;
7677 if (in == NULL) return(-1);
7678 base = in->cur - in->base;
7679 if (base < 0) return(-1);
7680 if (ctxt->checkIndex > base)
7681 base = ctxt->checkIndex;
7682 if (in->buf == NULL) {
7683 buf = in->base;
7684 len = in->length;
7685 } else {
7686 buf = in->buf->buffer->content;
7687 len = in->buf->buffer->use;
7688 }
7689 /* take into account the sequence length */
7690 if (third) len -= 2;
7691 else if (next) len --;
7692 for (;base < len;base++) {
7693 if (buf[base] == first) {
7694 if (third != 0) {
7695 if ((buf[base + 1] != next) ||
7696 (buf[base + 2] != third)) continue;
7697 } else if (next != 0) {
7698 if (buf[base + 1] != next) continue;
7699 }
7700 ctxt->checkIndex = 0;
7701#ifdef DEBUG_PUSH
7702 if (next == 0)
7703 xmlGenericError(xmlGenericErrorContext,
7704 "PP: lookup '%c' found at %d\n",
7705 first, base);
7706 else if (third == 0)
7707 xmlGenericError(xmlGenericErrorContext,
7708 "PP: lookup '%c%c' found at %d\n",
7709 first, next, base);
7710 else
7711 xmlGenericError(xmlGenericErrorContext,
7712 "PP: lookup '%c%c%c' found at %d\n",
7713 first, next, third, base);
7714#endif
7715 return(base - (in->cur - in->base));
7716 }
7717 }
7718 ctxt->checkIndex = base;
7719#ifdef DEBUG_PUSH
7720 if (next == 0)
7721 xmlGenericError(xmlGenericErrorContext,
7722 "PP: lookup '%c' failed\n", first);
7723 else if (third == 0)
7724 xmlGenericError(xmlGenericErrorContext,
7725 "PP: lookup '%c%c' failed\n", first, next);
7726 else
7727 xmlGenericError(xmlGenericErrorContext,
7728 "PP: lookup '%c%c%c' failed\n", first, next, third);
7729#endif
7730 return(-1);
7731}
7732
7733/**
7734 * xmlParseTryOrFinish:
7735 * @ctxt: an XML parser context
7736 * @terminate: last chunk indicator
7737 *
7738 * Try to progress on parsing
7739 *
7740 * Returns zero if no parsing was possible
7741 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007742static int
Owen Taylor3473f882001-02-23 17:55:21 +00007743xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7744 int ret = 0;
7745 int avail;
7746 xmlChar cur, next;
7747
7748#ifdef DEBUG_PUSH
7749 switch (ctxt->instate) {
7750 case XML_PARSER_EOF:
7751 xmlGenericError(xmlGenericErrorContext,
7752 "PP: try EOF\n"); break;
7753 case XML_PARSER_START:
7754 xmlGenericError(xmlGenericErrorContext,
7755 "PP: try START\n"); break;
7756 case XML_PARSER_MISC:
7757 xmlGenericError(xmlGenericErrorContext,
7758 "PP: try MISC\n");break;
7759 case XML_PARSER_COMMENT:
7760 xmlGenericError(xmlGenericErrorContext,
7761 "PP: try COMMENT\n");break;
7762 case XML_PARSER_PROLOG:
7763 xmlGenericError(xmlGenericErrorContext,
7764 "PP: try PROLOG\n");break;
7765 case XML_PARSER_START_TAG:
7766 xmlGenericError(xmlGenericErrorContext,
7767 "PP: try START_TAG\n");break;
7768 case XML_PARSER_CONTENT:
7769 xmlGenericError(xmlGenericErrorContext,
7770 "PP: try CONTENT\n");break;
7771 case XML_PARSER_CDATA_SECTION:
7772 xmlGenericError(xmlGenericErrorContext,
7773 "PP: try CDATA_SECTION\n");break;
7774 case XML_PARSER_END_TAG:
7775 xmlGenericError(xmlGenericErrorContext,
7776 "PP: try END_TAG\n");break;
7777 case XML_PARSER_ENTITY_DECL:
7778 xmlGenericError(xmlGenericErrorContext,
7779 "PP: try ENTITY_DECL\n");break;
7780 case XML_PARSER_ENTITY_VALUE:
7781 xmlGenericError(xmlGenericErrorContext,
7782 "PP: try ENTITY_VALUE\n");break;
7783 case XML_PARSER_ATTRIBUTE_VALUE:
7784 xmlGenericError(xmlGenericErrorContext,
7785 "PP: try ATTRIBUTE_VALUE\n");break;
7786 case XML_PARSER_DTD:
7787 xmlGenericError(xmlGenericErrorContext,
7788 "PP: try DTD\n");break;
7789 case XML_PARSER_EPILOG:
7790 xmlGenericError(xmlGenericErrorContext,
7791 "PP: try EPILOG\n");break;
7792 case XML_PARSER_PI:
7793 xmlGenericError(xmlGenericErrorContext,
7794 "PP: try PI\n");break;
7795 case XML_PARSER_IGNORE:
7796 xmlGenericError(xmlGenericErrorContext,
7797 "PP: try IGNORE\n");break;
7798 }
7799#endif
7800
7801 while (1) {
7802 /*
7803 * Pop-up of finished entities.
7804 */
7805 while ((RAW == 0) && (ctxt->inputNr > 1))
7806 xmlPopInput(ctxt);
7807
7808 if (ctxt->input ==NULL) break;
7809 if (ctxt->input->buf == NULL)
7810 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7811 else
7812 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7813 if (avail < 1)
7814 goto done;
7815 switch (ctxt->instate) {
7816 case XML_PARSER_EOF:
7817 /*
7818 * Document parsing is done !
7819 */
7820 goto done;
7821 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007822 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7823 xmlChar start[4];
7824 xmlCharEncoding enc;
7825
7826 /*
7827 * Very first chars read from the document flow.
7828 */
7829 if (avail < 4)
7830 goto done;
7831
7832 /*
7833 * Get the 4 first bytes and decode the charset
7834 * if enc != XML_CHAR_ENCODING_NONE
7835 * plug some encoding conversion routines.
7836 */
7837 start[0] = RAW;
7838 start[1] = NXT(1);
7839 start[2] = NXT(2);
7840 start[3] = NXT(3);
7841 enc = xmlDetectCharEncoding(start, 4);
7842 if (enc != XML_CHAR_ENCODING_NONE) {
7843 xmlSwitchEncoding(ctxt, enc);
7844 }
7845 break;
7846 }
Owen Taylor3473f882001-02-23 17:55:21 +00007847
7848 cur = ctxt->input->cur[0];
7849 next = ctxt->input->cur[1];
7850 if (cur == 0) {
7851 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7852 ctxt->sax->setDocumentLocator(ctxt->userData,
7853 &xmlDefaultSAXLocator);
7854 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7856 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7857 ctxt->wellFormed = 0;
7858 ctxt->disableSAX = 1;
7859 ctxt->instate = XML_PARSER_EOF;
7860#ifdef DEBUG_PUSH
7861 xmlGenericError(xmlGenericErrorContext,
7862 "PP: entering EOF\n");
7863#endif
7864 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7865 ctxt->sax->endDocument(ctxt->userData);
7866 goto done;
7867 }
7868 if ((cur == '<') && (next == '?')) {
7869 /* PI or XML decl */
7870 if (avail < 5) return(ret);
7871 if ((!terminate) &&
7872 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7873 return(ret);
7874 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7875 ctxt->sax->setDocumentLocator(ctxt->userData,
7876 &xmlDefaultSAXLocator);
7877 if ((ctxt->input->cur[2] == 'x') &&
7878 (ctxt->input->cur[3] == 'm') &&
7879 (ctxt->input->cur[4] == 'l') &&
7880 (IS_BLANK(ctxt->input->cur[5]))) {
7881 ret += 5;
7882#ifdef DEBUG_PUSH
7883 xmlGenericError(xmlGenericErrorContext,
7884 "PP: Parsing XML Decl\n");
7885#endif
7886 xmlParseXMLDecl(ctxt);
7887 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7888 /*
7889 * The XML REC instructs us to stop parsing right
7890 * here
7891 */
7892 ctxt->instate = XML_PARSER_EOF;
7893 return(0);
7894 }
7895 ctxt->standalone = ctxt->input->standalone;
7896 if ((ctxt->encoding == NULL) &&
7897 (ctxt->input->encoding != NULL))
7898 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7899 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7900 (!ctxt->disableSAX))
7901 ctxt->sax->startDocument(ctxt->userData);
7902 ctxt->instate = XML_PARSER_MISC;
7903#ifdef DEBUG_PUSH
7904 xmlGenericError(xmlGenericErrorContext,
7905 "PP: entering MISC\n");
7906#endif
7907 } else {
7908 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7909 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7910 (!ctxt->disableSAX))
7911 ctxt->sax->startDocument(ctxt->userData);
7912 ctxt->instate = XML_PARSER_MISC;
7913#ifdef DEBUG_PUSH
7914 xmlGenericError(xmlGenericErrorContext,
7915 "PP: entering MISC\n");
7916#endif
7917 }
7918 } else {
7919 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7920 ctxt->sax->setDocumentLocator(ctxt->userData,
7921 &xmlDefaultSAXLocator);
7922 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7923 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7924 (!ctxt->disableSAX))
7925 ctxt->sax->startDocument(ctxt->userData);
7926 ctxt->instate = XML_PARSER_MISC;
7927#ifdef DEBUG_PUSH
7928 xmlGenericError(xmlGenericErrorContext,
7929 "PP: entering MISC\n");
7930#endif
7931 }
7932 break;
7933 case XML_PARSER_MISC:
7934 SKIP_BLANKS;
7935 if (ctxt->input->buf == NULL)
7936 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7937 else
7938 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7939 if (avail < 2)
7940 goto done;
7941 cur = ctxt->input->cur[0];
7942 next = ctxt->input->cur[1];
7943 if ((cur == '<') && (next == '?')) {
7944 if ((!terminate) &&
7945 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7946 goto done;
7947#ifdef DEBUG_PUSH
7948 xmlGenericError(xmlGenericErrorContext,
7949 "PP: Parsing PI\n");
7950#endif
7951 xmlParsePI(ctxt);
7952 } else if ((cur == '<') && (next == '!') &&
7953 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7954 if ((!terminate) &&
7955 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7956 goto done;
7957#ifdef DEBUG_PUSH
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: Parsing Comment\n");
7960#endif
7961 xmlParseComment(ctxt);
7962 ctxt->instate = XML_PARSER_MISC;
7963 } else if ((cur == '<') && (next == '!') &&
7964 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7965 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7966 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7967 (ctxt->input->cur[8] == 'E')) {
7968 if ((!terminate) &&
7969 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7970 goto done;
7971#ifdef DEBUG_PUSH
7972 xmlGenericError(xmlGenericErrorContext,
7973 "PP: Parsing internal subset\n");
7974#endif
7975 ctxt->inSubset = 1;
7976 xmlParseDocTypeDecl(ctxt);
7977 if (RAW == '[') {
7978 ctxt->instate = XML_PARSER_DTD;
7979#ifdef DEBUG_PUSH
7980 xmlGenericError(xmlGenericErrorContext,
7981 "PP: entering DTD\n");
7982#endif
7983 } else {
7984 /*
7985 * Create and update the external subset.
7986 */
7987 ctxt->inSubset = 2;
7988 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7989 (ctxt->sax->externalSubset != NULL))
7990 ctxt->sax->externalSubset(ctxt->userData,
7991 ctxt->intSubName, ctxt->extSubSystem,
7992 ctxt->extSubURI);
7993 ctxt->inSubset = 0;
7994 ctxt->instate = XML_PARSER_PROLOG;
7995#ifdef DEBUG_PUSH
7996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: entering PROLOG\n");
7998#endif
7999 }
8000 } else if ((cur == '<') && (next == '!') &&
8001 (avail < 9)) {
8002 goto done;
8003 } else {
8004 ctxt->instate = XML_PARSER_START_TAG;
8005#ifdef DEBUG_PUSH
8006 xmlGenericError(xmlGenericErrorContext,
8007 "PP: entering START_TAG\n");
8008#endif
8009 }
8010 break;
8011 case XML_PARSER_IGNORE:
8012 xmlGenericError(xmlGenericErrorContext,
8013 "PP: internal error, state == IGNORE");
8014 ctxt->instate = XML_PARSER_DTD;
8015#ifdef DEBUG_PUSH
8016 xmlGenericError(xmlGenericErrorContext,
8017 "PP: entering DTD\n");
8018#endif
8019 break;
8020 case XML_PARSER_PROLOG:
8021 SKIP_BLANKS;
8022 if (ctxt->input->buf == NULL)
8023 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8024 else
8025 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8026 if (avail < 2)
8027 goto done;
8028 cur = ctxt->input->cur[0];
8029 next = ctxt->input->cur[1];
8030 if ((cur == '<') && (next == '?')) {
8031 if ((!terminate) &&
8032 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8033 goto done;
8034#ifdef DEBUG_PUSH
8035 xmlGenericError(xmlGenericErrorContext,
8036 "PP: Parsing PI\n");
8037#endif
8038 xmlParsePI(ctxt);
8039 } else if ((cur == '<') && (next == '!') &&
8040 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8041 if ((!terminate) &&
8042 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8043 goto done;
8044#ifdef DEBUG_PUSH
8045 xmlGenericError(xmlGenericErrorContext,
8046 "PP: Parsing Comment\n");
8047#endif
8048 xmlParseComment(ctxt);
8049 ctxt->instate = XML_PARSER_PROLOG;
8050 } else if ((cur == '<') && (next == '!') &&
8051 (avail < 4)) {
8052 goto done;
8053 } else {
8054 ctxt->instate = XML_PARSER_START_TAG;
8055#ifdef DEBUG_PUSH
8056 xmlGenericError(xmlGenericErrorContext,
8057 "PP: entering START_TAG\n");
8058#endif
8059 }
8060 break;
8061 case XML_PARSER_EPILOG:
8062 SKIP_BLANKS;
8063 if (ctxt->input->buf == NULL)
8064 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8065 else
8066 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8067 if (avail < 2)
8068 goto done;
8069 cur = ctxt->input->cur[0];
8070 next = ctxt->input->cur[1];
8071 if ((cur == '<') && (next == '?')) {
8072 if ((!terminate) &&
8073 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8074 goto done;
8075#ifdef DEBUG_PUSH
8076 xmlGenericError(xmlGenericErrorContext,
8077 "PP: Parsing PI\n");
8078#endif
8079 xmlParsePI(ctxt);
8080 ctxt->instate = XML_PARSER_EPILOG;
8081 } else if ((cur == '<') && (next == '!') &&
8082 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8083 if ((!terminate) &&
8084 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8085 goto done;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: Parsing Comment\n");
8089#endif
8090 xmlParseComment(ctxt);
8091 ctxt->instate = XML_PARSER_EPILOG;
8092 } else if ((cur == '<') && (next == '!') &&
8093 (avail < 4)) {
8094 goto done;
8095 } else {
8096 ctxt->errNo = XML_ERR_DOCUMENT_END;
8097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8098 ctxt->sax->error(ctxt->userData,
8099 "Extra content at the end of the document\n");
8100 ctxt->wellFormed = 0;
8101 ctxt->disableSAX = 1;
8102 ctxt->instate = XML_PARSER_EOF;
8103#ifdef DEBUG_PUSH
8104 xmlGenericError(xmlGenericErrorContext,
8105 "PP: entering EOF\n");
8106#endif
8107 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8108 (!ctxt->disableSAX))
8109 ctxt->sax->endDocument(ctxt->userData);
8110 goto done;
8111 }
8112 break;
8113 case XML_PARSER_START_TAG: {
8114 xmlChar *name, *oldname;
8115
8116 if ((avail < 2) && (ctxt->inputNr == 1))
8117 goto done;
8118 cur = ctxt->input->cur[0];
8119 if (cur != '<') {
8120 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8122 ctxt->sax->error(ctxt->userData,
8123 "Start tag expect, '<' not found\n");
8124 ctxt->wellFormed = 0;
8125 ctxt->disableSAX = 1;
8126 ctxt->instate = XML_PARSER_EOF;
8127#ifdef DEBUG_PUSH
8128 xmlGenericError(xmlGenericErrorContext,
8129 "PP: entering EOF\n");
8130#endif
8131 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8132 (!ctxt->disableSAX))
8133 ctxt->sax->endDocument(ctxt->userData);
8134 goto done;
8135 }
8136 if ((!terminate) &&
8137 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8138 goto done;
8139 if (ctxt->spaceNr == 0)
8140 spacePush(ctxt, -1);
8141 else
8142 spacePush(ctxt, *ctxt->space);
8143 name = xmlParseStartTag(ctxt);
8144 if (name == NULL) {
8145 spacePop(ctxt);
8146 ctxt->instate = XML_PARSER_EOF;
8147#ifdef DEBUG_PUSH
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: entering EOF\n");
8150#endif
8151 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8152 (!ctxt->disableSAX))
8153 ctxt->sax->endDocument(ctxt->userData);
8154 goto done;
8155 }
8156 namePush(ctxt, xmlStrdup(name));
8157
8158 /*
8159 * [ VC: Root Element Type ]
8160 * The Name in the document type declaration must match
8161 * the element type of the root element.
8162 */
8163 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8164 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8165 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8166
8167 /*
8168 * Check for an Empty Element.
8169 */
8170 if ((RAW == '/') && (NXT(1) == '>')) {
8171 SKIP(2);
8172 if ((ctxt->sax != NULL) &&
8173 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8174 ctxt->sax->endElement(ctxt->userData, name);
8175 xmlFree(name);
8176 oldname = namePop(ctxt);
8177 spacePop(ctxt);
8178 if (oldname != NULL) {
8179#ifdef DEBUG_STACK
8180 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8181#endif
8182 xmlFree(oldname);
8183 }
8184 if (ctxt->name == NULL) {
8185 ctxt->instate = XML_PARSER_EPILOG;
8186#ifdef DEBUG_PUSH
8187 xmlGenericError(xmlGenericErrorContext,
8188 "PP: entering EPILOG\n");
8189#endif
8190 } else {
8191 ctxt->instate = XML_PARSER_CONTENT;
8192#ifdef DEBUG_PUSH
8193 xmlGenericError(xmlGenericErrorContext,
8194 "PP: entering CONTENT\n");
8195#endif
8196 }
8197 break;
8198 }
8199 if (RAW == '>') {
8200 NEXT;
8201 } else {
8202 ctxt->errNo = XML_ERR_GT_REQUIRED;
8203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8204 ctxt->sax->error(ctxt->userData,
8205 "Couldn't find end of Start Tag %s\n",
8206 name);
8207 ctxt->wellFormed = 0;
8208 ctxt->disableSAX = 1;
8209
8210 /*
8211 * end of parsing of this node.
8212 */
8213 nodePop(ctxt);
8214 oldname = namePop(ctxt);
8215 spacePop(ctxt);
8216 if (oldname != NULL) {
8217#ifdef DEBUG_STACK
8218 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8219#endif
8220 xmlFree(oldname);
8221 }
8222 }
8223 xmlFree(name);
8224 ctxt->instate = XML_PARSER_CONTENT;
8225#ifdef DEBUG_PUSH
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: entering CONTENT\n");
8228#endif
8229 break;
8230 }
8231 case XML_PARSER_CONTENT: {
8232 const xmlChar *test;
8233 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008234 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008235
8236 /*
8237 * Handle preparsed entities and charRef
8238 */
8239 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008240 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008241
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008242 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008243 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8244 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008245 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008246 ctxt->token = 0;
8247 }
8248 if ((avail < 2) && (ctxt->inputNr == 1))
8249 goto done;
8250 cur = ctxt->input->cur[0];
8251 next = ctxt->input->cur[1];
8252
8253 test = CUR_PTR;
8254 cons = ctxt->input->consumed;
8255 tok = ctxt->token;
8256 if ((cur == '<') && (next == '?')) {
8257 if ((!terminate) &&
8258 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8259 goto done;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: Parsing PI\n");
8263#endif
8264 xmlParsePI(ctxt);
8265 } else if ((cur == '<') && (next == '!') &&
8266 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8267 if ((!terminate) &&
8268 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8269 goto done;
8270#ifdef DEBUG_PUSH
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: Parsing Comment\n");
8273#endif
8274 xmlParseComment(ctxt);
8275 ctxt->instate = XML_PARSER_CONTENT;
8276 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8277 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8278 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8279 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8280 (ctxt->input->cur[8] == '[')) {
8281 SKIP(9);
8282 ctxt->instate = XML_PARSER_CDATA_SECTION;
8283#ifdef DEBUG_PUSH
8284 xmlGenericError(xmlGenericErrorContext,
8285 "PP: entering CDATA_SECTION\n");
8286#endif
8287 break;
8288 } else if ((cur == '<') && (next == '!') &&
8289 (avail < 9)) {
8290 goto done;
8291 } else if ((cur == '<') && (next == '/')) {
8292 ctxt->instate = XML_PARSER_END_TAG;
8293#ifdef DEBUG_PUSH
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: entering END_TAG\n");
8296#endif
8297 break;
8298 } else if (cur == '<') {
8299 ctxt->instate = XML_PARSER_START_TAG;
8300#ifdef DEBUG_PUSH
8301 xmlGenericError(xmlGenericErrorContext,
8302 "PP: entering START_TAG\n");
8303#endif
8304 break;
8305 } else if (cur == '&') {
8306 if ((!terminate) &&
8307 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8308 goto done;
8309#ifdef DEBUG_PUSH
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: Parsing Reference\n");
8312#endif
8313 xmlParseReference(ctxt);
8314 } else {
8315 /* TODO Avoid the extra copy, handle directly !!! */
8316 /*
8317 * Goal of the following test is:
8318 * - minimize calls to the SAX 'character' callback
8319 * when they are mergeable
8320 * - handle an problem for isBlank when we only parse
8321 * a sequence of blank chars and the next one is
8322 * not available to check against '<' presence.
8323 * - tries to homogenize the differences in SAX
8324 * callbacks beween the push and pull versions
8325 * of the parser.
8326 */
8327 if ((ctxt->inputNr == 1) &&
8328 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8329 if ((!terminate) &&
8330 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8331 goto done;
8332 }
8333 ctxt->checkIndex = 0;
8334#ifdef DEBUG_PUSH
8335 xmlGenericError(xmlGenericErrorContext,
8336 "PP: Parsing char data\n");
8337#endif
8338 xmlParseCharData(ctxt, 0);
8339 }
8340 /*
8341 * Pop-up of finished entities.
8342 */
8343 while ((RAW == 0) && (ctxt->inputNr > 1))
8344 xmlPopInput(ctxt);
8345 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8346 (tok == ctxt->token)) {
8347 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8349 ctxt->sax->error(ctxt->userData,
8350 "detected an error in element content\n");
8351 ctxt->wellFormed = 0;
8352 ctxt->disableSAX = 1;
8353 ctxt->instate = XML_PARSER_EOF;
8354 break;
8355 }
8356 break;
8357 }
8358 case XML_PARSER_CDATA_SECTION: {
8359 /*
8360 * The Push mode need to have the SAX callback for
8361 * cdataBlock merge back contiguous callbacks.
8362 */
8363 int base;
8364
8365 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8366 if (base < 0) {
8367 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8368 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8369 if (ctxt->sax->cdataBlock != NULL)
8370 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8371 XML_PARSER_BIG_BUFFER_SIZE);
8372 }
8373 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8374 ctxt->checkIndex = 0;
8375 }
8376 goto done;
8377 } else {
8378 if ((ctxt->sax != NULL) && (base > 0) &&
8379 (!ctxt->disableSAX)) {
8380 if (ctxt->sax->cdataBlock != NULL)
8381 ctxt->sax->cdataBlock(ctxt->userData,
8382 ctxt->input->cur, base);
8383 }
8384 SKIP(base + 3);
8385 ctxt->checkIndex = 0;
8386 ctxt->instate = XML_PARSER_CONTENT;
8387#ifdef DEBUG_PUSH
8388 xmlGenericError(xmlGenericErrorContext,
8389 "PP: entering CONTENT\n");
8390#endif
8391 }
8392 break;
8393 }
8394 case XML_PARSER_END_TAG:
8395 if (avail < 2)
8396 goto done;
8397 if ((!terminate) &&
8398 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8399 goto done;
8400 xmlParseEndTag(ctxt);
8401 if (ctxt->name == NULL) {
8402 ctxt->instate = XML_PARSER_EPILOG;
8403#ifdef DEBUG_PUSH
8404 xmlGenericError(xmlGenericErrorContext,
8405 "PP: entering EPILOG\n");
8406#endif
8407 } else {
8408 ctxt->instate = XML_PARSER_CONTENT;
8409#ifdef DEBUG_PUSH
8410 xmlGenericError(xmlGenericErrorContext,
8411 "PP: entering CONTENT\n");
8412#endif
8413 }
8414 break;
8415 case XML_PARSER_DTD: {
8416 /*
8417 * Sorry but progressive parsing of the internal subset
8418 * is not expected to be supported. We first check that
8419 * the full content of the internal subset is available and
8420 * the parsing is launched only at that point.
8421 * Internal subset ends up with "']' S? '>'" in an unescaped
8422 * section and not in a ']]>' sequence which are conditional
8423 * sections (whoever argued to keep that crap in XML deserve
8424 * a place in hell !).
8425 */
8426 int base, i;
8427 xmlChar *buf;
8428 xmlChar quote = 0;
8429
8430 base = ctxt->input->cur - ctxt->input->base;
8431 if (base < 0) return(0);
8432 if (ctxt->checkIndex > base)
8433 base = ctxt->checkIndex;
8434 buf = ctxt->input->buf->buffer->content;
8435 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8436 base++) {
8437 if (quote != 0) {
8438 if (buf[base] == quote)
8439 quote = 0;
8440 continue;
8441 }
8442 if (buf[base] == '"') {
8443 quote = '"';
8444 continue;
8445 }
8446 if (buf[base] == '\'') {
8447 quote = '\'';
8448 continue;
8449 }
8450 if (buf[base] == ']') {
8451 if ((unsigned int) base +1 >=
8452 ctxt->input->buf->buffer->use)
8453 break;
8454 if (buf[base + 1] == ']') {
8455 /* conditional crap, skip both ']' ! */
8456 base++;
8457 continue;
8458 }
8459 for (i = 0;
8460 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8461 i++) {
8462 if (buf[base + i] == '>')
8463 goto found_end_int_subset;
8464 }
8465 break;
8466 }
8467 }
8468 /*
8469 * We didn't found the end of the Internal subset
8470 */
8471 if (quote == 0)
8472 ctxt->checkIndex = base;
8473#ifdef DEBUG_PUSH
8474 if (next == 0)
8475 xmlGenericError(xmlGenericErrorContext,
8476 "PP: lookup of int subset end filed\n");
8477#endif
8478 goto done;
8479
8480found_end_int_subset:
8481 xmlParseInternalSubset(ctxt);
8482 ctxt->inSubset = 2;
8483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8484 (ctxt->sax->externalSubset != NULL))
8485 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8486 ctxt->extSubSystem, ctxt->extSubURI);
8487 ctxt->inSubset = 0;
8488 ctxt->instate = XML_PARSER_PROLOG;
8489 ctxt->checkIndex = 0;
8490#ifdef DEBUG_PUSH
8491 xmlGenericError(xmlGenericErrorContext,
8492 "PP: entering PROLOG\n");
8493#endif
8494 break;
8495 }
8496 case XML_PARSER_COMMENT:
8497 xmlGenericError(xmlGenericErrorContext,
8498 "PP: internal error, state == COMMENT\n");
8499 ctxt->instate = XML_PARSER_CONTENT;
8500#ifdef DEBUG_PUSH
8501 xmlGenericError(xmlGenericErrorContext,
8502 "PP: entering CONTENT\n");
8503#endif
8504 break;
8505 case XML_PARSER_PI:
8506 xmlGenericError(xmlGenericErrorContext,
8507 "PP: internal error, state == PI\n");
8508 ctxt->instate = XML_PARSER_CONTENT;
8509#ifdef DEBUG_PUSH
8510 xmlGenericError(xmlGenericErrorContext,
8511 "PP: entering CONTENT\n");
8512#endif
8513 break;
8514 case XML_PARSER_ENTITY_DECL:
8515 xmlGenericError(xmlGenericErrorContext,
8516 "PP: internal error, state == ENTITY_DECL\n");
8517 ctxt->instate = XML_PARSER_DTD;
8518#ifdef DEBUG_PUSH
8519 xmlGenericError(xmlGenericErrorContext,
8520 "PP: entering DTD\n");
8521#endif
8522 break;
8523 case XML_PARSER_ENTITY_VALUE:
8524 xmlGenericError(xmlGenericErrorContext,
8525 "PP: internal error, state == ENTITY_VALUE\n");
8526 ctxt->instate = XML_PARSER_CONTENT;
8527#ifdef DEBUG_PUSH
8528 xmlGenericError(xmlGenericErrorContext,
8529 "PP: entering DTD\n");
8530#endif
8531 break;
8532 case XML_PARSER_ATTRIBUTE_VALUE:
8533 xmlGenericError(xmlGenericErrorContext,
8534 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8535 ctxt->instate = XML_PARSER_START_TAG;
8536#ifdef DEBUG_PUSH
8537 xmlGenericError(xmlGenericErrorContext,
8538 "PP: entering START_TAG\n");
8539#endif
8540 break;
8541 case XML_PARSER_SYSTEM_LITERAL:
8542 xmlGenericError(xmlGenericErrorContext,
8543 "PP: internal error, state == SYSTEM_LITERAL\n");
8544 ctxt->instate = XML_PARSER_START_TAG;
8545#ifdef DEBUG_PUSH
8546 xmlGenericError(xmlGenericErrorContext,
8547 "PP: entering START_TAG\n");
8548#endif
8549 break;
8550 }
8551 }
8552done:
8553#ifdef DEBUG_PUSH
8554 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8555#endif
8556 return(ret);
8557}
8558
8559/**
Owen Taylor3473f882001-02-23 17:55:21 +00008560 * xmlParseChunk:
8561 * @ctxt: an XML parser context
8562 * @chunk: an char array
8563 * @size: the size in byte of the chunk
8564 * @terminate: last chunk indicator
8565 *
8566 * Parse a Chunk of memory
8567 *
8568 * Returns zero if no error, the xmlParserErrors otherwise.
8569 */
8570int
8571xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8572 int terminate) {
8573 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8574 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8575 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8576 int cur = ctxt->input->cur - ctxt->input->base;
8577
8578 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8579 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8580 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008581 ctxt->input->end =
8582 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008583#ifdef DEBUG_PUSH
8584 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8585#endif
8586
8587 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8588 xmlParseTryOrFinish(ctxt, terminate);
8589 } else if (ctxt->instate != XML_PARSER_EOF) {
8590 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8591 xmlParserInputBufferPtr in = ctxt->input->buf;
8592 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8593 (in->raw != NULL)) {
8594 int nbchars;
8595
8596 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8597 if (nbchars < 0) {
8598 xmlGenericError(xmlGenericErrorContext,
8599 "xmlParseChunk: encoder error\n");
8600 return(XML_ERR_INVALID_ENCODING);
8601 }
8602 }
8603 }
8604 }
8605 xmlParseTryOrFinish(ctxt, terminate);
8606 if (terminate) {
8607 /*
8608 * Check for termination
8609 */
8610 if ((ctxt->instate != XML_PARSER_EOF) &&
8611 (ctxt->instate != XML_PARSER_EPILOG)) {
8612 ctxt->errNo = XML_ERR_DOCUMENT_END;
8613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8614 ctxt->sax->error(ctxt->userData,
8615 "Extra content at the end of the document\n");
8616 ctxt->wellFormed = 0;
8617 ctxt->disableSAX = 1;
8618 }
8619 if (ctxt->instate != XML_PARSER_EOF) {
8620 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8621 (!ctxt->disableSAX))
8622 ctxt->sax->endDocument(ctxt->userData);
8623 }
8624 ctxt->instate = XML_PARSER_EOF;
8625 }
8626 return((xmlParserErrors) ctxt->errNo);
8627}
8628
8629/************************************************************************
8630 * *
8631 * I/O front end functions to the parser *
8632 * *
8633 ************************************************************************/
8634
8635/**
8636 * xmlStopParser:
8637 * @ctxt: an XML parser context
8638 *
8639 * Blocks further parser processing
8640 */
8641void
8642xmlStopParser(xmlParserCtxtPtr ctxt) {
8643 ctxt->instate = XML_PARSER_EOF;
8644 if (ctxt->input != NULL)
8645 ctxt->input->cur = BAD_CAST"";
8646}
8647
8648/**
8649 * xmlCreatePushParserCtxt:
8650 * @sax: a SAX handler
8651 * @user_data: The user data returned on SAX callbacks
8652 * @chunk: a pointer to an array of chars
8653 * @size: number of chars in the array
8654 * @filename: an optional file name or URI
8655 *
8656 * Create a parser context for using the XML parser in push mode
8657 * To allow content encoding detection, @size should be >= 4
8658 * The value of @filename is used for fetching external entities
8659 * and error/warning reports.
8660 *
8661 * Returns the new parser context or NULL
8662 */
8663xmlParserCtxtPtr
8664xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8665 const char *chunk, int size, const char *filename) {
8666 xmlParserCtxtPtr ctxt;
8667 xmlParserInputPtr inputStream;
8668 xmlParserInputBufferPtr buf;
8669 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8670
8671 /*
8672 * plug some encoding conversion routines
8673 */
8674 if ((chunk != NULL) && (size >= 4))
8675 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8676
8677 buf = xmlAllocParserInputBuffer(enc);
8678 if (buf == NULL) return(NULL);
8679
8680 ctxt = xmlNewParserCtxt();
8681 if (ctxt == NULL) {
8682 xmlFree(buf);
8683 return(NULL);
8684 }
8685 if (sax != NULL) {
8686 if (ctxt->sax != &xmlDefaultSAXHandler)
8687 xmlFree(ctxt->sax);
8688 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8689 if (ctxt->sax == NULL) {
8690 xmlFree(buf);
8691 xmlFree(ctxt);
8692 return(NULL);
8693 }
8694 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8695 if (user_data != NULL)
8696 ctxt->userData = user_data;
8697 }
8698 if (filename == NULL) {
8699 ctxt->directory = NULL;
8700 } else {
8701 ctxt->directory = xmlParserGetDirectory(filename);
8702 }
8703
8704 inputStream = xmlNewInputStream(ctxt);
8705 if (inputStream == NULL) {
8706 xmlFreeParserCtxt(ctxt);
8707 return(NULL);
8708 }
8709
8710 if (filename == NULL)
8711 inputStream->filename = NULL;
8712 else
8713 inputStream->filename = xmlMemStrdup(filename);
8714 inputStream->buf = buf;
8715 inputStream->base = inputStream->buf->buffer->content;
8716 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008717 inputStream->end =
8718 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008719
8720 inputPush(ctxt, inputStream);
8721
8722 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8723 (ctxt->input->buf != NULL)) {
8724 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8725#ifdef DEBUG_PUSH
8726 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8727#endif
8728 }
8729
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008730 if (enc != XML_CHAR_ENCODING_NONE) {
8731 xmlSwitchEncoding(ctxt, enc);
8732 }
8733
Owen Taylor3473f882001-02-23 17:55:21 +00008734 return(ctxt);
8735}
8736
8737/**
8738 * xmlCreateIOParserCtxt:
8739 * @sax: a SAX handler
8740 * @user_data: The user data returned on SAX callbacks
8741 * @ioread: an I/O read function
8742 * @ioclose: an I/O close function
8743 * @ioctx: an I/O handler
8744 * @enc: the charset encoding if known
8745 *
8746 * Create a parser context for using the XML parser with an existing
8747 * I/O stream
8748 *
8749 * Returns the new parser context or NULL
8750 */
8751xmlParserCtxtPtr
8752xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8753 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8754 void *ioctx, xmlCharEncoding enc) {
8755 xmlParserCtxtPtr ctxt;
8756 xmlParserInputPtr inputStream;
8757 xmlParserInputBufferPtr buf;
8758
8759 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8760 if (buf == NULL) return(NULL);
8761
8762 ctxt = xmlNewParserCtxt();
8763 if (ctxt == NULL) {
8764 xmlFree(buf);
8765 return(NULL);
8766 }
8767 if (sax != NULL) {
8768 if (ctxt->sax != &xmlDefaultSAXHandler)
8769 xmlFree(ctxt->sax);
8770 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8771 if (ctxt->sax == NULL) {
8772 xmlFree(buf);
8773 xmlFree(ctxt);
8774 return(NULL);
8775 }
8776 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8777 if (user_data != NULL)
8778 ctxt->userData = user_data;
8779 }
8780
8781 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8782 if (inputStream == NULL) {
8783 xmlFreeParserCtxt(ctxt);
8784 return(NULL);
8785 }
8786 inputPush(ctxt, inputStream);
8787
8788 return(ctxt);
8789}
8790
8791/************************************************************************
8792 * *
8793 * Front ends when parsing a Dtd *
8794 * *
8795 ************************************************************************/
8796
8797/**
8798 * xmlIOParseDTD:
8799 * @sax: the SAX handler block or NULL
8800 * @input: an Input Buffer
8801 * @enc: the charset encoding if known
8802 *
8803 * Load and parse a DTD
8804 *
8805 * Returns the resulting xmlDtdPtr or NULL in case of error.
8806 * @input will be freed at parsing end.
8807 */
8808
8809xmlDtdPtr
8810xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8811 xmlCharEncoding enc) {
8812 xmlDtdPtr ret = NULL;
8813 xmlParserCtxtPtr ctxt;
8814 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008815 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008816
8817 if (input == NULL)
8818 return(NULL);
8819
8820 ctxt = xmlNewParserCtxt();
8821 if (ctxt == NULL) {
8822 return(NULL);
8823 }
8824
8825 /*
8826 * Set-up the SAX context
8827 */
8828 if (sax != NULL) {
8829 if (ctxt->sax != NULL)
8830 xmlFree(ctxt->sax);
8831 ctxt->sax = sax;
8832 ctxt->userData = NULL;
8833 }
8834
8835 /*
8836 * generate a parser input from the I/O handler
8837 */
8838
8839 pinput = xmlNewIOInputStream(ctxt, input, enc);
8840 if (pinput == NULL) {
8841 if (sax != NULL) ctxt->sax = NULL;
8842 xmlFreeParserCtxt(ctxt);
8843 return(NULL);
8844 }
8845
8846 /*
8847 * plug some encoding conversion routines here.
8848 */
8849 xmlPushInput(ctxt, pinput);
8850
8851 pinput->filename = NULL;
8852 pinput->line = 1;
8853 pinput->col = 1;
8854 pinput->base = ctxt->input->cur;
8855 pinput->cur = ctxt->input->cur;
8856 pinput->free = NULL;
8857
8858 /*
8859 * let's parse that entity knowing it's an external subset.
8860 */
8861 ctxt->inSubset = 2;
8862 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8863 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8864 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008865
8866 if (enc == XML_CHAR_ENCODING_NONE) {
8867 /*
8868 * Get the 4 first bytes and decode the charset
8869 * if enc != XML_CHAR_ENCODING_NONE
8870 * plug some encoding conversion routines.
8871 */
8872 start[0] = RAW;
8873 start[1] = NXT(1);
8874 start[2] = NXT(2);
8875 start[3] = NXT(3);
8876 enc = xmlDetectCharEncoding(start, 4);
8877 if (enc != XML_CHAR_ENCODING_NONE) {
8878 xmlSwitchEncoding(ctxt, enc);
8879 }
8880 }
8881
Owen Taylor3473f882001-02-23 17:55:21 +00008882 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8883
8884 if (ctxt->myDoc != NULL) {
8885 if (ctxt->wellFormed) {
8886 ret = ctxt->myDoc->extSubset;
8887 ctxt->myDoc->extSubset = NULL;
8888 } else {
8889 ret = NULL;
8890 }
8891 xmlFreeDoc(ctxt->myDoc);
8892 ctxt->myDoc = NULL;
8893 }
8894 if (sax != NULL) ctxt->sax = NULL;
8895 xmlFreeParserCtxt(ctxt);
8896
8897 return(ret);
8898}
8899
8900/**
8901 * xmlSAXParseDTD:
8902 * @sax: the SAX handler block
8903 * @ExternalID: a NAME* containing the External ID of the DTD
8904 * @SystemID: a NAME* containing the URL to the DTD
8905 *
8906 * Load and parse an external subset.
8907 *
8908 * Returns the resulting xmlDtdPtr or NULL in case of error.
8909 */
8910
8911xmlDtdPtr
8912xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8913 const xmlChar *SystemID) {
8914 xmlDtdPtr ret = NULL;
8915 xmlParserCtxtPtr ctxt;
8916 xmlParserInputPtr input = NULL;
8917 xmlCharEncoding enc;
8918
8919 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8920
8921 ctxt = xmlNewParserCtxt();
8922 if (ctxt == NULL) {
8923 return(NULL);
8924 }
8925
8926 /*
8927 * Set-up the SAX context
8928 */
8929 if (sax != NULL) {
8930 if (ctxt->sax != NULL)
8931 xmlFree(ctxt->sax);
8932 ctxt->sax = sax;
8933 ctxt->userData = NULL;
8934 }
8935
8936 /*
8937 * Ask the Entity resolver to load the damn thing
8938 */
8939
8940 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8941 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8942 if (input == NULL) {
8943 if (sax != NULL) ctxt->sax = NULL;
8944 xmlFreeParserCtxt(ctxt);
8945 return(NULL);
8946 }
8947
8948 /*
8949 * plug some encoding conversion routines here.
8950 */
8951 xmlPushInput(ctxt, input);
8952 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8953 xmlSwitchEncoding(ctxt, enc);
8954
8955 if (input->filename == NULL)
8956 input->filename = (char *) xmlStrdup(SystemID);
8957 input->line = 1;
8958 input->col = 1;
8959 input->base = ctxt->input->cur;
8960 input->cur = ctxt->input->cur;
8961 input->free = NULL;
8962
8963 /*
8964 * let's parse that entity knowing it's an external subset.
8965 */
8966 ctxt->inSubset = 2;
8967 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8968 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8969 ExternalID, SystemID);
8970 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8971
8972 if (ctxt->myDoc != NULL) {
8973 if (ctxt->wellFormed) {
8974 ret = ctxt->myDoc->extSubset;
8975 ctxt->myDoc->extSubset = NULL;
8976 } else {
8977 ret = NULL;
8978 }
8979 xmlFreeDoc(ctxt->myDoc);
8980 ctxt->myDoc = NULL;
8981 }
8982 if (sax != NULL) ctxt->sax = NULL;
8983 xmlFreeParserCtxt(ctxt);
8984
8985 return(ret);
8986}
8987
8988/**
8989 * xmlParseDTD:
8990 * @ExternalID: a NAME* containing the External ID of the DTD
8991 * @SystemID: a NAME* containing the URL to the DTD
8992 *
8993 * Load and parse an external subset.
8994 *
8995 * Returns the resulting xmlDtdPtr or NULL in case of error.
8996 */
8997
8998xmlDtdPtr
8999xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9000 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9001}
9002
9003/************************************************************************
9004 * *
9005 * Front ends when parsing an Entity *
9006 * *
9007 ************************************************************************/
9008
9009/**
Owen Taylor3473f882001-02-23 17:55:21 +00009010 * xmlParseCtxtExternalEntity:
9011 * @ctx: the existing parsing context
9012 * @URL: the URL for the entity to load
9013 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009014 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009015 *
9016 * Parse an external general entity within an existing parsing context
9017 * An external general parsed entity is well-formed if it matches the
9018 * production labeled extParsedEnt.
9019 *
9020 * [78] extParsedEnt ::= TextDecl? content
9021 *
9022 * Returns 0 if the entity is well formed, -1 in case of args problem and
9023 * the parser error code otherwise
9024 */
9025
9026int
9027xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009028 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009029 xmlParserCtxtPtr ctxt;
9030 xmlDocPtr newDoc;
9031 xmlSAXHandlerPtr oldsax = NULL;
9032 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009033 xmlChar start[4];
9034 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009035
9036 if (ctx->depth > 40) {
9037 return(XML_ERR_ENTITY_LOOP);
9038 }
9039
Daniel Veillardcda96922001-08-21 10:56:31 +00009040 if (lst != NULL)
9041 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009042 if ((URL == NULL) && (ID == NULL))
9043 return(-1);
9044 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9045 return(-1);
9046
9047
9048 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9049 if (ctxt == NULL) return(-1);
9050 ctxt->userData = ctxt;
9051 oldsax = ctxt->sax;
9052 ctxt->sax = ctx->sax;
9053 newDoc = xmlNewDoc(BAD_CAST "1.0");
9054 if (newDoc == NULL) {
9055 xmlFreeParserCtxt(ctxt);
9056 return(-1);
9057 }
9058 if (ctx->myDoc != NULL) {
9059 newDoc->intSubset = ctx->myDoc->intSubset;
9060 newDoc->extSubset = ctx->myDoc->extSubset;
9061 }
9062 if (ctx->myDoc->URL != NULL) {
9063 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9064 }
9065 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9066 if (newDoc->children == NULL) {
9067 ctxt->sax = oldsax;
9068 xmlFreeParserCtxt(ctxt);
9069 newDoc->intSubset = NULL;
9070 newDoc->extSubset = NULL;
9071 xmlFreeDoc(newDoc);
9072 return(-1);
9073 }
9074 nodePush(ctxt, newDoc->children);
9075 if (ctx->myDoc == NULL) {
9076 ctxt->myDoc = newDoc;
9077 } else {
9078 ctxt->myDoc = ctx->myDoc;
9079 newDoc->children->doc = ctx->myDoc;
9080 }
9081
Daniel Veillard87a764e2001-06-20 17:41:10 +00009082 /*
9083 * Get the 4 first bytes and decode the charset
9084 * if enc != XML_CHAR_ENCODING_NONE
9085 * plug some encoding conversion routines.
9086 */
9087 GROW
9088 start[0] = RAW;
9089 start[1] = NXT(1);
9090 start[2] = NXT(2);
9091 start[3] = NXT(3);
9092 enc = xmlDetectCharEncoding(start, 4);
9093 if (enc != XML_CHAR_ENCODING_NONE) {
9094 xmlSwitchEncoding(ctxt, enc);
9095 }
9096
Owen Taylor3473f882001-02-23 17:55:21 +00009097 /*
9098 * Parse a possible text declaration first
9099 */
Owen Taylor3473f882001-02-23 17:55:21 +00009100 if ((RAW == '<') && (NXT(1) == '?') &&
9101 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9102 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9103 xmlParseTextDecl(ctxt);
9104 }
9105
9106 /*
9107 * Doing validity checking on chunk doesn't make sense
9108 */
9109 ctxt->instate = XML_PARSER_CONTENT;
9110 ctxt->validate = ctx->validate;
9111 ctxt->loadsubset = ctx->loadsubset;
9112 ctxt->depth = ctx->depth + 1;
9113 ctxt->replaceEntities = ctx->replaceEntities;
9114 if (ctxt->validate) {
9115 ctxt->vctxt.error = ctx->vctxt.error;
9116 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009117 } else {
9118 ctxt->vctxt.error = NULL;
9119 ctxt->vctxt.warning = NULL;
9120 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009121 ctxt->vctxt.nodeTab = NULL;
9122 ctxt->vctxt.nodeNr = 0;
9123 ctxt->vctxt.nodeMax = 0;
9124 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009125
9126 xmlParseContent(ctxt);
9127
9128 if ((RAW == '<') && (NXT(1) == '/')) {
9129 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9131 ctxt->sax->error(ctxt->userData,
9132 "chunk is not well balanced\n");
9133 ctxt->wellFormed = 0;
9134 ctxt->disableSAX = 1;
9135 } else if (RAW != 0) {
9136 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9138 ctxt->sax->error(ctxt->userData,
9139 "extra content at the end of well balanced chunk\n");
9140 ctxt->wellFormed = 0;
9141 ctxt->disableSAX = 1;
9142 }
9143 if (ctxt->node != newDoc->children) {
9144 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9146 ctxt->sax->error(ctxt->userData,
9147 "chunk is not well balanced\n");
9148 ctxt->wellFormed = 0;
9149 ctxt->disableSAX = 1;
9150 }
9151
9152 if (!ctxt->wellFormed) {
9153 if (ctxt->errNo == 0)
9154 ret = 1;
9155 else
9156 ret = ctxt->errNo;
9157 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009158 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009159 xmlNodePtr cur;
9160
9161 /*
9162 * Return the newly created nodeset after unlinking it from
9163 * they pseudo parent.
9164 */
9165 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009166 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009167 while (cur != NULL) {
9168 cur->parent = NULL;
9169 cur = cur->next;
9170 }
9171 newDoc->children->children = NULL;
9172 }
9173 ret = 0;
9174 }
9175 ctxt->sax = oldsax;
9176 xmlFreeParserCtxt(ctxt);
9177 newDoc->intSubset = NULL;
9178 newDoc->extSubset = NULL;
9179 xmlFreeDoc(newDoc);
9180
9181 return(ret);
9182}
9183
9184/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009185 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009186 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009187 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009188 * @sax: the SAX handler bloc (possibly NULL)
9189 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9190 * @depth: Used for loop detection, use 0
9191 * @URL: the URL for the entity to load
9192 * @ID: the System ID for the entity to load
9193 * @list: the return value for the set of parsed nodes
9194 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009195 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009196 *
9197 * Returns 0 if the entity is well formed, -1 in case of args problem and
9198 * the parser error code otherwise
9199 */
9200
Daniel Veillard257d9102001-05-08 10:41:44 +00009201static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009202xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9203 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009204 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009205 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009206 xmlParserCtxtPtr ctxt;
9207 xmlDocPtr newDoc;
9208 xmlSAXHandlerPtr oldsax = NULL;
9209 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009210 xmlChar start[4];
9211 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009212
9213 if (depth > 40) {
9214 return(XML_ERR_ENTITY_LOOP);
9215 }
9216
9217
9218
9219 if (list != NULL)
9220 *list = NULL;
9221 if ((URL == NULL) && (ID == NULL))
9222 return(-1);
9223 if (doc == NULL) /* @@ relax but check for dereferences */
9224 return(-1);
9225
9226
9227 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9228 if (ctxt == NULL) return(-1);
9229 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009230 if (oldctxt != NULL) {
9231 ctxt->_private = oldctxt->_private;
9232 ctxt->loadsubset = oldctxt->loadsubset;
9233 ctxt->validate = oldctxt->validate;
9234 ctxt->external = oldctxt->external;
9235 } else {
9236 /*
9237 * Doing validity checking on chunk without context
9238 * doesn't make sense
9239 */
9240 ctxt->_private = NULL;
9241 ctxt->validate = 0;
9242 ctxt->external = 2;
9243 ctxt->loadsubset = 0;
9244 }
Owen Taylor3473f882001-02-23 17:55:21 +00009245 if (sax != NULL) {
9246 oldsax = ctxt->sax;
9247 ctxt->sax = sax;
9248 if (user_data != NULL)
9249 ctxt->userData = user_data;
9250 }
9251 newDoc = xmlNewDoc(BAD_CAST "1.0");
9252 if (newDoc == NULL) {
9253 xmlFreeParserCtxt(ctxt);
9254 return(-1);
9255 }
9256 if (doc != NULL) {
9257 newDoc->intSubset = doc->intSubset;
9258 newDoc->extSubset = doc->extSubset;
9259 }
9260 if (doc->URL != NULL) {
9261 newDoc->URL = xmlStrdup(doc->URL);
9262 }
9263 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9264 if (newDoc->children == NULL) {
9265 if (sax != NULL)
9266 ctxt->sax = oldsax;
9267 xmlFreeParserCtxt(ctxt);
9268 newDoc->intSubset = NULL;
9269 newDoc->extSubset = NULL;
9270 xmlFreeDoc(newDoc);
9271 return(-1);
9272 }
9273 nodePush(ctxt, newDoc->children);
9274 if (doc == NULL) {
9275 ctxt->myDoc = newDoc;
9276 } else {
9277 ctxt->myDoc = doc;
9278 newDoc->children->doc = doc;
9279 }
9280
Daniel Veillard87a764e2001-06-20 17:41:10 +00009281 /*
9282 * Get the 4 first bytes and decode the charset
9283 * if enc != XML_CHAR_ENCODING_NONE
9284 * plug some encoding conversion routines.
9285 */
9286 GROW;
9287 start[0] = RAW;
9288 start[1] = NXT(1);
9289 start[2] = NXT(2);
9290 start[3] = NXT(3);
9291 enc = xmlDetectCharEncoding(start, 4);
9292 if (enc != XML_CHAR_ENCODING_NONE) {
9293 xmlSwitchEncoding(ctxt, enc);
9294 }
9295
Owen Taylor3473f882001-02-23 17:55:21 +00009296 /*
9297 * Parse a possible text declaration first
9298 */
Owen Taylor3473f882001-02-23 17:55:21 +00009299 if ((RAW == '<') && (NXT(1) == '?') &&
9300 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9301 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9302 xmlParseTextDecl(ctxt);
9303 }
9304
Owen Taylor3473f882001-02-23 17:55:21 +00009305 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009306 ctxt->depth = depth;
9307
9308 xmlParseContent(ctxt);
9309
9310 if ((RAW == '<') && (NXT(1) == '/')) {
9311 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9313 ctxt->sax->error(ctxt->userData,
9314 "chunk is not well balanced\n");
9315 ctxt->wellFormed = 0;
9316 ctxt->disableSAX = 1;
9317 } else if (RAW != 0) {
9318 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9320 ctxt->sax->error(ctxt->userData,
9321 "extra content at the end of well balanced chunk\n");
9322 ctxt->wellFormed = 0;
9323 ctxt->disableSAX = 1;
9324 }
9325 if (ctxt->node != newDoc->children) {
9326 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9328 ctxt->sax->error(ctxt->userData,
9329 "chunk is not well balanced\n");
9330 ctxt->wellFormed = 0;
9331 ctxt->disableSAX = 1;
9332 }
9333
9334 if (!ctxt->wellFormed) {
9335 if (ctxt->errNo == 0)
9336 ret = 1;
9337 else
9338 ret = ctxt->errNo;
9339 } else {
9340 if (list != NULL) {
9341 xmlNodePtr cur;
9342
9343 /*
9344 * Return the newly created nodeset after unlinking it from
9345 * they pseudo parent.
9346 */
9347 cur = newDoc->children->children;
9348 *list = cur;
9349 while (cur != NULL) {
9350 cur->parent = NULL;
9351 cur = cur->next;
9352 }
9353 newDoc->children->children = NULL;
9354 }
9355 ret = 0;
9356 }
9357 if (sax != NULL)
9358 ctxt->sax = oldsax;
9359 xmlFreeParserCtxt(ctxt);
9360 newDoc->intSubset = NULL;
9361 newDoc->extSubset = NULL;
9362 xmlFreeDoc(newDoc);
9363
9364 return(ret);
9365}
9366
9367/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009368 * xmlParseExternalEntity:
9369 * @doc: the document the chunk pertains to
9370 * @sax: the SAX handler bloc (possibly NULL)
9371 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9372 * @depth: Used for loop detection, use 0
9373 * @URL: the URL for the entity to load
9374 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009375 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009376 *
9377 * Parse an external general entity
9378 * An external general parsed entity is well-formed if it matches the
9379 * production labeled extParsedEnt.
9380 *
9381 * [78] extParsedEnt ::= TextDecl? content
9382 *
9383 * Returns 0 if the entity is well formed, -1 in case of args problem and
9384 * the parser error code otherwise
9385 */
9386
9387int
9388xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009389 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009390 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009391 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009392}
9393
9394/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009395 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009396 * @doc: the document the chunk pertains to
9397 * @sax: the SAX handler bloc (possibly NULL)
9398 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9399 * @depth: Used for loop detection, use 0
9400 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009401 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009402 *
9403 * Parse a well-balanced chunk of an XML document
9404 * called by the parser
9405 * The allowed sequence for the Well Balanced Chunk is the one defined by
9406 * the content production in the XML grammar:
9407 *
9408 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9409 *
9410 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9411 * the parser error code otherwise
9412 */
9413
9414int
9415xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009416 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009417 xmlParserCtxtPtr ctxt;
9418 xmlDocPtr newDoc;
9419 xmlSAXHandlerPtr oldsax = NULL;
9420 int size;
9421 int ret = 0;
9422
9423 if (depth > 40) {
9424 return(XML_ERR_ENTITY_LOOP);
9425 }
9426
9427
Daniel Veillardcda96922001-08-21 10:56:31 +00009428 if (lst != NULL)
9429 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009430 if (string == NULL)
9431 return(-1);
9432
9433 size = xmlStrlen(string);
9434
9435 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9436 if (ctxt == NULL) return(-1);
9437 ctxt->userData = ctxt;
9438 if (sax != NULL) {
9439 oldsax = ctxt->sax;
9440 ctxt->sax = sax;
9441 if (user_data != NULL)
9442 ctxt->userData = user_data;
9443 }
9444 newDoc = xmlNewDoc(BAD_CAST "1.0");
9445 if (newDoc == NULL) {
9446 xmlFreeParserCtxt(ctxt);
9447 return(-1);
9448 }
9449 if (doc != NULL) {
9450 newDoc->intSubset = doc->intSubset;
9451 newDoc->extSubset = doc->extSubset;
9452 }
9453 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9454 if (newDoc->children == NULL) {
9455 if (sax != NULL)
9456 ctxt->sax = oldsax;
9457 xmlFreeParserCtxt(ctxt);
9458 newDoc->intSubset = NULL;
9459 newDoc->extSubset = NULL;
9460 xmlFreeDoc(newDoc);
9461 return(-1);
9462 }
9463 nodePush(ctxt, newDoc->children);
9464 if (doc == NULL) {
9465 ctxt->myDoc = newDoc;
9466 } else {
9467 ctxt->myDoc = doc;
9468 newDoc->children->doc = doc;
9469 }
9470 ctxt->instate = XML_PARSER_CONTENT;
9471 ctxt->depth = depth;
9472
9473 /*
9474 * Doing validity checking on chunk doesn't make sense
9475 */
9476 ctxt->validate = 0;
9477 ctxt->loadsubset = 0;
9478
9479 xmlParseContent(ctxt);
9480
9481 if ((RAW == '<') && (NXT(1) == '/')) {
9482 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9484 ctxt->sax->error(ctxt->userData,
9485 "chunk is not well balanced\n");
9486 ctxt->wellFormed = 0;
9487 ctxt->disableSAX = 1;
9488 } else if (RAW != 0) {
9489 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9491 ctxt->sax->error(ctxt->userData,
9492 "extra content at the end of well balanced chunk\n");
9493 ctxt->wellFormed = 0;
9494 ctxt->disableSAX = 1;
9495 }
9496 if (ctxt->node != newDoc->children) {
9497 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9499 ctxt->sax->error(ctxt->userData,
9500 "chunk is not well balanced\n");
9501 ctxt->wellFormed = 0;
9502 ctxt->disableSAX = 1;
9503 }
9504
9505 if (!ctxt->wellFormed) {
9506 if (ctxt->errNo == 0)
9507 ret = 1;
9508 else
9509 ret = ctxt->errNo;
9510 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009511 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009512 xmlNodePtr cur;
9513
9514 /*
9515 * Return the newly created nodeset after unlinking it from
9516 * they pseudo parent.
9517 */
9518 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009519 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009520 while (cur != NULL) {
9521 cur->parent = NULL;
9522 cur = cur->next;
9523 }
9524 newDoc->children->children = NULL;
9525 }
9526 ret = 0;
9527 }
9528 if (sax != NULL)
9529 ctxt->sax = oldsax;
9530 xmlFreeParserCtxt(ctxt);
9531 newDoc->intSubset = NULL;
9532 newDoc->extSubset = NULL;
9533 xmlFreeDoc(newDoc);
9534
9535 return(ret);
9536}
9537
9538/**
9539 * xmlSAXParseEntity:
9540 * @sax: the SAX handler block
9541 * @filename: the filename
9542 *
9543 * parse an XML external entity out of context and build a tree.
9544 * It use the given SAX function block to handle the parsing callback.
9545 * If sax is NULL, fallback to the default DOM tree building routines.
9546 *
9547 * [78] extParsedEnt ::= TextDecl? content
9548 *
9549 * This correspond to a "Well Balanced" chunk
9550 *
9551 * Returns the resulting document tree
9552 */
9553
9554xmlDocPtr
9555xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9556 xmlDocPtr ret;
9557 xmlParserCtxtPtr ctxt;
9558 char *directory = NULL;
9559
9560 ctxt = xmlCreateFileParserCtxt(filename);
9561 if (ctxt == NULL) {
9562 return(NULL);
9563 }
9564 if (sax != NULL) {
9565 if (ctxt->sax != NULL)
9566 xmlFree(ctxt->sax);
9567 ctxt->sax = sax;
9568 ctxt->userData = NULL;
9569 }
9570
9571 if ((ctxt->directory == NULL) && (directory == NULL))
9572 directory = xmlParserGetDirectory(filename);
9573
9574 xmlParseExtParsedEnt(ctxt);
9575
9576 if (ctxt->wellFormed)
9577 ret = ctxt->myDoc;
9578 else {
9579 ret = NULL;
9580 xmlFreeDoc(ctxt->myDoc);
9581 ctxt->myDoc = NULL;
9582 }
9583 if (sax != NULL)
9584 ctxt->sax = NULL;
9585 xmlFreeParserCtxt(ctxt);
9586
9587 return(ret);
9588}
9589
9590/**
9591 * xmlParseEntity:
9592 * @filename: the filename
9593 *
9594 * parse an XML external entity out of context and build a tree.
9595 *
9596 * [78] extParsedEnt ::= TextDecl? content
9597 *
9598 * This correspond to a "Well Balanced" chunk
9599 *
9600 * Returns the resulting document tree
9601 */
9602
9603xmlDocPtr
9604xmlParseEntity(const char *filename) {
9605 return(xmlSAXParseEntity(NULL, filename));
9606}
9607
9608/**
9609 * xmlCreateEntityParserCtxt:
9610 * @URL: the entity URL
9611 * @ID: the entity PUBLIC ID
9612 * @base: a posible base for the target URI
9613 *
9614 * Create a parser context for an external entity
9615 * Automatic support for ZLIB/Compress compressed document is provided
9616 * by default if found at compile-time.
9617 *
9618 * Returns the new parser context or NULL
9619 */
9620xmlParserCtxtPtr
9621xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9622 const xmlChar *base) {
9623 xmlParserCtxtPtr ctxt;
9624 xmlParserInputPtr inputStream;
9625 char *directory = NULL;
9626 xmlChar *uri;
9627
9628 ctxt = xmlNewParserCtxt();
9629 if (ctxt == NULL) {
9630 return(NULL);
9631 }
9632
9633 uri = xmlBuildURI(URL, base);
9634
9635 if (uri == NULL) {
9636 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9637 if (inputStream == NULL) {
9638 xmlFreeParserCtxt(ctxt);
9639 return(NULL);
9640 }
9641
9642 inputPush(ctxt, inputStream);
9643
9644 if ((ctxt->directory == NULL) && (directory == NULL))
9645 directory = xmlParserGetDirectory((char *)URL);
9646 if ((ctxt->directory == NULL) && (directory != NULL))
9647 ctxt->directory = directory;
9648 } else {
9649 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9650 if (inputStream == NULL) {
9651 xmlFree(uri);
9652 xmlFreeParserCtxt(ctxt);
9653 return(NULL);
9654 }
9655
9656 inputPush(ctxt, inputStream);
9657
9658 if ((ctxt->directory == NULL) && (directory == NULL))
9659 directory = xmlParserGetDirectory((char *)uri);
9660 if ((ctxt->directory == NULL) && (directory != NULL))
9661 ctxt->directory = directory;
9662 xmlFree(uri);
9663 }
9664
9665 return(ctxt);
9666}
9667
9668/************************************************************************
9669 * *
9670 * Front ends when parsing from a file *
9671 * *
9672 ************************************************************************/
9673
9674/**
9675 * xmlCreateFileParserCtxt:
9676 * @filename: the filename
9677 *
9678 * Create a parser context for a file content.
9679 * Automatic support for ZLIB/Compress compressed document is provided
9680 * by default if found at compile-time.
9681 *
9682 * Returns the new parser context or NULL
9683 */
9684xmlParserCtxtPtr
9685xmlCreateFileParserCtxt(const char *filename)
9686{
9687 xmlParserCtxtPtr ctxt;
9688 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009689 char *directory = NULL;
9690
Owen Taylor3473f882001-02-23 17:55:21 +00009691 ctxt = xmlNewParserCtxt();
9692 if (ctxt == NULL) {
9693 if (xmlDefaultSAXHandler.error != NULL) {
9694 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9695 }
9696 return(NULL);
9697 }
9698
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009699 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009700 if (inputStream == NULL) {
9701 xmlFreeParserCtxt(ctxt);
9702 return(NULL);
9703 }
9704
Owen Taylor3473f882001-02-23 17:55:21 +00009705 inputPush(ctxt, inputStream);
9706 if ((ctxt->directory == NULL) && (directory == NULL))
9707 directory = xmlParserGetDirectory(filename);
9708 if ((ctxt->directory == NULL) && (directory != NULL))
9709 ctxt->directory = directory;
9710
9711 return(ctxt);
9712}
9713
9714/**
9715 * xmlSAXParseFile:
9716 * @sax: the SAX handler block
9717 * @filename: the filename
9718 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9719 * documents
9720 *
9721 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9722 * compressed document is provided by default if found at compile-time.
9723 * It use the given SAX function block to handle the parsing callback.
9724 * If sax is NULL, fallback to the default DOM tree building routines.
9725 *
9726 * Returns the resulting document tree
9727 */
9728
9729xmlDocPtr
9730xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9731 int recovery) {
9732 xmlDocPtr ret;
9733 xmlParserCtxtPtr ctxt;
9734 char *directory = NULL;
9735
9736 ctxt = xmlCreateFileParserCtxt(filename);
9737 if (ctxt == NULL) {
9738 return(NULL);
9739 }
9740 if (sax != NULL) {
9741 if (ctxt->sax != NULL)
9742 xmlFree(ctxt->sax);
9743 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009744 }
9745
9746 if ((ctxt->directory == NULL) && (directory == NULL))
9747 directory = xmlParserGetDirectory(filename);
9748 if ((ctxt->directory == NULL) && (directory != NULL))
9749 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9750
9751 xmlParseDocument(ctxt);
9752
9753 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9754 else {
9755 ret = NULL;
9756 xmlFreeDoc(ctxt->myDoc);
9757 ctxt->myDoc = NULL;
9758 }
9759 if (sax != NULL)
9760 ctxt->sax = NULL;
9761 xmlFreeParserCtxt(ctxt);
9762
9763 return(ret);
9764}
9765
9766/**
9767 * xmlRecoverDoc:
9768 * @cur: a pointer to an array of xmlChar
9769 *
9770 * parse an XML in-memory document and build a tree.
9771 * In the case the document is not Well Formed, a tree is built anyway
9772 *
9773 * Returns the resulting document tree
9774 */
9775
9776xmlDocPtr
9777xmlRecoverDoc(xmlChar *cur) {
9778 return(xmlSAXParseDoc(NULL, cur, 1));
9779}
9780
9781/**
9782 * xmlParseFile:
9783 * @filename: the filename
9784 *
9785 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9786 * compressed document is provided by default if found at compile-time.
9787 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009788 * Returns the resulting document tree if the file was wellformed,
9789 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009790 */
9791
9792xmlDocPtr
9793xmlParseFile(const char *filename) {
9794 return(xmlSAXParseFile(NULL, filename, 0));
9795}
9796
9797/**
9798 * xmlRecoverFile:
9799 * @filename: the filename
9800 *
9801 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9802 * compressed document is provided by default if found at compile-time.
9803 * In the case the document is not Well Formed, a tree is built anyway
9804 *
9805 * Returns the resulting document tree
9806 */
9807
9808xmlDocPtr
9809xmlRecoverFile(const char *filename) {
9810 return(xmlSAXParseFile(NULL, filename, 1));
9811}
9812
9813
9814/**
9815 * xmlSetupParserForBuffer:
9816 * @ctxt: an XML parser context
9817 * @buffer: a xmlChar * buffer
9818 * @filename: a file name
9819 *
9820 * Setup the parser context to parse a new buffer; Clears any prior
9821 * contents from the parser context. The buffer parameter must not be
9822 * NULL, but the filename parameter can be
9823 */
9824void
9825xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9826 const char* filename)
9827{
9828 xmlParserInputPtr input;
9829
9830 input = xmlNewInputStream(ctxt);
9831 if (input == NULL) {
9832 perror("malloc");
9833 xmlFree(ctxt);
9834 return;
9835 }
9836
9837 xmlClearParserCtxt(ctxt);
9838 if (filename != NULL)
9839 input->filename = xmlMemStrdup(filename);
9840 input->base = buffer;
9841 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009842 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009843 inputPush(ctxt, input);
9844}
9845
9846/**
9847 * xmlSAXUserParseFile:
9848 * @sax: a SAX handler
9849 * @user_data: The user data returned on SAX callbacks
9850 * @filename: a file name
9851 *
9852 * parse an XML file and call the given SAX handler routines.
9853 * Automatic support for ZLIB/Compress compressed document is provided
9854 *
9855 * Returns 0 in case of success or a error number otherwise
9856 */
9857int
9858xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9859 const char *filename) {
9860 int ret = 0;
9861 xmlParserCtxtPtr ctxt;
9862
9863 ctxt = xmlCreateFileParserCtxt(filename);
9864 if (ctxt == NULL) return -1;
9865 if (ctxt->sax != &xmlDefaultSAXHandler)
9866 xmlFree(ctxt->sax);
9867 ctxt->sax = sax;
9868 if (user_data != NULL)
9869 ctxt->userData = user_data;
9870
9871 xmlParseDocument(ctxt);
9872
9873 if (ctxt->wellFormed)
9874 ret = 0;
9875 else {
9876 if (ctxt->errNo != 0)
9877 ret = ctxt->errNo;
9878 else
9879 ret = -1;
9880 }
9881 if (sax != NULL)
9882 ctxt->sax = NULL;
9883 xmlFreeParserCtxt(ctxt);
9884
9885 return ret;
9886}
9887
9888/************************************************************************
9889 * *
9890 * Front ends when parsing from memory *
9891 * *
9892 ************************************************************************/
9893
9894/**
9895 * xmlCreateMemoryParserCtxt:
9896 * @buffer: a pointer to a char array
9897 * @size: the size of the array
9898 *
9899 * Create a parser context for an XML in-memory document.
9900 *
9901 * Returns the new parser context or NULL
9902 */
9903xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009904xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009905 xmlParserCtxtPtr ctxt;
9906 xmlParserInputPtr input;
9907 xmlParserInputBufferPtr buf;
9908
9909 if (buffer == NULL)
9910 return(NULL);
9911 if (size <= 0)
9912 return(NULL);
9913
9914 ctxt = xmlNewParserCtxt();
9915 if (ctxt == NULL)
9916 return(NULL);
9917
9918 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9919 if (buf == NULL) return(NULL);
9920
9921 input = xmlNewInputStream(ctxt);
9922 if (input == NULL) {
9923 xmlFreeParserCtxt(ctxt);
9924 return(NULL);
9925 }
9926
9927 input->filename = NULL;
9928 input->buf = buf;
9929 input->base = input->buf->buffer->content;
9930 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009931 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009932
9933 inputPush(ctxt, input);
9934 return(ctxt);
9935}
9936
9937/**
9938 * xmlSAXParseMemory:
9939 * @sax: the SAX handler block
9940 * @buffer: an pointer to a char array
9941 * @size: the size of the array
9942 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9943 * documents
9944 *
9945 * parse an XML in-memory block and use the given SAX function block
9946 * to handle the parsing callback. If sax is NULL, fallback to the default
9947 * DOM tree building routines.
9948 *
9949 * Returns the resulting document tree
9950 */
9951xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009952xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9953 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009954 xmlDocPtr ret;
9955 xmlParserCtxtPtr ctxt;
9956
9957 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9958 if (ctxt == NULL) return(NULL);
9959 if (sax != NULL) {
9960 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009961 }
9962
9963 xmlParseDocument(ctxt);
9964
9965 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9966 else {
9967 ret = NULL;
9968 xmlFreeDoc(ctxt->myDoc);
9969 ctxt->myDoc = NULL;
9970 }
9971 if (sax != NULL)
9972 ctxt->sax = NULL;
9973 xmlFreeParserCtxt(ctxt);
9974
9975 return(ret);
9976}
9977
9978/**
9979 * xmlParseMemory:
9980 * @buffer: an pointer to a char array
9981 * @size: the size of the array
9982 *
9983 * parse an XML in-memory block and build a tree.
9984 *
9985 * Returns the resulting document tree
9986 */
9987
Daniel Veillard50822cb2001-07-26 20:05:51 +00009988xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009989 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9990}
9991
9992/**
9993 * xmlRecoverMemory:
9994 * @buffer: an pointer to a char array
9995 * @size: the size of the array
9996 *
9997 * parse an XML in-memory block and build a tree.
9998 * In the case the document is not Well Formed, a tree is built anyway
9999 *
10000 * Returns the resulting document tree
10001 */
10002
Daniel Veillard50822cb2001-07-26 20:05:51 +000010003xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010004 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10005}
10006
10007/**
10008 * xmlSAXUserParseMemory:
10009 * @sax: a SAX handler
10010 * @user_data: The user data returned on SAX callbacks
10011 * @buffer: an in-memory XML document input
10012 * @size: the length of the XML document in bytes
10013 *
10014 * A better SAX parsing routine.
10015 * parse an XML in-memory buffer and call the given SAX handler routines.
10016 *
10017 * Returns 0 in case of success or a error number otherwise
10018 */
10019int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010020 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010021 int ret = 0;
10022 xmlParserCtxtPtr ctxt;
10023 xmlSAXHandlerPtr oldsax = NULL;
10024
10025 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10026 if (ctxt == NULL) return -1;
10027 if (sax != NULL) {
10028 oldsax = ctxt->sax;
10029 ctxt->sax = sax;
10030 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010031 if (user_data != NULL)
10032 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010033
10034 xmlParseDocument(ctxt);
10035
10036 if (ctxt->wellFormed)
10037 ret = 0;
10038 else {
10039 if (ctxt->errNo != 0)
10040 ret = ctxt->errNo;
10041 else
10042 ret = -1;
10043 }
10044 if (sax != NULL) {
10045 ctxt->sax = oldsax;
10046 }
10047 xmlFreeParserCtxt(ctxt);
10048
10049 return ret;
10050}
10051
10052/**
10053 * xmlCreateDocParserCtxt:
10054 * @cur: a pointer to an array of xmlChar
10055 *
10056 * Creates a parser context for an XML in-memory document.
10057 *
10058 * Returns the new parser context or NULL
10059 */
10060xmlParserCtxtPtr
10061xmlCreateDocParserCtxt(xmlChar *cur) {
10062 int len;
10063
10064 if (cur == NULL)
10065 return(NULL);
10066 len = xmlStrlen(cur);
10067 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10068}
10069
10070/**
10071 * xmlSAXParseDoc:
10072 * @sax: the SAX handler block
10073 * @cur: a pointer to an array of xmlChar
10074 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10075 * documents
10076 *
10077 * parse an XML in-memory document and build a tree.
10078 * It use the given SAX function block to handle the parsing callback.
10079 * If sax is NULL, fallback to the default DOM tree building routines.
10080 *
10081 * Returns the resulting document tree
10082 */
10083
10084xmlDocPtr
10085xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10086 xmlDocPtr ret;
10087 xmlParserCtxtPtr ctxt;
10088
10089 if (cur == NULL) return(NULL);
10090
10091
10092 ctxt = xmlCreateDocParserCtxt(cur);
10093 if (ctxt == NULL) return(NULL);
10094 if (sax != NULL) {
10095 ctxt->sax = sax;
10096 ctxt->userData = NULL;
10097 }
10098
10099 xmlParseDocument(ctxt);
10100 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10101 else {
10102 ret = NULL;
10103 xmlFreeDoc(ctxt->myDoc);
10104 ctxt->myDoc = NULL;
10105 }
10106 if (sax != NULL)
10107 ctxt->sax = NULL;
10108 xmlFreeParserCtxt(ctxt);
10109
10110 return(ret);
10111}
10112
10113/**
10114 * xmlParseDoc:
10115 * @cur: a pointer to an array of xmlChar
10116 *
10117 * parse an XML in-memory document and build a tree.
10118 *
10119 * Returns the resulting document tree
10120 */
10121
10122xmlDocPtr
10123xmlParseDoc(xmlChar *cur) {
10124 return(xmlSAXParseDoc(NULL, cur, 0));
10125}
10126
10127
10128/************************************************************************
10129 * *
10130 * Miscellaneous *
10131 * *
10132 ************************************************************************/
10133
10134#ifdef LIBXML_XPATH_ENABLED
10135#include <libxml/xpath.h>
10136#endif
10137
/* Set to 1 by xmlInitParser() once it has run, reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
10139
10140/**
10141 * xmlInitParser:
10142 *
10143 * Initialization function for the XML parser.
10144 * This is not reentrant. Call once before processing in case of
10145 * use in multithreaded programs.
10146 */
10147
10148void
10149xmlInitParser(void) {
10150 if (xmlParserInitialized) return;
10151
10152 xmlInitCharEncodingHandlers();
10153 xmlInitializePredefinedEntities();
10154 xmlDefaultSAXHandlerInit();
10155 xmlRegisterDefaultInputCallbacks();
10156 xmlRegisterDefaultOutputCallbacks();
10157#ifdef LIBXML_HTML_ENABLED
10158 htmlInitAutoClose();
10159 htmlDefaultSAXHandlerInit();
10160#endif
10161#ifdef LIBXML_XPATH_ENABLED
10162 xmlXPathInit();
10163#endif
10164 xmlParserInitialized = 1;
10165}
10166
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void) {
    /* Reset the flag so the next xmlInitParser() call initializes again. */
    xmlParserInitialized = 0;
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
#ifdef LIBXML_CATALOG_ENABLED
    /* Only compiled in when catalog support is enabled. */
    xmlCatalogCleanup();
#endif
}
10185