blob: 108bd7376047110c67590257e9b26caaacf7e9b7 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
Daniel Veillard21a0f912001-02-25 19:54:14 +000077#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000078#define XML_PARSER_BUFFER_SIZE 100
79
/*
 * Global default settings for the parser.
 *
 * NOTE(review): apart from xmlParserDebugEntities, how each default is
 * consumed is not visible in this part of the file; the per-variable
 * notes below are derived from the names only -- confirm against the
 * context initialisation code.
 */

/* presumably: report warnings by default */
int xmlGetWarningsDefaultValue = 1;

/* when non-zero, input push/pop and entity decoding emit debug traces */
int xmlParserDebugEntities = 0;

/*
 * On VMS the two identifiers below are defined under truncated names and
 * the full names are mapped onto them with macros.
 */
#ifndef VMS
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#else
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

/* presumably: external DTD loading is off by default */
int xmlLoadExtDtdDefaultValue = 0;

/* presumably: pedantic diagnostics are off by default */
int xmlPedanticParserDefaultValue = 0;

/* presumably: blank text is kept by default */
int xmlKeepBlanksDefaultValue = 1;

/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
114 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000115 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000116 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000117
118/************************************************************************
119 * *
120 * Parser stacks related functions and macros *
121 * *
122 ************************************************************************/
123
124xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
125 const xmlChar ** str);
126
127/*
128 * Generic function for accessing stacks in the Parser Context
129 */
130
131#define PUSH_AND_POP(scope, type, name) \
132scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
133 if (ctxt->name##Nr >= ctxt->name##Max) { \
134 ctxt->name##Max *= 2; \
135 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
136 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
137 if (ctxt->name##Tab == NULL) { \
138 xmlGenericError(xmlGenericErrorContext, \
139 "realloc failed !\n"); \
140 return(0); \
141 } \
142 } \
143 ctxt->name##Tab[ctxt->name##Nr] = value; \
144 ctxt->name = value; \
145 return(ctxt->name##Nr++); \
146} \
147scope type name##Pop(xmlParserCtxtPtr ctxt) { \
148 type ret; \
149 if (ctxt->name##Nr <= 0) return(0); \
150 ctxt->name##Nr--; \
151 if (ctxt->name##Nr > 0) \
152 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
153 else \
154 ctxt->name = NULL; \
155 ret = ctxt->name##Tab[ctxt->name##Nr]; \
156 ctxt->name##Tab[ctxt->name##Nr] = 0; \
157 return(ret); \
158} \
159
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160/**
161 * inputPop:
162 * @ctxt: an XML parser context
163 *
164 * Pops the top parser input from the input stack
165 *
166 * Returns the input just removed
167 */
168/**
169 * inputPush:
170 * @ctxt: an XML parser context
171 * @input: the parser input
172 *
173 * Pushes a new parser input on top of the input stack
174 */
175/**
176 * namePop:
177 * @ctxt: an XML parser context
178 *
179 * Pops the top element name from the name stack
180 *
181 * Returns the name just removed
182 */
183/**
184 * namePush:
185 * @ctxt: an XML parser context
186 * @name: the element name
187 *
188 * Pushes a new element name on top of the name stack
189 */
190/**
191 * nodePop:
192 * @ctxt: an XML parser context
193 *
194 * Pops the top element node from the node stack
195 *
196 * Returns the node just removed
197 */
198/**
199 * nodePush:
200 * @ctxt: an XML parser context
201 * @node: the element node
202 *
203 * Pushes a new element node on top of the node stack
204 */
Owen Taylor3473f882001-02-23 17:55:21 +0000205/*
206 * Those macros actually generate the functions
207 */
208PUSH_AND_POP(extern, xmlParserInputPtr, input)
209PUSH_AND_POP(extern, xmlNodePtr, node)
210PUSH_AND_POP(extern, xmlChar*, name)
211
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000212static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000213 if (ctxt->spaceNr >= ctxt->spaceMax) {
214 ctxt->spaceMax *= 2;
215 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
216 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
217 if (ctxt->spaceTab == NULL) {
218 xmlGenericError(xmlGenericErrorContext,
219 "realloc failed !\n");
220 return(0);
221 }
222 }
223 ctxt->spaceTab[ctxt->spaceNr] = val;
224 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
225 return(ctxt->spaceNr++);
226}
227
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000228static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000229 int ret;
230 if (ctxt->spaceNr <= 0) return(0);
231 ctxt->spaceNr--;
232 if (ctxt->spaceNr > 0)
233 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
234 else
235 ctxt->space = NULL;
236 ret = ctxt->spaceTab[ctxt->spaceNr];
237 ctxt->spaceTab[ctxt->spaceNr] = -1;
238 return(ret);
239}
240
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.  They all expect a variable named `ctxt` (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending token (ctxt->token) if there is one,
 *           otherwise the current xmlChar of the input buffer.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only looks at the input buffer: it evaluates
 *           to -1 while a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar and refill the input if its end
 *           was reached.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW and NEXT1 deliberately keep their bare "if { }" / "{ }"
 * shape: some call sites use them without a trailing semicolon, so they
 * cannot be turned into do { } while (0) statements.  For the same reason
 * COPY_BUF expands to an unbraced if/else and must always be used inside
 * braces.  Macro arguments are parenthesized in the expansions.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 { \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000327
328/**
329 * xmlSkipBlankChars:
330 * @ctxt: the XML parser context
331 *
332 * skip all blanks character found at that point in the input streams.
333 * It pops up finished entities in the process if allowable at that point.
334 *
335 * Returns the number of space chars skipped
336 */
337
338int
339xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000340 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000341
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 if (ctxt->token != 0) {
343 if (!IS_BLANK(ctxt->token))
344 return(0);
345 ctxt->token = 0;
346 res++;
347 }
Owen Taylor3473f882001-02-23 17:55:21 +0000348 /*
349 * It's Okay to use CUR/NEXT here since all the blanks are on
350 * the ASCII range.
351 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000352 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
353 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000354 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000355 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000356 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000357 cur = ctxt->input->cur;
358 while (IS_BLANK(*cur)) {
359 if (*cur == '\n') {
360 ctxt->input->line++; ctxt->input->col = 1;
361 }
362 cur++;
363 res++;
364 if (*cur == 0) {
365 ctxt->input->cur = cur;
366 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
367 cur = ctxt->input->cur;
368 }
369 }
370 ctxt->input->cur = cur;
371 } else {
372 int cur;
373 do {
374 cur = CUR;
375 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
376 NEXT;
377 cur = CUR;
378 res++;
379 }
380 while ((cur == 0) && (ctxt->inputNr > 1) &&
381 (ctxt->instate != XML_PARSER_COMMENT)) {
382 xmlPopInput(ctxt);
383 cur = CUR;
384 }
385 /*
386 * Need to handle support of entities branching here
387 */
388 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
389 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
390 }
Owen Taylor3473f882001-02-23 17:55:21 +0000391 return(res);
392}
393
394/************************************************************************
395 * *
396 * Commodity functions to handle entities *
397 * *
398 ************************************************************************/
399
400/**
401 * xmlPopInput:
402 * @ctxt: an XML parser context
403 *
404 * xmlPopInput: the current input pointed by ctxt->input came to an end
405 * pop it and return the next char.
406 *
407 * Returns the current xmlChar in the parser context
408 */
409xmlChar
410xmlPopInput(xmlParserCtxtPtr ctxt) {
411 if (ctxt->inputNr == 1) return(0); /* End of main Input */
412 if (xmlParserDebugEntities)
413 xmlGenericError(xmlGenericErrorContext,
414 "Popping input %d\n", ctxt->inputNr);
415 xmlFreeInputStream(inputPop(ctxt));
416 if ((*ctxt->input->cur == 0) &&
417 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
418 return(xmlPopInput(ctxt));
419 return(CUR);
420}
421
422/**
423 * xmlPushInput:
424 * @ctxt: an XML parser context
425 * @input: an XML parser input fragment (entity, XML fragment ...).
426 *
427 * xmlPushInput: switch to a new input stream which is stacked on top
428 * of the previous one(s).
429 */
430void
431xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
432 if (input == NULL) return;
433
434 if (xmlParserDebugEntities) {
435 if ((ctxt->input != NULL) && (ctxt->input->filename))
436 xmlGenericError(xmlGenericErrorContext,
437 "%s(%d): ", ctxt->input->filename,
438 ctxt->input->line);
439 xmlGenericError(xmlGenericErrorContext,
440 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
441 }
442 inputPush(ctxt, input);
443 GROW;
444}
445
446/**
447 * xmlParseCharRef:
448 * @ctxt: an XML parser context
449 *
450 * parse Reference declarations
451 *
452 * [66] CharRef ::= '&#' [0-9]+ ';' |
453 * '&#x' [0-9a-fA-F]+ ';'
454 *
455 * [ WFC: Legal Character ]
456 * Characters referred to using character references must match the
457 * production for Char.
458 *
459 * Returns the value parsed (as an int), 0 in case of error
460 */
461int
462xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000463 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000464 int count = 0;
465
466 if (ctxt->token != 0) {
467 val = ctxt->token;
468 ctxt->token = 0;
469 return(val);
470 }
471 /*
472 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
473 */
474 if ((RAW == '&') && (NXT(1) == '#') &&
475 (NXT(2) == 'x')) {
476 SKIP(3);
477 GROW;
478 while (RAW != ';') { /* loop blocked by count */
479 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
480 val = val * 16 + (CUR - '0');
481 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
482 val = val * 16 + (CUR - 'a') + 10;
483 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
484 val = val * 16 + (CUR - 'A') + 10;
485 else {
486 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488 ctxt->sax->error(ctxt->userData,
489 "xmlParseCharRef: invalid hexadecimal value\n");
490 ctxt->wellFormed = 0;
491 ctxt->disableSAX = 1;
492 val = 0;
493 break;
494 }
495 NEXT;
496 count++;
497 }
498 if (RAW == ';') {
499 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
500 ctxt->nbChars ++;
501 ctxt->input->cur++;
502 }
503 } else if ((RAW == '&') && (NXT(1) == '#')) {
504 SKIP(2);
505 GROW;
506 while (RAW != ';') { /* loop blocked by count */
507 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
508 val = val * 10 + (CUR - '0');
509 else {
510 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
512 ctxt->sax->error(ctxt->userData,
513 "xmlParseCharRef: invalid decimal value\n");
514 ctxt->wellFormed = 0;
515 ctxt->disableSAX = 1;
516 val = 0;
517 break;
518 }
519 NEXT;
520 count++;
521 }
522 if (RAW == ';') {
523 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
524 ctxt->nbChars ++;
525 ctxt->input->cur++;
526 }
527 } else {
528 ctxt->errNo = XML_ERR_INVALID_CHARREF;
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530 ctxt->sax->error(ctxt->userData,
531 "xmlParseCharRef: invalid value\n");
532 ctxt->wellFormed = 0;
533 ctxt->disableSAX = 1;
534 }
535
536 /*
537 * [ WFC: Legal Character ]
538 * Characters referred to using character references must match the
539 * production for Char.
540 */
541 if (IS_CHAR(val)) {
542 return(val);
543 } else {
544 ctxt->errNo = XML_ERR_INVALID_CHAR;
545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
546 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
547 val);
548 ctxt->wellFormed = 0;
549 ctxt->disableSAX = 1;
550 }
551 return(0);
552}
553
554/**
555 * xmlParseStringCharRef:
556 * @ctxt: an XML parser context
557 * @str: a pointer to an index in the string
558 *
559 * parse Reference declarations, variant parsing from a string rather
560 * than an an input flow.
561 *
562 * [66] CharRef ::= '&#' [0-9]+ ';' |
563 * '&#x' [0-9a-fA-F]+ ';'
564 *
565 * [ WFC: Legal Character ]
566 * Characters referred to using character references must match the
567 * production for Char.
568 *
569 * Returns the value parsed (as an int), 0 in case of error, str will be
570 * updated to the current value of the index
571 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000572static int
Owen Taylor3473f882001-02-23 17:55:21 +0000573xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
574 const xmlChar *ptr;
575 xmlChar cur;
576 int val = 0;
577
578 if ((str == NULL) || (*str == NULL)) return(0);
579 ptr = *str;
580 cur = *ptr;
581 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
582 ptr += 3;
583 cur = *ptr;
584 while (cur != ';') { /* Non input consuming loop */
585 if ((cur >= '0') && (cur <= '9'))
586 val = val * 16 + (cur - '0');
587 else if ((cur >= 'a') && (cur <= 'f'))
588 val = val * 16 + (cur - 'a') + 10;
589 else if ((cur >= 'A') && (cur <= 'F'))
590 val = val * 16 + (cur - 'A') + 10;
591 else {
592 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
594 ctxt->sax->error(ctxt->userData,
595 "xmlParseStringCharRef: invalid hexadecimal value\n");
596 ctxt->wellFormed = 0;
597 ctxt->disableSAX = 1;
598 val = 0;
599 break;
600 }
601 ptr++;
602 cur = *ptr;
603 }
604 if (cur == ';')
605 ptr++;
606 } else if ((cur == '&') && (ptr[1] == '#')){
607 ptr += 2;
608 cur = *ptr;
609 while (cur != ';') { /* Non input consuming loops */
610 if ((cur >= '0') && (cur <= '9'))
611 val = val * 10 + (cur - '0');
612 else {
613 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
616 "xmlParseStringCharRef: invalid decimal value\n");
617 ctxt->wellFormed = 0;
618 ctxt->disableSAX = 1;
619 val = 0;
620 break;
621 }
622 ptr++;
623 cur = *ptr;
624 }
625 if (cur == ';')
626 ptr++;
627 } else {
628 ctxt->errNo = XML_ERR_INVALID_CHARREF;
629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
630 ctxt->sax->error(ctxt->userData,
631 "xmlParseCharRef: invalid value\n");
632 ctxt->wellFormed = 0;
633 ctxt->disableSAX = 1;
634 return(0);
635 }
636 *str = ptr;
637
638 /*
639 * [ WFC: Legal Character ]
640 * Characters referred to using character references must match the
641 * production for Char.
642 */
643 if (IS_CHAR(val)) {
644 return(val);
645 } else {
646 ctxt->errNo = XML_ERR_INVALID_CHAR;
647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
648 ctxt->sax->error(ctxt->userData,
649 "CharRef: invalid xmlChar value %d\n", val);
650 ctxt->wellFormed = 0;
651 ctxt->disableSAX = 1;
652 }
653 return(0);
654}
655
656/**
657 * xmlParserHandlePEReference:
658 * @ctxt: the parser context
659 *
660 * [69] PEReference ::= '%' Name ';'
661 *
662 * [ WFC: No Recursion ]
663 * A parsed entity must not contain a recursive
664 * reference to itself, either directly or indirectly.
665 *
666 * [ WFC: Entity Declared ]
667 * In a document without any DTD, a document with only an internal DTD
668 * subset which contains no parameter entity references, or a document
669 * with "standalone='yes'", ... ... The declaration of a parameter
670 * entity must precede any reference to it...
671 *
672 * [ VC: Entity Declared ]
673 * In a document with an external subset or external parameter entities
674 * with "standalone='no'", ... ... The declaration of a parameter entity
675 * must precede any reference to it...
676 *
677 * [ WFC: In DTD ]
678 * Parameter-entity references may only appear in the DTD.
679 * NOTE: misleading but this is handled.
680 *
681 * A PEReference may have been detected in the current input stream
682 * the handling is done accordingly to
683 * http://www.w3.org/TR/REC-xml#entproc
684 * i.e.
685 * - Included in literal in entity values
686 * - Included as Paraemeter Entity reference within DTDs
687 */
688void
689xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
690 xmlChar *name;
691 xmlEntityPtr entity = NULL;
692 xmlParserInputPtr input;
693
694 if (ctxt->token != 0) {
695 return;
696 }
697 if (RAW != '%') return;
698 switch(ctxt->instate) {
699 case XML_PARSER_CDATA_SECTION:
700 return;
701 case XML_PARSER_COMMENT:
702 return;
703 case XML_PARSER_START_TAG:
704 return;
705 case XML_PARSER_END_TAG:
706 return;
707 case XML_PARSER_EOF:
708 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
710 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
711 ctxt->wellFormed = 0;
712 ctxt->disableSAX = 1;
713 return;
714 case XML_PARSER_PROLOG:
715 case XML_PARSER_START:
716 case XML_PARSER_MISC:
717 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
719 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
720 ctxt->wellFormed = 0;
721 ctxt->disableSAX = 1;
722 return;
723 case XML_PARSER_ENTITY_DECL:
724 case XML_PARSER_CONTENT:
725 case XML_PARSER_ATTRIBUTE_VALUE:
726 case XML_PARSER_PI:
727 case XML_PARSER_SYSTEM_LITERAL:
728 /* we just ignore it there */
729 return;
730 case XML_PARSER_EPILOG:
731 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
733 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
734 ctxt->wellFormed = 0;
735 ctxt->disableSAX = 1;
736 return;
737 case XML_PARSER_ENTITY_VALUE:
738 /*
739 * NOTE: in the case of entity values, we don't do the
740 * substitution here since we need the literal
741 * entity value to be able to save the internal
742 * subset of the document.
743 * This will be handled by xmlStringDecodeEntities
744 */
745 return;
746 case XML_PARSER_DTD:
747 /*
748 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
749 * In the internal DTD subset, parameter-entity references
750 * can occur only where markup declarations can occur, not
751 * within markup declarations.
752 * In that case this is handled in xmlParseMarkupDecl
753 */
754 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
755 return;
756 break;
757 case XML_PARSER_IGNORE:
758 return;
759 }
760
761 NEXT;
762 name = xmlParseName(ctxt);
763 if (xmlParserDebugEntities)
764 xmlGenericError(xmlGenericErrorContext,
765 "PE Reference: %s\n", name);
766 if (name == NULL) {
767 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
769 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
770 ctxt->wellFormed = 0;
771 ctxt->disableSAX = 1;
772 } else {
773 if (RAW == ';') {
774 NEXT;
775 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
776 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
777 if (entity == NULL) {
778
779 /*
780 * [ WFC: Entity Declared ]
781 * In a document without any DTD, a document with only an
782 * internal DTD subset which contains no parameter entity
783 * references, or a document with "standalone='yes'", ...
784 * ... The declaration of a parameter entity must precede
785 * any reference to it...
786 */
787 if ((ctxt->standalone == 1) ||
788 ((ctxt->hasExternalSubset == 0) &&
789 (ctxt->hasPErefs == 0))) {
790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
791 ctxt->sax->error(ctxt->userData,
792 "PEReference: %%%s; not found\n", name);
793 ctxt->wellFormed = 0;
794 ctxt->disableSAX = 1;
795 } else {
796 /*
797 * [ VC: Entity Declared ]
798 * In a document with an external subset or external
799 * parameter entities with "standalone='no'", ...
800 * ... The declaration of a parameter entity must precede
801 * any reference to it...
802 */
803 if ((!ctxt->disableSAX) &&
804 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
805 ctxt->vctxt.error(ctxt->vctxt.userData,
806 "PEReference: %%%s; not found\n", name);
807 } else if ((!ctxt->disableSAX) &&
808 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
809 ctxt->sax->warning(ctxt->userData,
810 "PEReference: %%%s; not found\n", name);
811 ctxt->valid = 0;
812 }
813 } else {
814 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
815 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000816 xmlChar start[4];
817 xmlCharEncoding enc;
818
Owen Taylor3473f882001-02-23 17:55:21 +0000819 /*
820 * handle the extra spaces added before and after
821 * c.f. http://www.w3.org/TR/REC-xml#as-PE
822 * this is done independantly.
823 */
824 input = xmlNewEntityInputStream(ctxt, entity);
825 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000826
827 /*
828 * Get the 4 first bytes and decode the charset
829 * if enc != XML_CHAR_ENCODING_NONE
830 * plug some encoding conversion routines.
831 */
832 GROW
833 start[0] = RAW;
834 start[1] = NXT(1);
835 start[2] = NXT(2);
836 start[3] = NXT(3);
837 enc = xmlDetectCharEncoding(start, 4);
838 if (enc != XML_CHAR_ENCODING_NONE) {
839 xmlSwitchEncoding(ctxt, enc);
840 }
841
Owen Taylor3473f882001-02-23 17:55:21 +0000842 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
843 (RAW == '<') && (NXT(1) == '?') &&
844 (NXT(2) == 'x') && (NXT(3) == 'm') &&
845 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
846 xmlParseTextDecl(ctxt);
847 }
848 if (ctxt->token == 0)
849 ctxt->token = ' ';
850 } else {
851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
852 ctxt->sax->error(ctxt->userData,
853 "xmlHandlePEReference: %s is not a parameter entity\n",
854 name);
855 ctxt->wellFormed = 0;
856 ctxt->disableSAX = 1;
857 }
858 }
859 } else {
860 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
862 ctxt->sax->error(ctxt->userData,
863 "xmlHandlePEReference: expecting ';'\n");
864 ctxt->wellFormed = 0;
865 ctxt->disableSAX = 1;
866 }
867 xmlFree(name);
868 }
869}
870
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of @buffer, whose current
 * capacity must be held in a variable named buffer_size (the macro
 * argument with "_size" appended).  It may only be used in functions
 * returning a pointer: when the reallocation fails the error is reported
 * with perror(), the old buffer is released and the enclosing function
 * returns NULL.
 *
 * The reallocation goes through a temporary so the old buffer is not
 * lost on failure.  Releasing it there assumes the buffer is owned
 * exclusively by the calling function -- TODO confirm for every call
 * site.
 */
#define growBuffer(buffer) { \
    xmlChar *xml_grow_tmp; \
    xml_grow_tmp = (xmlChar *) \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar)); \
    if (xml_grow_tmp == NULL) { \
        perror("realloc failed"); \
        xmlFree(buffer); \
        return(NULL); \
    } \
    buffer = xml_grow_tmp; \
    buffer##_size *= 2; \
}
883
884/**
885 * xmlStringDecodeEntities:
886 * @ctxt: the parser context
887 * @str: the input string
888 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
889 * @end: an end marker xmlChar, 0 if none
890 * @end2: an end marker xmlChar, 0 if none
891 * @end3: an end marker xmlChar, 0 if none
892 *
893 * Takes a entity string content and process to do the adequate subtitutions.
894 *
895 * [67] Reference ::= EntityRef | CharRef
896 *
897 * [69] PEReference ::= '%' Name ';'
898 *
899 * Returns A newly allocated string with the substitution done. The caller
900 * must deallocate it !
901 */
902xmlChar *
903xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
904 xmlChar end, xmlChar end2, xmlChar end3) {
905 xmlChar *buffer = NULL;
906 int buffer_size = 0;
907
908 xmlChar *current = NULL;
909 xmlEntityPtr ent;
910 int c,l;
911 int nbchars = 0;
912
913 if (str == NULL)
914 return(NULL);
915
916 if (ctxt->depth > 40) {
917 ctxt->errNo = XML_ERR_ENTITY_LOOP;
918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
919 ctxt->sax->error(ctxt->userData,
920 "Detected entity reference loop\n");
921 ctxt->wellFormed = 0;
922 ctxt->disableSAX = 1;
923 return(NULL);
924 }
925
926 /*
927 * allocate a translation buffer.
928 */
929 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
930 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
931 if (buffer == NULL) {
932 perror("xmlDecodeEntities: malloc failed");
933 return(NULL);
934 }
935
936 /*
937 * Ok loop until we reach one of the ending char or a size limit.
938 * we are operating on already parsed values.
939 */
940 c = CUR_SCHAR(str, l);
941 while ((c != 0) && (c != end) && /* non input consuming loop */
942 (c != end2) && (c != end3)) {
943
944 if (c == 0) break;
945 if ((c == '&') && (str[1] == '#')) {
946 int val = xmlParseStringCharRef(ctxt, &str);
947 if (val != 0) {
948 COPY_BUF(0,buffer,nbchars,val);
949 }
950 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
951 if (xmlParserDebugEntities)
952 xmlGenericError(xmlGenericErrorContext,
953 "String decoding Entity Reference: %.30s\n",
954 str);
955 ent = xmlParseStringEntityRef(ctxt, &str);
956 if ((ent != NULL) &&
957 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
958 if (ent->content != NULL) {
959 COPY_BUF(0,buffer,nbchars,ent->content[0]);
960 } else {
961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
962 ctxt->sax->error(ctxt->userData,
963 "internal error entity has no content\n");
964 }
965 } else if ((ent != NULL) && (ent->content != NULL)) {
966 xmlChar *rep;
967
968 ctxt->depth++;
969 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
970 0, 0, 0);
971 ctxt->depth--;
972 if (rep != NULL) {
973 current = rep;
974 while (*current != 0) { /* non input consuming loop */
975 buffer[nbchars++] = *current++;
976 if (nbchars >
977 buffer_size - XML_PARSER_BUFFER_SIZE) {
978 growBuffer(buffer);
979 }
980 }
981 xmlFree(rep);
982 }
983 } else if (ent != NULL) {
984 int i = xmlStrlen(ent->name);
985 const xmlChar *cur = ent->name;
986
987 buffer[nbchars++] = '&';
988 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
989 growBuffer(buffer);
990 }
991 for (;i > 0;i--)
992 buffer[nbchars++] = *cur++;
993 buffer[nbchars++] = ';';
994 }
995 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
996 if (xmlParserDebugEntities)
997 xmlGenericError(xmlGenericErrorContext,
998 "String decoding PE Reference: %.30s\n", str);
999 ent = xmlParseStringPEReference(ctxt, &str);
1000 if (ent != NULL) {
1001 xmlChar *rep;
1002
1003 ctxt->depth++;
1004 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1005 0, 0, 0);
1006 ctxt->depth--;
1007 if (rep != NULL) {
1008 current = rep;
1009 while (*current != 0) { /* non input consuming loop */
1010 buffer[nbchars++] = *current++;
1011 if (nbchars >
1012 buffer_size - XML_PARSER_BUFFER_SIZE) {
1013 growBuffer(buffer);
1014 }
1015 }
1016 xmlFree(rep);
1017 }
1018 }
1019 } else {
1020 COPY_BUF(l,buffer,nbchars,c);
1021 str += l;
1022 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1023 growBuffer(buffer);
1024 }
1025 }
1026 c = CUR_SCHAR(str, l);
1027 }
1028 buffer[nbchars++] = 0;
1029 return(buffer);
1030}
1031
1032
1033/************************************************************************
1034 * *
1035 * Commodity functions to handle xmlChars *
1036 * *
1037 ************************************************************************/
1038
1039/**
1040 * xmlStrndup:
1041 * @cur: the input xmlChar *
1042 * @len: the len of @cur
1043 *
1044 * a strndup for array of xmlChar's
1045 *
1046 * Returns a new xmlChar * or NULL
1047 */
1048xmlChar *
1049xmlStrndup(const xmlChar *cur, int len) {
1050 xmlChar *ret;
1051
1052 if ((cur == NULL) || (len < 0)) return(NULL);
1053 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1054 if (ret == NULL) {
1055 xmlGenericError(xmlGenericErrorContext,
1056 "malloc of %ld byte failed\n",
1057 (len + 1) * (long)sizeof(xmlChar));
1058 return(NULL);
1059 }
1060 memcpy(ret, cur, len * sizeof(xmlChar));
1061 ret[len] = 0;
1062 return(ret);
1063}
1064
1065/**
1066 * xmlStrdup:
1067 * @cur: the input xmlChar *
1068 *
1069 * a strdup for array of xmlChar's. Since they are supposed to be
1070 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1071 * a termination mark of '0'.
1072 *
1073 * Returns a new xmlChar * or NULL
1074 */
1075xmlChar *
1076xmlStrdup(const xmlChar *cur) {
1077 const xmlChar *p = cur;
1078
1079 if (cur == NULL) return(NULL);
1080 while (*p != 0) p++; /* non input consuming */
1081 return(xmlStrndup(cur, p - cur));
1082}
1083
1084/**
1085 * xmlCharStrndup:
1086 * @cur: the input char *
1087 * @len: the len of @cur
1088 *
1089 * a strndup for char's to xmlChar's
1090 *
1091 * Returns a new xmlChar * or NULL
1092 */
1093
1094xmlChar *
1095xmlCharStrndup(const char *cur, int len) {
1096 int i;
1097 xmlChar *ret;
1098
1099 if ((cur == NULL) || (len < 0)) return(NULL);
1100 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1101 if (ret == NULL) {
1102 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1103 (len + 1) * (long)sizeof(xmlChar));
1104 return(NULL);
1105 }
1106 for (i = 0;i < len;i++)
1107 ret[i] = (xmlChar) cur[i];
1108 ret[len] = 0;
1109 return(ret);
1110}
1111
1112/**
1113 * xmlCharStrdup:
1114 * @cur: the input char *
1115 * @len: the len of @cur
1116 *
1117 * a strdup for char's to xmlChar's
1118 *
1119 * Returns a new xmlChar * or NULL
1120 */
1121
1122xmlChar *
1123xmlCharStrdup(const char *cur) {
1124 const char *p = cur;
1125
1126 if (cur == NULL) return(NULL);
1127 while (*p != '\0') p++; /* non input consuming */
1128 return(xmlCharStrndup(cur, p - cur));
1129}
1130
1131/**
1132 * xmlStrcmp:
1133 * @str1: the first xmlChar *
1134 * @str2: the second xmlChar *
1135 *
1136 * a strcmp for xmlChar's
1137 *
1138 * Returns the integer result of the comparison
1139 */
1140
1141int
1142xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1143 register int tmp;
1144
1145 if (str1 == str2) return(0);
1146 if (str1 == NULL) return(-1);
1147 if (str2 == NULL) return(1);
1148 do {
1149 tmp = *str1++ - *str2;
1150 if (tmp != 0) return(tmp);
1151 } while (*str2++ != 0);
1152 return 0;
1153}
1154
1155/**
1156 * xmlStrEqual:
1157 * @str1: the first xmlChar *
1158 * @str2: the second xmlChar *
1159 *
1160 * Check if both string are equal of have same content
1161 * Should be a bit more readable and faster than xmlStrEqual()
1162 *
1163 * Returns 1 if they are equal, 0 if they are different
1164 */
1165
1166int
1167xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1168 if (str1 == str2) return(1);
1169 if (str1 == NULL) return(0);
1170 if (str2 == NULL) return(0);
1171 do {
1172 if (*str1++ != *str2) return(0);
1173 } while (*str2++);
1174 return(1);
1175}
1176
1177/**
1178 * xmlStrncmp:
1179 * @str1: the first xmlChar *
1180 * @str2: the second xmlChar *
1181 * @len: the max comparison length
1182 *
1183 * a strncmp for xmlChar's
1184 *
1185 * Returns the integer result of the comparison
1186 */
1187
1188int
1189xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1190 register int tmp;
1191
1192 if (len <= 0) return(0);
1193 if (str1 == str2) return(0);
1194 if (str1 == NULL) return(-1);
1195 if (str2 == NULL) return(1);
1196 do {
1197 tmp = *str1++ - *str2;
1198 if (tmp != 0 || --len == 0) return(tmp);
1199 } while (*str2++ != 0);
1200 return 0;
1201}
1202
1203static xmlChar casemap[256] = {
1204 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1205 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1206 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1207 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1208 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1209 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1210 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1211 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1212 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1213 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1214 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1215 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1216 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1217 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1218 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1219 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1220 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1221 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1222 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1223 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1224 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1225 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1226 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1227 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1228 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1229 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1230 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1231 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1232 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1233 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1234 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1235 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1236};
1237
1238/**
1239 * xmlStrcasecmp:
1240 * @str1: the first xmlChar *
1241 * @str2: the second xmlChar *
1242 *
1243 * a strcasecmp for xmlChar's
1244 *
1245 * Returns the integer result of the comparison
1246 */
1247
1248int
1249xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1250 register int tmp;
1251
1252 if (str1 == str2) return(0);
1253 if (str1 == NULL) return(-1);
1254 if (str2 == NULL) return(1);
1255 do {
1256 tmp = casemap[*str1++] - casemap[*str2];
1257 if (tmp != 0) return(tmp);
1258 } while (*str2++ != 0);
1259 return 0;
1260}
1261
1262/**
1263 * xmlStrncasecmp:
1264 * @str1: the first xmlChar *
1265 * @str2: the second xmlChar *
1266 * @len: the max comparison length
1267 *
1268 * a strncasecmp for xmlChar's
1269 *
1270 * Returns the integer result of the comparison
1271 */
1272
1273int
1274xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1275 register int tmp;
1276
1277 if (len <= 0) return(0);
1278 if (str1 == str2) return(0);
1279 if (str1 == NULL) return(-1);
1280 if (str2 == NULL) return(1);
1281 do {
1282 tmp = casemap[*str1++] - casemap[*str2];
1283 if (tmp != 0 || --len == 0) return(tmp);
1284 } while (*str2++ != 0);
1285 return 0;
1286}
1287
1288/**
1289 * xmlStrchr:
1290 * @str: the xmlChar * array
1291 * @val: the xmlChar to search
1292 *
1293 * a strchr for xmlChar's
1294 *
1295 * Returns the xmlChar * for the first occurence or NULL.
1296 */
1297
1298const xmlChar *
1299xmlStrchr(const xmlChar *str, xmlChar val) {
1300 if (str == NULL) return(NULL);
1301 while (*str != 0) { /* non input consuming */
1302 if (*str == val) return((xmlChar *) str);
1303 str++;
1304 }
1305 return(NULL);
1306}
1307
1308/**
1309 * xmlStrstr:
1310 * @str: the xmlChar * array (haystack)
1311 * @val: the xmlChar to search (needle)
1312 *
1313 * a strstr for xmlChar's
1314 *
1315 * Returns the xmlChar * for the first occurence or NULL.
1316 */
1317
1318const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001319xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001320 int n;
1321
1322 if (str == NULL) return(NULL);
1323 if (val == NULL) return(NULL);
1324 n = xmlStrlen(val);
1325
1326 if (n == 0) return(str);
1327 while (*str != 0) { /* non input consuming */
1328 if (*str == *val) {
1329 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1330 }
1331 str++;
1332 }
1333 return(NULL);
1334}
1335
1336/**
1337 * xmlStrcasestr:
1338 * @str: the xmlChar * array (haystack)
1339 * @val: the xmlChar to search (needle)
1340 *
1341 * a case-ignoring strstr for xmlChar's
1342 *
1343 * Returns the xmlChar * for the first occurence or NULL.
1344 */
1345
1346const xmlChar *
1347xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1348 int n;
1349
1350 if (str == NULL) return(NULL);
1351 if (val == NULL) return(NULL);
1352 n = xmlStrlen(val);
1353
1354 if (n == 0) return(str);
1355 while (*str != 0) { /* non input consuming */
1356 if (casemap[*str] == casemap[*val])
1357 if (!xmlStrncasecmp(str, val, n)) return(str);
1358 str++;
1359 }
1360 return(NULL);
1361}
1362
1363/**
1364 * xmlStrsub:
1365 * @str: the xmlChar * array (haystack)
1366 * @start: the index of the first char (zero based)
1367 * @len: the length of the substring
1368 *
1369 * Extract a substring of a given string
1370 *
1371 * Returns the xmlChar * for the first occurence or NULL.
1372 */
1373
1374xmlChar *
1375xmlStrsub(const xmlChar *str, int start, int len) {
1376 int i;
1377
1378 if (str == NULL) return(NULL);
1379 if (start < 0) return(NULL);
1380 if (len < 0) return(NULL);
1381
1382 for (i = 0;i < start;i++) {
1383 if (*str == 0) return(NULL);
1384 str++;
1385 }
1386 if (*str == 0) return(NULL);
1387 return(xmlStrndup(str, len));
1388}
1389
1390/**
1391 * xmlStrlen:
1392 * @str: the xmlChar * array
1393 *
1394 * length of a xmlChar's string
1395 *
1396 * Returns the number of xmlChar contained in the ARRAY.
1397 */
1398
1399int
1400xmlStrlen(const xmlChar *str) {
1401 int len = 0;
1402
1403 if (str == NULL) return(0);
1404 while (*str != 0) { /* non input consuming */
1405 str++;
1406 len++;
1407 }
1408 return(len);
1409}
1410
1411/**
1412 * xmlStrncat:
1413 * @cur: the original xmlChar * array
1414 * @add: the xmlChar * array added
1415 * @len: the length of @add
1416 *
1417 * a strncat for array of xmlChar's, it will extend cur with the len
1418 * first bytes of @add.
1419 *
1420 * Returns a new xmlChar *, the original @cur is reallocated if needed
1421 * and should not be freed
1422 */
1423
1424xmlChar *
1425xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1426 int size;
1427 xmlChar *ret;
1428
1429 if ((add == NULL) || (len == 0))
1430 return(cur);
1431 if (cur == NULL)
1432 return(xmlStrndup(add, len));
1433
1434 size = xmlStrlen(cur);
1435 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1436 if (ret == NULL) {
1437 xmlGenericError(xmlGenericErrorContext,
1438 "xmlStrncat: realloc of %ld byte failed\n",
1439 (size + len + 1) * (long)sizeof(xmlChar));
1440 return(cur);
1441 }
1442 memcpy(&ret[size], add, len * sizeof(xmlChar));
1443 ret[size + len] = 0;
1444 return(ret);
1445}
1446
1447/**
1448 * xmlStrcat:
1449 * @cur: the original xmlChar * array
1450 * @add: the xmlChar * array added
1451 *
1452 * a strcat for array of xmlChar's. Since they are supposed to be
1453 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1454 * a termination mark of '0'.
1455 *
1456 * Returns a new xmlChar * containing the concatenated string.
1457 */
1458xmlChar *
1459xmlStrcat(xmlChar *cur, const xmlChar *add) {
1460 const xmlChar *p = add;
1461
1462 if (add == NULL) return(cur);
1463 if (cur == NULL)
1464 return(xmlStrdup(add));
1465
1466 while (*p != 0) p++; /* non input consuming */
1467 return(xmlStrncat(cur, add, p - add));
1468}
1469
1470/************************************************************************
1471 * *
1472 * Commodity functions, cleanup needed ? *
1473 * *
1474 ************************************************************************/
1475
1476/**
1477 * areBlanks:
1478 * @ctxt: an XML parser context
1479 * @str: a xmlChar *
1480 * @len: the size of @str
1481 *
1482 * Is this a sequence of blank chars that one can ignore ?
1483 *
1484 * Returns 1 if ignorable 0 otherwise.
1485 */
1486
1487static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1488 int i, ret;
1489 xmlNodePtr lastChild;
1490
Daniel Veillard2f362242001-03-02 17:36:21 +00001491 if (ctxt->keepBlanks)
1492 return(0);
1493
Owen Taylor3473f882001-02-23 17:55:21 +00001494 /*
1495 * Check for xml:space value.
1496 */
1497 if (*(ctxt->space) == 1)
1498 return(0);
1499
1500 /*
1501 * Check that the string is made of blanks
1502 */
1503 for (i = 0;i < len;i++)
1504 if (!(IS_BLANK(str[i]))) return(0);
1505
1506 /*
1507 * Look if the element is mixed content in the Dtd if available
1508 */
1509 if (ctxt->myDoc != NULL) {
1510 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1511 if (ret == 0) return(1);
1512 if (ret == 1) return(0);
1513 }
1514
1515 /*
1516 * Otherwise, heuristic :-\
1517 */
Owen Taylor3473f882001-02-23 17:55:21 +00001518 if (RAW != '<') return(0);
1519 if (ctxt->node == NULL) return(0);
1520 if ((ctxt->node->children == NULL) &&
1521 (RAW == '<') && (NXT(1) == '/')) return(0);
1522
1523 lastChild = xmlGetLastChild(ctxt->node);
1524 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001525 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1526 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 } else if (xmlNodeIsText(lastChild))
1528 return(0);
1529 else if ((ctxt->node->children != NULL) &&
1530 (xmlNodeIsText(ctxt->node->children)))
1531 return(0);
1532 return(1);
1533}
1534
1535/*
 * Forward declarations for recursive behaviour.
1537 */
1538void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1539void xmlParseReference(xmlParserCtxtPtr ctxt);
1540
1541/************************************************************************
1542 * *
1543 * Extra stuff for namespace support *
1544 * Relates to http://www.w3.org/TR/WD-xml-names *
1545 * *
1546 ************************************************************************/
1547
1548/**
1549 * xmlSplitQName:
1550 * @ctxt: an XML parser context
1551 * @name: an XML parser context
1552 * @prefix: a xmlChar **
1553 *
1554 * parse an UTF8 encoded XML qualified name string
1555 *
1556 * [NS 5] QName ::= (Prefix ':')? LocalPart
1557 *
1558 * [NS 6] Prefix ::= NCName
1559 *
1560 * [NS 7] LocalPart ::= NCName
1561 *
1562 * Returns the local part, and prefix is updated
1563 * to get the Prefix if any.
1564 */
1565
1566xmlChar *
1567xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1568 xmlChar buf[XML_MAX_NAMELEN + 5];
1569 xmlChar *buffer = NULL;
1570 int len = 0;
1571 int max = XML_MAX_NAMELEN;
1572 xmlChar *ret = NULL;
1573 const xmlChar *cur = name;
1574 int c;
1575
1576 *prefix = NULL;
1577
1578#ifndef XML_XML_NAMESPACE
1579 /* xml: prefix is not really a namespace */
1580 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1581 (cur[2] == 'l') && (cur[3] == ':'))
1582 return(xmlStrdup(name));
1583#endif
1584
1585 /* nasty but valid */
1586 if (cur[0] == ':')
1587 return(xmlStrdup(name));
1588
1589 c = *cur++;
1590 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1591 buf[len++] = c;
1592 c = *cur++;
1593 }
1594 if (len >= max) {
1595 /*
1596 * Okay someone managed to make a huge name, so he's ready to pay
1597 * for the processing speed.
1598 */
1599 max = len * 2;
1600
1601 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1602 if (buffer == NULL) {
1603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1604 ctxt->sax->error(ctxt->userData,
1605 "xmlSplitQName: out of memory\n");
1606 return(NULL);
1607 }
1608 memcpy(buffer, buf, len);
1609 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1610 if (len + 10 > max) {
1611 max *= 2;
1612 buffer = (xmlChar *) xmlRealloc(buffer,
1613 max * sizeof(xmlChar));
1614 if (buffer == NULL) {
1615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616 ctxt->sax->error(ctxt->userData,
1617 "xmlSplitQName: out of memory\n");
1618 return(NULL);
1619 }
1620 }
1621 buffer[len++] = c;
1622 c = *cur++;
1623 }
1624 buffer[len] = 0;
1625 }
1626
1627 if (buffer == NULL)
1628 ret = xmlStrndup(buf, len);
1629 else {
1630 ret = buffer;
1631 buffer = NULL;
1632 max = XML_MAX_NAMELEN;
1633 }
1634
1635
1636 if (c == ':') {
1637 c = *cur++;
1638 if (c == 0) return(ret);
1639 *prefix = ret;
1640 len = 0;
1641
1642 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1643 buf[len++] = c;
1644 c = *cur++;
1645 }
1646 if (len >= max) {
1647 /*
1648 * Okay someone managed to make a huge name, so he's ready to pay
1649 * for the processing speed.
1650 */
1651 max = len * 2;
1652
1653 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1654 if (buffer == NULL) {
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "xmlSplitQName: out of memory\n");
1658 return(NULL);
1659 }
1660 memcpy(buffer, buf, len);
1661 while (c != 0) { /* tested bigname2.xml */
1662 if (len + 10 > max) {
1663 max *= 2;
1664 buffer = (xmlChar *) xmlRealloc(buffer,
1665 max * sizeof(xmlChar));
1666 if (buffer == NULL) {
1667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1668 ctxt->sax->error(ctxt->userData,
1669 "xmlSplitQName: out of memory\n");
1670 return(NULL);
1671 }
1672 }
1673 buffer[len++] = c;
1674 c = *cur++;
1675 }
1676 buffer[len] = 0;
1677 }
1678
1679 if (buffer == NULL)
1680 ret = xmlStrndup(buf, len);
1681 else {
1682 ret = buffer;
1683 }
1684 }
1685
1686 return(ret);
1687}
1688
1689/************************************************************************
1690 * *
1691 * The parser itself *
1692 * Relates to http://www.w3.org/TR/REC-xml *
1693 * *
1694 ************************************************************************/
1695
Daniel Veillard76d66f42001-05-16 21:05:17 +00001696static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001697/**
1698 * xmlParseName:
1699 * @ctxt: an XML parser context
1700 *
1701 * parse an XML name.
1702 *
1703 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1704 * CombiningChar | Extender
1705 *
1706 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1707 *
1708 * [6] Names ::= Name (S Name)*
1709 *
1710 * Returns the Name parsed or NULL
1711 */
1712
1713xmlChar *
1714xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001715 const xmlChar *in;
1716 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001717 int count = 0;
1718
1719 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001720
1721 /*
1722 * Accelerator for simple ASCII names
1723 */
1724 in = ctxt->input->cur;
1725 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1726 ((*in >= 0x41) && (*in <= 0x5A)) ||
1727 (*in == '_') || (*in == ':')) {
1728 in++;
1729 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1730 ((*in >= 0x41) && (*in <= 0x5A)) ||
1731 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001732 (*in == '_') || (*in == '-') ||
1733 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001734 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001735 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001736 count = in - ctxt->input->cur;
1737 ret = xmlStrndup(ctxt->input->cur, count);
1738 ctxt->input->cur = in;
1739 return(ret);
1740 }
1741 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001742 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001743}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001744
Daniel Veillard76d66f42001-05-16 21:05:17 +00001745static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001746xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1747 xmlChar buf[XML_MAX_NAMELEN + 5];
1748 int len = 0, l;
1749 int c;
1750 int count = 0;
1751
1752 /*
1753 * Handler for more complex cases
1754 */
1755 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001756 c = CUR_CHAR(l);
1757 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1758 (!IS_LETTER(c) && (c != '_') &&
1759 (c != ':'))) {
1760 return(NULL);
1761 }
1762
1763 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1764 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1765 (c == '.') || (c == '-') ||
1766 (c == '_') || (c == ':') ||
1767 (IS_COMBINING(c)) ||
1768 (IS_EXTENDER(c)))) {
1769 if (count++ > 100) {
1770 count = 0;
1771 GROW;
1772 }
1773 COPY_BUF(l,buf,len,c);
1774 NEXTL(l);
1775 c = CUR_CHAR(l);
1776 if (len >= XML_MAX_NAMELEN) {
1777 /*
1778 * Okay someone managed to make a huge name, so he's ready to pay
1779 * for the processing speed.
1780 */
1781 xmlChar *buffer;
1782 int max = len * 2;
1783
1784 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1785 if (buffer == NULL) {
1786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1787 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001788 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001789 return(NULL);
1790 }
1791 memcpy(buffer, buf, len);
1792 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1793 (c == '.') || (c == '-') ||
1794 (c == '_') || (c == ':') ||
1795 (IS_COMBINING(c)) ||
1796 (IS_EXTENDER(c))) {
1797 if (count++ > 100) {
1798 count = 0;
1799 GROW;
1800 }
1801 if (len + 10 > max) {
1802 max *= 2;
1803 buffer = (xmlChar *) xmlRealloc(buffer,
1804 max * sizeof(xmlChar));
1805 if (buffer == NULL) {
1806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1807 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001808 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001809 return(NULL);
1810 }
1811 }
1812 COPY_BUF(l,buffer,len,c);
1813 NEXTL(l);
1814 c = CUR_CHAR(l);
1815 }
1816 buffer[len] = 0;
1817 return(buffer);
1818 }
1819 }
1820 return(xmlStrndup(buf, len));
1821}
1822
1823/**
1824 * xmlParseStringName:
1825 * @ctxt: an XML parser context
1826 * @str: a pointer to the string pointer (IN/OUT)
1827 *
1828 * parse an XML name.
1829 *
1830 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1831 * CombiningChar | Extender
1832 *
1833 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1834 *
1835 * [6] Names ::= Name (S Name)*
1836 *
1837 * Returns the Name parsed or NULL. The str pointer
1838 * is updated to the current location in the string.
1839 */
1840
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001841static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001842xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1843 xmlChar buf[XML_MAX_NAMELEN + 5];
1844 const xmlChar *cur = *str;
1845 int len = 0, l;
1846 int c;
1847
1848 c = CUR_SCHAR(cur, l);
1849 if (!IS_LETTER(c) && (c != '_') &&
1850 (c != ':')) {
1851 return(NULL);
1852 }
1853
1854 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1855 (c == '.') || (c == '-') ||
1856 (c == '_') || (c == ':') ||
1857 (IS_COMBINING(c)) ||
1858 (IS_EXTENDER(c))) {
1859 COPY_BUF(l,buf,len,c);
1860 cur += l;
1861 c = CUR_SCHAR(cur, l);
1862 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1863 /*
1864 * Okay someone managed to make a huge name, so he's ready to pay
1865 * for the processing speed.
1866 */
1867 xmlChar *buffer;
1868 int max = len * 2;
1869
1870 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1871 if (buffer == NULL) {
1872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1873 ctxt->sax->error(ctxt->userData,
1874 "xmlParseStringName: out of memory\n");
1875 return(NULL);
1876 }
1877 memcpy(buffer, buf, len);
1878 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1879 (c == '.') || (c == '-') ||
1880 (c == '_') || (c == ':') ||
1881 (IS_COMBINING(c)) ||
1882 (IS_EXTENDER(c))) {
1883 if (len + 10 > max) {
1884 max *= 2;
1885 buffer = (xmlChar *) xmlRealloc(buffer,
1886 max * sizeof(xmlChar));
1887 if (buffer == NULL) {
1888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1889 ctxt->sax->error(ctxt->userData,
1890 "xmlParseStringName: out of memory\n");
1891 return(NULL);
1892 }
1893 }
1894 COPY_BUF(l,buffer,len,c);
1895 cur += l;
1896 c = CUR_SCHAR(cur, l);
1897 }
1898 buffer[len] = 0;
1899 *str = cur;
1900 return(buffer);
1901 }
1902 }
1903 *str = cur;
1904 return(xmlStrndup(buf, len));
1905}
1906
1907/**
1908 * xmlParseNmtoken:
1909 * @ctxt: an XML parser context
1910 *
1911 * parse an XML Nmtoken.
1912 *
1913 * [7] Nmtoken ::= (NameChar)+
1914 *
1915 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1916 *
1917 * Returns the Nmtoken parsed or NULL
1918 */
1919
1920xmlChar *
1921xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1922 xmlChar buf[XML_MAX_NAMELEN + 5];
1923 int len = 0, l;
1924 int c;
1925 int count = 0;
1926
1927 GROW;
1928 c = CUR_CHAR(l);
1929
1930 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1931 (c == '.') || (c == '-') ||
1932 (c == '_') || (c == ':') ||
1933 (IS_COMBINING(c)) ||
1934 (IS_EXTENDER(c))) {
1935 if (count++ > 100) {
1936 count = 0;
1937 GROW;
1938 }
1939 COPY_BUF(l,buf,len,c);
1940 NEXTL(l);
1941 c = CUR_CHAR(l);
1942 if (len >= XML_MAX_NAMELEN) {
1943 /*
1944 * Okay someone managed to make a huge token, so he's ready to pay
1945 * for the processing speed.
1946 */
1947 xmlChar *buffer;
1948 int max = len * 2;
1949
1950 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1951 if (buffer == NULL) {
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData,
1954 "xmlParseNmtoken: out of memory\n");
1955 return(NULL);
1956 }
1957 memcpy(buffer, buf, len);
1958 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1959 (c == '.') || (c == '-') ||
1960 (c == '_') || (c == ':') ||
1961 (IS_COMBINING(c)) ||
1962 (IS_EXTENDER(c))) {
1963 if (count++ > 100) {
1964 count = 0;
1965 GROW;
1966 }
1967 if (len + 10 > max) {
1968 max *= 2;
1969 buffer = (xmlChar *) xmlRealloc(buffer,
1970 max * sizeof(xmlChar));
1971 if (buffer == NULL) {
1972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1973 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001974 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001975 return(NULL);
1976 }
1977 }
1978 COPY_BUF(l,buffer,len,c);
1979 NEXTL(l);
1980 c = CUR_CHAR(l);
1981 }
1982 buffer[len] = 0;
1983 return(buffer);
1984 }
1985 }
1986 if (len == 0)
1987 return(NULL);
1988 return(xmlStrndup(buf, len));
1989}
1990
1991/**
1992 * xmlParseEntityValue:
1993 * @ctxt: an XML parser context
1994 * @orig: if non-NULL store a copy of the original entity value
1995 *
1996 * parse a value for ENTITY declarations
1997 *
1998 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1999 * "'" ([^%&'] | PEReference | Reference)* "'"
2000 *
2001 * Returns the EntityValue parsed with reference substitued or NULL
2002 */
2003
2004xmlChar *
2005xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2006 xmlChar *buf = NULL;
2007 int len = 0;
2008 int size = XML_PARSER_BUFFER_SIZE;
2009 int c, l;
2010 xmlChar stop;
2011 xmlChar *ret = NULL;
2012 const xmlChar *cur = NULL;
2013 xmlParserInputPtr input;
2014
2015 if (RAW == '"') stop = '"';
2016 else if (RAW == '\'') stop = '\'';
2017 else {
2018 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2020 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2021 ctxt->wellFormed = 0;
2022 ctxt->disableSAX = 1;
2023 return(NULL);
2024 }
2025 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2026 if (buf == NULL) {
2027 xmlGenericError(xmlGenericErrorContext,
2028 "malloc of %d byte failed\n", size);
2029 return(NULL);
2030 }
2031
2032 /*
2033 * The content of the entity definition is copied in a buffer.
2034 */
2035
2036 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2037 input = ctxt->input;
2038 GROW;
2039 NEXT;
2040 c = CUR_CHAR(l);
2041 /*
2042 * NOTE: 4.4.5 Included in Literal
2043 * When a parameter entity reference appears in a literal entity
2044 * value, ... a single or double quote character in the replacement
2045 * text is always treated as a normal data character and will not
2046 * terminate the literal.
2047 * In practice it means we stop the loop only when back at parsing
2048 * the initial entity and the quote is found
2049 */
2050 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2051 (ctxt->input != input))) {
2052 if (len + 5 >= size) {
2053 size *= 2;
2054 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2055 if (buf == NULL) {
2056 xmlGenericError(xmlGenericErrorContext,
2057 "realloc of %d byte failed\n", size);
2058 return(NULL);
2059 }
2060 }
2061 COPY_BUF(l,buf,len,c);
2062 NEXTL(l);
2063 /*
2064 * Pop-up of finished entities.
2065 */
2066 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2067 xmlPopInput(ctxt);
2068
2069 GROW;
2070 c = CUR_CHAR(l);
2071 if (c == 0) {
2072 GROW;
2073 c = CUR_CHAR(l);
2074 }
2075 }
2076 buf[len] = 0;
2077
2078 /*
2079 * Raise problem w.r.t. '&' and '%' being used in non-entities
2080 * reference constructs. Note Charref will be handled in
2081 * xmlStringDecodeEntities()
2082 */
2083 cur = buf;
2084 while (*cur != 0) { /* non input consuming */
2085 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2086 xmlChar *name;
2087 xmlChar tmp = *cur;
2088
2089 cur++;
2090 name = xmlParseStringName(ctxt, &cur);
2091 if ((name == NULL) || (*cur != ';')) {
2092 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2094 ctxt->sax->error(ctxt->userData,
2095 "EntityValue: '%c' forbidden except for entities references\n",
2096 tmp);
2097 ctxt->wellFormed = 0;
2098 ctxt->disableSAX = 1;
2099 }
2100 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2101 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103 ctxt->sax->error(ctxt->userData,
2104 "EntityValue: PEReferences forbidden in internal subset\n",
2105 tmp);
2106 ctxt->wellFormed = 0;
2107 ctxt->disableSAX = 1;
2108 }
2109 if (name != NULL)
2110 xmlFree(name);
2111 }
2112 cur++;
2113 }
2114
2115 /*
2116 * Then PEReference entities are substituted.
2117 */
2118 if (c != stop) {
2119 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2121 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2122 ctxt->wellFormed = 0;
2123 ctxt->disableSAX = 1;
2124 xmlFree(buf);
2125 } else {
2126 NEXT;
2127 /*
2128 * NOTE: 4.4.7 Bypassed
2129 * When a general entity reference appears in the EntityValue in
2130 * an entity declaration, it is bypassed and left as is.
2131 * so XML_SUBSTITUTE_REF is not set here.
2132 */
2133 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2134 0, 0, 0);
2135 if (orig != NULL)
2136 *orig = buf;
2137 else
2138 xmlFree(buf);
2139 }
2140
2141 return(ret);
2142}
2143
2144/**
2145 * xmlParseAttValue:
2146 * @ctxt: an XML parser context
2147 *
2148 * parse a value for an attribute
2149 * Note: the parser won't do substitution of entities here, this
2150 * will be handled later in xmlStringGetNodeList
2151 *
2152 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2153 * "'" ([^<&'] | Reference)* "'"
2154 *
2155 * 3.3.3 Attribute-Value Normalization:
2156 * Before the value of an attribute is passed to the application or
2157 * checked for validity, the XML processor must normalize it as follows:
2158 * - a character reference is processed by appending the referenced
2159 * character to the attribute value
2160 * - an entity reference is processed by recursively processing the
2161 * replacement text of the entity
2162 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2163 * appending #x20 to the normalized value, except that only a single
2164 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2165 * parsed entity or the literal entity value of an internal parsed entity
2166 * - other characters are processed by appending them to the normalized value
2167 * If the declared value is not CDATA, then the XML processor must further
2168 * process the normalized attribute value by discarding any leading and
2169 * trailing space (#x20) characters, and by replacing sequences of space
2170 * (#x20) characters by a single space (#x20) character.
2171 * All attributes for which no declaration has been read should be treated
2172 * by a non-validating parser as if declared CDATA.
2173 *
2174 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2175 */
2176
2177xmlChar *
2178xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2179 xmlChar limit = 0;
2180 xmlChar *buf = NULL;
2181 int len = 0;
2182 int buf_size = 0;
2183 int c, l;
2184 xmlChar *current = NULL;
2185 xmlEntityPtr ent;
2186
2187
2188 SHRINK;
2189 if (NXT(0) == '"') {
2190 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2191 limit = '"';
2192 NEXT;
2193 } else if (NXT(0) == '\'') {
2194 limit = '\'';
2195 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2196 NEXT;
2197 } else {
2198 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
2202 ctxt->disableSAX = 1;
2203 return(NULL);
2204 }
2205
2206 /*
2207 * allocate a translation buffer.
2208 */
2209 buf_size = XML_PARSER_BUFFER_SIZE;
2210 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2211 if (buf == NULL) {
2212 perror("xmlParseAttValue: malloc failed");
2213 return(NULL);
2214 }
2215
2216 /*
2217 * Ok loop until we reach one of the ending char or a size limit.
2218 */
2219 c = CUR_CHAR(l);
2220 while (((NXT(0) != limit) && /* checked */
2221 (c != '<')) || (ctxt->token != 0)) {
2222 if (c == 0) break;
2223 if (ctxt->token == '&') {
2224 /*
2225 * The reparsing will be done in xmlStringGetNodeList()
2226 * called by the attribute() function in SAX.c
2227 */
2228 static xmlChar buffer[6] = "&#38;";
2229
2230 if (len > buf_size - 10) {
2231 growBuffer(buf);
2232 }
2233 current = &buffer[0];
2234 while (*current != 0) { /* non input consuming */
2235 buf[len++] = *current++;
2236 }
2237 ctxt->token = 0;
2238 } else if (c == '&') {
2239 if (NXT(1) == '#') {
2240 int val = xmlParseCharRef(ctxt);
2241 if (val == '&') {
2242 /*
2243 * The reparsing will be done in xmlStringGetNodeList()
2244 * called by the attribute() function in SAX.c
2245 */
2246 static xmlChar buffer[6] = "&#38;";
2247
2248 if (len > buf_size - 10) {
2249 growBuffer(buf);
2250 }
2251 current = &buffer[0];
2252 while (*current != 0) { /* non input consuming */
2253 buf[len++] = *current++;
2254 }
2255 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002256 if (len > buf_size - 10) {
2257 growBuffer(buf);
2258 }
Owen Taylor3473f882001-02-23 17:55:21 +00002259 len += xmlCopyChar(0, &buf[len], val);
2260 }
2261 } else {
2262 ent = xmlParseEntityRef(ctxt);
2263 if ((ent != NULL) &&
2264 (ctxt->replaceEntities != 0)) {
2265 xmlChar *rep;
2266
2267 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2268 rep = xmlStringDecodeEntities(ctxt, ent->content,
2269 XML_SUBSTITUTE_REF, 0, 0, 0);
2270 if (rep != NULL) {
2271 current = rep;
2272 while (*current != 0) { /* non input consuming */
2273 buf[len++] = *current++;
2274 if (len > buf_size - 10) {
2275 growBuffer(buf);
2276 }
2277 }
2278 xmlFree(rep);
2279 }
2280 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002281 if (len > buf_size - 10) {
2282 growBuffer(buf);
2283 }
Owen Taylor3473f882001-02-23 17:55:21 +00002284 if (ent->content != NULL)
2285 buf[len++] = ent->content[0];
2286 }
2287 } else if (ent != NULL) {
2288 int i = xmlStrlen(ent->name);
2289 const xmlChar *cur = ent->name;
2290
2291 /*
2292 * This may look absurd but is needed to detect
2293 * entities problems
2294 */
2295 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2296 (ent->content != NULL)) {
2297 xmlChar *rep;
2298 rep = xmlStringDecodeEntities(ctxt, ent->content,
2299 XML_SUBSTITUTE_REF, 0, 0, 0);
2300 if (rep != NULL)
2301 xmlFree(rep);
2302 }
2303
2304 /*
2305 * Just output the reference
2306 */
2307 buf[len++] = '&';
2308 if (len > buf_size - i - 10) {
2309 growBuffer(buf);
2310 }
2311 for (;i > 0;i--)
2312 buf[len++] = *cur++;
2313 buf[len++] = ';';
2314 }
2315 }
2316 } else {
2317 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2318 COPY_BUF(l,buf,len,0x20);
2319 if (len > buf_size - 10) {
2320 growBuffer(buf);
2321 }
2322 } else {
2323 COPY_BUF(l,buf,len,c);
2324 if (len > buf_size - 10) {
2325 growBuffer(buf);
2326 }
2327 }
2328 NEXTL(l);
2329 }
2330 GROW;
2331 c = CUR_CHAR(l);
2332 }
2333 buf[len++] = 0;
2334 if (RAW == '<') {
2335 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2337 ctxt->sax->error(ctxt->userData,
2338 "Unescaped '<' not allowed in attributes values\n");
2339 ctxt->wellFormed = 0;
2340 ctxt->disableSAX = 1;
2341 } else if (RAW != limit) {
2342 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2344 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2345 ctxt->wellFormed = 0;
2346 ctxt->disableSAX = 1;
2347 } else
2348 NEXT;
2349 return(buf);
2350}
2351
2352/**
2353 * xmlParseSystemLiteral:
2354 * @ctxt: an XML parser context
2355 *
2356 * parse an XML Literal
2357 *
2358 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2359 *
2360 * Returns the SystemLiteral parsed or NULL
2361 */
2362
2363xmlChar *
2364xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2365 xmlChar *buf = NULL;
2366 int len = 0;
2367 int size = XML_PARSER_BUFFER_SIZE;
2368 int cur, l;
2369 xmlChar stop;
2370 int state = ctxt->instate;
2371 int count = 0;
2372
2373 SHRINK;
2374 if (RAW == '"') {
2375 NEXT;
2376 stop = '"';
2377 } else if (RAW == '\'') {
2378 NEXT;
2379 stop = '\'';
2380 } else {
2381 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2383 ctxt->sax->error(ctxt->userData,
2384 "SystemLiteral \" or ' expected\n");
2385 ctxt->wellFormed = 0;
2386 ctxt->disableSAX = 1;
2387 return(NULL);
2388 }
2389
2390 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2391 if (buf == NULL) {
2392 xmlGenericError(xmlGenericErrorContext,
2393 "malloc of %d byte failed\n", size);
2394 return(NULL);
2395 }
2396 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2397 cur = CUR_CHAR(l);
2398 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2399 if (len + 5 >= size) {
2400 size *= 2;
2401 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2402 if (buf == NULL) {
2403 xmlGenericError(xmlGenericErrorContext,
2404 "realloc of %d byte failed\n", size);
2405 ctxt->instate = (xmlParserInputState) state;
2406 return(NULL);
2407 }
2408 }
2409 count++;
2410 if (count > 50) {
2411 GROW;
2412 count = 0;
2413 }
2414 COPY_BUF(l,buf,len,cur);
2415 NEXTL(l);
2416 cur = CUR_CHAR(l);
2417 if (cur == 0) {
2418 GROW;
2419 SHRINK;
2420 cur = CUR_CHAR(l);
2421 }
2422 }
2423 buf[len] = 0;
2424 ctxt->instate = (xmlParserInputState) state;
2425 if (!IS_CHAR(cur)) {
2426 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2429 ctxt->wellFormed = 0;
2430 ctxt->disableSAX = 1;
2431 } else {
2432 NEXT;
2433 }
2434 return(buf);
2435}
2436
2437/**
2438 * xmlParsePubidLiteral:
2439 * @ctxt: an XML parser context
2440 *
2441 * parse an XML public literal
2442 *
2443 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2444 *
2445 * Returns the PubidLiteral parsed or NULL.
2446 */
2447
2448xmlChar *
2449xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2450 xmlChar *buf = NULL;
2451 int len = 0;
2452 int size = XML_PARSER_BUFFER_SIZE;
2453 xmlChar cur;
2454 xmlChar stop;
2455 int count = 0;
2456
2457 SHRINK;
2458 if (RAW == '"') {
2459 NEXT;
2460 stop = '"';
2461 } else if (RAW == '\'') {
2462 NEXT;
2463 stop = '\'';
2464 } else {
2465 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2467 ctxt->sax->error(ctxt->userData,
2468 "SystemLiteral \" or ' expected\n");
2469 ctxt->wellFormed = 0;
2470 ctxt->disableSAX = 1;
2471 return(NULL);
2472 }
2473 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2474 if (buf == NULL) {
2475 xmlGenericError(xmlGenericErrorContext,
2476 "malloc of %d byte failed\n", size);
2477 return(NULL);
2478 }
2479 cur = CUR;
2480 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2481 if (len + 1 >= size) {
2482 size *= 2;
2483 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2484 if (buf == NULL) {
2485 xmlGenericError(xmlGenericErrorContext,
2486 "realloc of %d byte failed\n", size);
2487 return(NULL);
2488 }
2489 }
2490 buf[len++] = cur;
2491 count++;
2492 if (count > 50) {
2493 GROW;
2494 count = 0;
2495 }
2496 NEXT;
2497 cur = CUR;
2498 if (cur == 0) {
2499 GROW;
2500 SHRINK;
2501 cur = CUR;
2502 }
2503 }
2504 buf[len] = 0;
2505 if (cur != stop) {
2506 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2508 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2509 ctxt->wellFormed = 0;
2510 ctxt->disableSAX = 1;
2511 } else {
2512 NEXT;
2513 }
2514 return(buf);
2515}
2516
Daniel Veillard48b2f892001-02-25 16:11:03 +00002517void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002518/**
2519 * xmlParseCharData:
2520 * @ctxt: an XML parser context
2521 * @cdata: int indicating whether we are within a CDATA section
2522 *
2523 * parse a CharData section.
2524 * if we are within a CDATA section ']]>' marks an end of section.
2525 *
2526 * The right angle bracket (>) may be represented using the string "&gt;",
2527 * and must, for compatibility, be escaped using "&gt;" or a character
2528 * reference when it appears in the string "]]>" in content, when that
2529 * string is not marking the end of a CDATA section.
2530 *
2531 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2532 */
2533
2534void
2535xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002536 const xmlChar *in;
2537 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002538 int line = ctxt->input->line;
2539 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002540
2541 SHRINK;
2542 GROW;
2543 /*
2544 * Accelerated common case where input don't need to be
2545 * modified before passing it to the handler.
2546 */
2547 if ((ctxt->token == 0) && (!cdata)) {
2548 in = ctxt->input->cur;
2549 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002550get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002551 while (((*in >= 0x20) && (*in != '<') &&
2552 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2553 in++;
2554 if (*in == 0xA) {
2555 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002556 in++;
2557 while (*in == 0xA) {
2558 ctxt->input->line++;
2559 in++;
2560 }
2561 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002562 }
2563 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002564 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002565 if (IS_BLANK(*ctxt->input->cur)) {
2566 const xmlChar *tmp = ctxt->input->cur;
2567 ctxt->input->cur = in;
2568 if (areBlanks(ctxt, tmp, nbchar)) {
2569 if (ctxt->sax->ignorableWhitespace != NULL)
2570 ctxt->sax->ignorableWhitespace(ctxt->userData,
2571 tmp, nbchar);
2572 } else {
2573 if (ctxt->sax->characters != NULL)
2574 ctxt->sax->characters(ctxt->userData,
2575 tmp, nbchar);
2576 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002577 line = ctxt->input->line;
2578 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002579 } else {
2580 if (ctxt->sax->characters != NULL)
2581 ctxt->sax->characters(ctxt->userData,
2582 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002583 line = ctxt->input->line;
2584 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002585 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002586 }
2587 ctxt->input->cur = in;
2588 if (*in == 0xD) {
2589 in++;
2590 if (*in == 0xA) {
2591 ctxt->input->cur = in;
2592 in++;
2593 ctxt->input->line++;
2594 continue; /* while */
2595 }
2596 in--;
2597 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002598 if (*in == '<') {
2599 return;
2600 }
2601 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002602 return;
2603 }
2604 SHRINK;
2605 GROW;
2606 in = ctxt->input->cur;
2607 } while ((*in >= 0x20) && (*in <= 0x7F));
2608 nbchar = 0;
2609 }
Daniel Veillard50582112001-03-26 22:52:16 +00002610 ctxt->input->line = line;
2611 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002612 xmlParseCharDataComplex(ctxt, cdata);
2613}
2614
2615void
2616xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002617 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2618 int nbchar = 0;
2619 int cur, l;
2620 int count = 0;
2621
2622 SHRINK;
2623 GROW;
2624 cur = CUR_CHAR(l);
2625 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2626 ((cur != '&') || (ctxt->token == '&')) &&
2627 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2628 if ((cur == ']') && (NXT(1) == ']') &&
2629 (NXT(2) == '>')) {
2630 if (cdata) break;
2631 else {
2632 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2634 ctxt->sax->error(ctxt->userData,
2635 "Sequence ']]>' not allowed in content\n");
2636 /* Should this be relaxed ??? I see a "must here */
2637 ctxt->wellFormed = 0;
2638 ctxt->disableSAX = 1;
2639 }
2640 }
2641 COPY_BUF(l,buf,nbchar,cur);
2642 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2643 /*
2644 * Ok the segment is to be consumed as chars.
2645 */
2646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2647 if (areBlanks(ctxt, buf, nbchar)) {
2648 if (ctxt->sax->ignorableWhitespace != NULL)
2649 ctxt->sax->ignorableWhitespace(ctxt->userData,
2650 buf, nbchar);
2651 } else {
2652 if (ctxt->sax->characters != NULL)
2653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2654 }
2655 }
2656 nbchar = 0;
2657 }
2658 count++;
2659 if (count > 50) {
2660 GROW;
2661 count = 0;
2662 }
2663 NEXTL(l);
2664 cur = CUR_CHAR(l);
2665 }
2666 if (nbchar != 0) {
2667 /*
2668 * Ok the segment is to be consumed as chars.
2669 */
2670 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2671 if (areBlanks(ctxt, buf, nbchar)) {
2672 if (ctxt->sax->ignorableWhitespace != NULL)
2673 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2674 } else {
2675 if (ctxt->sax->characters != NULL)
2676 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2677 }
2678 }
2679 }
2680}
2681
2682/**
2683 * xmlParseExternalID:
2684 * @ctxt: an XML parser context
2685 * @publicID: a xmlChar** receiving PubidLiteral
2686 * @strict: indicate whether we should restrict parsing to only
2687 * production [75], see NOTE below
2688 *
2689 * Parse an External ID or a Public ID
2690 *
2691 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2692 * 'PUBLIC' S PubidLiteral S SystemLiteral
2693 *
2694 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2695 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2696 *
2697 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2698 *
2699 * Returns the function returns SystemLiteral and in the second
2700 * case publicID receives PubidLiteral, is strict is off
2701 * it is possible to return NULL and have publicID set.
2702 */
2703
2704xmlChar *
2705xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2706 xmlChar *URI = NULL;
2707
2708 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002709
2710 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002711 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2712 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2713 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2714 SKIP(6);
2715 if (!IS_BLANK(CUR)) {
2716 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2718 ctxt->sax->error(ctxt->userData,
2719 "Space required after 'SYSTEM'\n");
2720 ctxt->wellFormed = 0;
2721 ctxt->disableSAX = 1;
2722 }
2723 SKIP_BLANKS;
2724 URI = xmlParseSystemLiteral(ctxt);
2725 if (URI == NULL) {
2726 ctxt->errNo = XML_ERR_URI_REQUIRED;
2727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2728 ctxt->sax->error(ctxt->userData,
2729 "xmlParseExternalID: SYSTEM, no URI\n");
2730 ctxt->wellFormed = 0;
2731 ctxt->disableSAX = 1;
2732 }
2733 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2734 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2735 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2736 SKIP(6);
2737 if (!IS_BLANK(CUR)) {
2738 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2740 ctxt->sax->error(ctxt->userData,
2741 "Space required after 'PUBLIC'\n");
2742 ctxt->wellFormed = 0;
2743 ctxt->disableSAX = 1;
2744 }
2745 SKIP_BLANKS;
2746 *publicID = xmlParsePubidLiteral(ctxt);
2747 if (*publicID == NULL) {
2748 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2750 ctxt->sax->error(ctxt->userData,
2751 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2752 ctxt->wellFormed = 0;
2753 ctxt->disableSAX = 1;
2754 }
2755 if (strict) {
2756 /*
2757 * We don't handle [83] so "S SystemLiteral" is required.
2758 */
2759 if (!IS_BLANK(CUR)) {
2760 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2762 ctxt->sax->error(ctxt->userData,
2763 "Space required after the Public Identifier\n");
2764 ctxt->wellFormed = 0;
2765 ctxt->disableSAX = 1;
2766 }
2767 } else {
2768 /*
2769 * We handle [83] so we return immediately, if
2770 * "S SystemLiteral" is not detected. From a purely parsing
2771 * point of view that's a nice mess.
2772 */
2773 const xmlChar *ptr;
2774 GROW;
2775
2776 ptr = CUR_PTR;
2777 if (!IS_BLANK(*ptr)) return(NULL);
2778
2779 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2780 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2781 }
2782 SKIP_BLANKS;
2783 URI = xmlParseSystemLiteral(ctxt);
2784 if (URI == NULL) {
2785 ctxt->errNo = XML_ERR_URI_REQUIRED;
2786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2787 ctxt->sax->error(ctxt->userData,
2788 "xmlParseExternalID: PUBLIC, no URI\n");
2789 ctxt->wellFormed = 0;
2790 ctxt->disableSAX = 1;
2791 }
2792 }
2793 return(URI);
2794}
2795
2796/**
2797 * xmlParseComment:
2798 * @ctxt: an XML parser context
2799 *
2800 * Skip an XML (SGML) comment <!-- .... -->
2801 * The spec says that "For compatibility, the string "--" (double-hyphen)
2802 * must not occur within comments. "
2803 *
2804 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2805 */
2806void
2807xmlParseComment(xmlParserCtxtPtr ctxt) {
2808 xmlChar *buf = NULL;
2809 int len;
2810 int size = XML_PARSER_BUFFER_SIZE;
2811 int q, ql;
2812 int r, rl;
2813 int cur, l;
2814 xmlParserInputState state;
2815 xmlParserInputPtr input = ctxt->input;
2816 int count = 0;
2817
2818 /*
2819 * Check that there is a comment right here.
2820 */
2821 if ((RAW != '<') || (NXT(1) != '!') ||
2822 (NXT(2) != '-') || (NXT(3) != '-')) return;
2823
2824 state = ctxt->instate;
2825 ctxt->instate = XML_PARSER_COMMENT;
2826 SHRINK;
2827 SKIP(4);
2828 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2829 if (buf == NULL) {
2830 xmlGenericError(xmlGenericErrorContext,
2831 "malloc of %d byte failed\n", size);
2832 ctxt->instate = state;
2833 return;
2834 }
2835 q = CUR_CHAR(ql);
2836 NEXTL(ql);
2837 r = CUR_CHAR(rl);
2838 NEXTL(rl);
2839 cur = CUR_CHAR(l);
2840 len = 0;
2841 while (IS_CHAR(cur) && /* checked */
2842 ((cur != '>') ||
2843 (r != '-') || (q != '-'))) {
2844 if ((r == '-') && (q == '-') && (len > 1)) {
2845 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2847 ctxt->sax->error(ctxt->userData,
2848 "Comment must not contain '--' (double-hyphen)`\n");
2849 ctxt->wellFormed = 0;
2850 ctxt->disableSAX = 1;
2851 }
2852 if (len + 5 >= size) {
2853 size *= 2;
2854 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2855 if (buf == NULL) {
2856 xmlGenericError(xmlGenericErrorContext,
2857 "realloc of %d byte failed\n", size);
2858 ctxt->instate = state;
2859 return;
2860 }
2861 }
2862 COPY_BUF(ql,buf,len,q);
2863 q = r;
2864 ql = rl;
2865 r = cur;
2866 rl = l;
2867
2868 count++;
2869 if (count > 50) {
2870 GROW;
2871 count = 0;
2872 }
2873 NEXTL(l);
2874 cur = CUR_CHAR(l);
2875 if (cur == 0) {
2876 SHRINK;
2877 GROW;
2878 cur = CUR_CHAR(l);
2879 }
2880 }
2881 buf[len] = 0;
2882 if (!IS_CHAR(cur)) {
2883 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2885 ctxt->sax->error(ctxt->userData,
2886 "Comment not terminated \n<!--%.50s\n", buf);
2887 ctxt->wellFormed = 0;
2888 ctxt->disableSAX = 1;
2889 xmlFree(buf);
2890 } else {
2891 if (input != ctxt->input) {
2892 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2894 ctxt->sax->error(ctxt->userData,
2895"Comment doesn't start and stop in the same entity\n");
2896 ctxt->wellFormed = 0;
2897 ctxt->disableSAX = 1;
2898 }
2899 NEXT;
2900 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2901 (!ctxt->disableSAX))
2902 ctxt->sax->comment(ctxt->userData, buf);
2903 xmlFree(buf);
2904 }
2905 ctxt->instate = state;
2906}
2907
2908/**
2909 * xmlParsePITarget:
2910 * @ctxt: an XML parser context
2911 *
2912 * parse the name of a PI
2913 *
2914 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2915 *
2916 * Returns the PITarget name or NULL
2917 */
2918
2919xmlChar *
2920xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2921 xmlChar *name;
2922
2923 name = xmlParseName(ctxt);
2924 if ((name != NULL) &&
2925 ((name[0] == 'x') || (name[0] == 'X')) &&
2926 ((name[1] == 'm') || (name[1] == 'M')) &&
2927 ((name[2] == 'l') || (name[2] == 'L'))) {
2928 int i;
2929 if ((name[0] == 'x') && (name[1] == 'm') &&
2930 (name[2] == 'l') && (name[3] == 0)) {
2931 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2933 ctxt->sax->error(ctxt->userData,
2934 "XML declaration allowed only at the start of the document\n");
2935 ctxt->wellFormed = 0;
2936 ctxt->disableSAX = 1;
2937 return(name);
2938 } else if (name[3] == 0) {
2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2941 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2942 ctxt->wellFormed = 0;
2943 ctxt->disableSAX = 1;
2944 return(name);
2945 }
2946 for (i = 0;;i++) {
2947 if (xmlW3CPIs[i] == NULL) break;
2948 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2949 return(name);
2950 }
2951 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2952 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2953 ctxt->sax->warning(ctxt->userData,
2954 "xmlParsePItarget: invalid name prefix 'xml'\n");
2955 }
2956 }
2957 return(name);
2958}
2959
2960/**
2961 * xmlParsePI:
2962 * @ctxt: an XML parser context
2963 *
2964 * parse an XML Processing Instruction.
2965 *
2966 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2967 *
2968 * The processing is transfered to SAX once parsed.
2969 */
2970
2971void
2972xmlParsePI(xmlParserCtxtPtr ctxt) {
2973 xmlChar *buf = NULL;
2974 int len = 0;
2975 int size = XML_PARSER_BUFFER_SIZE;
2976 int cur, l;
2977 xmlChar *target;
2978 xmlParserInputState state;
2979 int count = 0;
2980
2981 if ((RAW == '<') && (NXT(1) == '?')) {
2982 xmlParserInputPtr input = ctxt->input;
2983 state = ctxt->instate;
2984 ctxt->instate = XML_PARSER_PI;
2985 /*
2986 * this is a Processing Instruction.
2987 */
2988 SKIP(2);
2989 SHRINK;
2990
2991 /*
2992 * Parse the target name and check for special support like
2993 * namespace.
2994 */
2995 target = xmlParsePITarget(ctxt);
2996 if (target != NULL) {
2997 if ((RAW == '?') && (NXT(1) == '>')) {
2998 if (input != ctxt->input) {
2999 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3001 ctxt->sax->error(ctxt->userData,
3002 "PI declaration doesn't start and stop in the same entity\n");
3003 ctxt->wellFormed = 0;
3004 ctxt->disableSAX = 1;
3005 }
3006 SKIP(2);
3007
3008 /*
3009 * SAX: PI detected.
3010 */
3011 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3012 (ctxt->sax->processingInstruction != NULL))
3013 ctxt->sax->processingInstruction(ctxt->userData,
3014 target, NULL);
3015 ctxt->instate = state;
3016 xmlFree(target);
3017 return;
3018 }
3019 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3020 if (buf == NULL) {
3021 xmlGenericError(xmlGenericErrorContext,
3022 "malloc of %d byte failed\n", size);
3023 ctxt->instate = state;
3024 return;
3025 }
3026 cur = CUR;
3027 if (!IS_BLANK(cur)) {
3028 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3030 ctxt->sax->error(ctxt->userData,
3031 "xmlParsePI: PI %s space expected\n", target);
3032 ctxt->wellFormed = 0;
3033 ctxt->disableSAX = 1;
3034 }
3035 SKIP_BLANKS;
3036 cur = CUR_CHAR(l);
3037 while (IS_CHAR(cur) && /* checked */
3038 ((cur != '?') || (NXT(1) != '>'))) {
3039 if (len + 5 >= size) {
3040 size *= 2;
3041 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3042 if (buf == NULL) {
3043 xmlGenericError(xmlGenericErrorContext,
3044 "realloc of %d byte failed\n", size);
3045 ctxt->instate = state;
3046 return;
3047 }
3048 }
3049 count++;
3050 if (count > 50) {
3051 GROW;
3052 count = 0;
3053 }
3054 COPY_BUF(l,buf,len,cur);
3055 NEXTL(l);
3056 cur = CUR_CHAR(l);
3057 if (cur == 0) {
3058 SHRINK;
3059 GROW;
3060 cur = CUR_CHAR(l);
3061 }
3062 }
3063 buf[len] = 0;
3064 if (cur != '?') {
3065 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068 "xmlParsePI: PI %s never end ...\n", target);
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 } else {
3072 if (input != ctxt->input) {
3073 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3075 ctxt->sax->error(ctxt->userData,
3076 "PI declaration doesn't start and stop in the same entity\n");
3077 ctxt->wellFormed = 0;
3078 ctxt->disableSAX = 1;
3079 }
3080 SKIP(2);
3081
3082 /*
3083 * SAX: PI detected.
3084 */
3085 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3086 (ctxt->sax->processingInstruction != NULL))
3087 ctxt->sax->processingInstruction(ctxt->userData,
3088 target, buf);
3089 }
3090 xmlFree(buf);
3091 xmlFree(target);
3092 } else {
3093 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3095 ctxt->sax->error(ctxt->userData,
3096 "xmlParsePI : no target name\n");
3097 ctxt->wellFormed = 0;
3098 ctxt->disableSAX = 1;
3099 }
3100 ctxt->instate = state;
3101 }
3102}
3103
3104/**
3105 * xmlParseNotationDecl:
3106 * @ctxt: an XML parser context
3107 *
3108 * parse a notation declaration
3109 *
3110 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3111 *
3112 * Hence there is actually 3 choices:
3113 * 'PUBLIC' S PubidLiteral
3114 * 'PUBLIC' S PubidLiteral S SystemLiteral
3115 * and 'SYSTEM' S SystemLiteral
3116 *
3117 * See the NOTE on xmlParseExternalID().
3118 */
3119
3120void
3121xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3122 xmlChar *name;
3123 xmlChar *Pubid;
3124 xmlChar *Systemid;
3125
3126 if ((RAW == '<') && (NXT(1) == '!') &&
3127 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3128 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3129 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3130 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3131 xmlParserInputPtr input = ctxt->input;
3132 SHRINK;
3133 SKIP(10);
3134 if (!IS_BLANK(CUR)) {
3135 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3137 ctxt->sax->error(ctxt->userData,
3138 "Space required after '<!NOTATION'\n");
3139 ctxt->wellFormed = 0;
3140 ctxt->disableSAX = 1;
3141 return;
3142 }
3143 SKIP_BLANKS;
3144
Daniel Veillard76d66f42001-05-16 21:05:17 +00003145 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003146 if (name == NULL) {
3147 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3149 ctxt->sax->error(ctxt->userData,
3150 "NOTATION: Name expected here\n");
3151 ctxt->wellFormed = 0;
3152 ctxt->disableSAX = 1;
3153 return;
3154 }
3155 if (!IS_BLANK(CUR)) {
3156 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3158 ctxt->sax->error(ctxt->userData,
3159 "Space required after the NOTATION name'\n");
3160 ctxt->wellFormed = 0;
3161 ctxt->disableSAX = 1;
3162 return;
3163 }
3164 SKIP_BLANKS;
3165
3166 /*
3167 * Parse the IDs.
3168 */
3169 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3170 SKIP_BLANKS;
3171
3172 if (RAW == '>') {
3173 if (input != ctxt->input) {
3174 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3176 ctxt->sax->error(ctxt->userData,
3177"Notation declaration doesn't start and stop in the same entity\n");
3178 ctxt->wellFormed = 0;
3179 ctxt->disableSAX = 1;
3180 }
3181 NEXT;
3182 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3183 (ctxt->sax->notationDecl != NULL))
3184 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3185 } else {
3186 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData,
3189 "'>' required to close NOTATION declaration\n");
3190 ctxt->wellFormed = 0;
3191 ctxt->disableSAX = 1;
3192 }
3193 xmlFree(name);
3194 if (Systemid != NULL) xmlFree(Systemid);
3195 if (Pubid != NULL) xmlFree(Pubid);
3196 }
3197}
3198
3199/**
3200 * xmlParseEntityDecl:
3201 * @ctxt: an XML parser context
3202 *
3203 * parse <!ENTITY declarations
3204 *
3205 * [70] EntityDecl ::= GEDecl | PEDecl
3206 *
3207 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3208 *
3209 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3210 *
3211 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3212 *
3213 * [74] PEDef ::= EntityValue | ExternalID
3214 *
3215 * [76] NDataDecl ::= S 'NDATA' S Name
3216 *
3217 * [ VC: Notation Declared ]
3218 * The Name must match the declared name of a notation.
3219 */
3220
3221void
3222xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3223 xmlChar *name = NULL;
3224 xmlChar *value = NULL;
3225 xmlChar *URI = NULL, *literal = NULL;
3226 xmlChar *ndata = NULL;
3227 int isParameter = 0;
3228 xmlChar *orig = NULL;
3229
3230 GROW;
3231 if ((RAW == '<') && (NXT(1) == '!') &&
3232 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3233 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3234 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3235 xmlParserInputPtr input = ctxt->input;
3236 ctxt->instate = XML_PARSER_ENTITY_DECL;
3237 SHRINK;
3238 SKIP(8);
3239 if (!IS_BLANK(CUR)) {
3240 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3242 ctxt->sax->error(ctxt->userData,
3243 "Space required after '<!ENTITY'\n");
3244 ctxt->wellFormed = 0;
3245 ctxt->disableSAX = 1;
3246 }
3247 SKIP_BLANKS;
3248
3249 if (RAW == '%') {
3250 NEXT;
3251 if (!IS_BLANK(CUR)) {
3252 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255 "Space required after '%'\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 }
3259 SKIP_BLANKS;
3260 isParameter = 1;
3261 }
3262
Daniel Veillard76d66f42001-05-16 21:05:17 +00003263 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 if (name == NULL) {
3265 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3267 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 return;
3271 }
3272 if (!IS_BLANK(CUR)) {
3273 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3275 ctxt->sax->error(ctxt->userData,
3276 "Space required after the entity name\n");
3277 ctxt->wellFormed = 0;
3278 ctxt->disableSAX = 1;
3279 }
3280 SKIP_BLANKS;
3281
3282 /*
3283 * handle the various case of definitions...
3284 */
3285 if (isParameter) {
3286 if ((RAW == '"') || (RAW == '\'')) {
3287 value = xmlParseEntityValue(ctxt, &orig);
3288 if (value) {
3289 if ((ctxt->sax != NULL) &&
3290 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3291 ctxt->sax->entityDecl(ctxt->userData, name,
3292 XML_INTERNAL_PARAMETER_ENTITY,
3293 NULL, NULL, value);
3294 }
3295 } else {
3296 URI = xmlParseExternalID(ctxt, &literal, 1);
3297 if ((URI == NULL) && (literal == NULL)) {
3298 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3300 ctxt->sax->error(ctxt->userData,
3301 "Entity value required\n");
3302 ctxt->wellFormed = 0;
3303 ctxt->disableSAX = 1;
3304 }
3305 if (URI) {
3306 xmlURIPtr uri;
3307
3308 uri = xmlParseURI((const char *) URI);
3309 if (uri == NULL) {
3310 ctxt->errNo = XML_ERR_INVALID_URI;
3311 if ((ctxt->sax != NULL) &&
3312 (!ctxt->disableSAX) &&
3313 (ctxt->sax->error != NULL))
3314 ctxt->sax->error(ctxt->userData,
3315 "Invalid URI: %s\n", URI);
3316 ctxt->wellFormed = 0;
3317 } else {
3318 if (uri->fragment != NULL) {
3319 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3320 if ((ctxt->sax != NULL) &&
3321 (!ctxt->disableSAX) &&
3322 (ctxt->sax->error != NULL))
3323 ctxt->sax->error(ctxt->userData,
3324 "Fragment not allowed: %s\n", URI);
3325 ctxt->wellFormed = 0;
3326 } else {
3327 if ((ctxt->sax != NULL) &&
3328 (!ctxt->disableSAX) &&
3329 (ctxt->sax->entityDecl != NULL))
3330 ctxt->sax->entityDecl(ctxt->userData, name,
3331 XML_EXTERNAL_PARAMETER_ENTITY,
3332 literal, URI, NULL);
3333 }
3334 xmlFreeURI(uri);
3335 }
3336 }
3337 }
3338 } else {
3339 if ((RAW == '"') || (RAW == '\'')) {
3340 value = xmlParseEntityValue(ctxt, &orig);
3341 if ((ctxt->sax != NULL) &&
3342 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3343 ctxt->sax->entityDecl(ctxt->userData, name,
3344 XML_INTERNAL_GENERAL_ENTITY,
3345 NULL, NULL, value);
3346 } else {
3347 URI = xmlParseExternalID(ctxt, &literal, 1);
3348 if ((URI == NULL) && (literal == NULL)) {
3349 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData,
3352 "Entity value required\n");
3353 ctxt->wellFormed = 0;
3354 ctxt->disableSAX = 1;
3355 }
3356 if (URI) {
3357 xmlURIPtr uri;
3358
3359 uri = xmlParseURI((const char *)URI);
3360 if (uri == NULL) {
3361 ctxt->errNo = XML_ERR_INVALID_URI;
3362 if ((ctxt->sax != NULL) &&
3363 (!ctxt->disableSAX) &&
3364 (ctxt->sax->error != NULL))
3365 ctxt->sax->error(ctxt->userData,
3366 "Invalid URI: %s\n", URI);
3367 ctxt->wellFormed = 0;
3368 } else {
3369 if (uri->fragment != NULL) {
3370 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3371 if ((ctxt->sax != NULL) &&
3372 (!ctxt->disableSAX) &&
3373 (ctxt->sax->error != NULL))
3374 ctxt->sax->error(ctxt->userData,
3375 "Fragment not allowed: %s\n", URI);
3376 ctxt->wellFormed = 0;
3377 }
3378 xmlFreeURI(uri);
3379 }
3380 }
3381 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3382 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "Space required before 'NDATA'\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 }
3389 SKIP_BLANKS;
3390 if ((RAW == 'N') && (NXT(1) == 'D') &&
3391 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3392 (NXT(4) == 'A')) {
3393 SKIP(5);
3394 if (!IS_BLANK(CUR)) {
3395 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData,
3398 "Space required after 'NDATA'\n");
3399 ctxt->wellFormed = 0;
3400 ctxt->disableSAX = 1;
3401 }
3402 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003403 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003404 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3405 (ctxt->sax->unparsedEntityDecl != NULL))
3406 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3407 literal, URI, ndata);
3408 } else {
3409 if ((ctxt->sax != NULL) &&
3410 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3411 ctxt->sax->entityDecl(ctxt->userData, name,
3412 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3413 literal, URI, NULL);
3414 }
3415 }
3416 }
3417 SKIP_BLANKS;
3418 if (RAW != '>') {
3419 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3421 ctxt->sax->error(ctxt->userData,
3422 "xmlParseEntityDecl: entity %s not terminated\n", name);
3423 ctxt->wellFormed = 0;
3424 ctxt->disableSAX = 1;
3425 } else {
3426 if (input != ctxt->input) {
3427 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3429 ctxt->sax->error(ctxt->userData,
3430"Entity declaration doesn't start and stop in the same entity\n");
3431 ctxt->wellFormed = 0;
3432 ctxt->disableSAX = 1;
3433 }
3434 NEXT;
3435 }
3436 if (orig != NULL) {
3437 /*
3438 * Ugly mechanism to save the raw entity value.
3439 */
3440 xmlEntityPtr cur = NULL;
3441
3442 if (isParameter) {
3443 if ((ctxt->sax != NULL) &&
3444 (ctxt->sax->getParameterEntity != NULL))
3445 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3446 } else {
3447 if ((ctxt->sax != NULL) &&
3448 (ctxt->sax->getEntity != NULL))
3449 cur = ctxt->sax->getEntity(ctxt->userData, name);
3450 }
3451 if (cur != NULL) {
3452 if (cur->orig != NULL)
3453 xmlFree(orig);
3454 else
3455 cur->orig = orig;
3456 } else
3457 xmlFree(orig);
3458 }
3459 if (name != NULL) xmlFree(name);
3460 if (value != NULL) xmlFree(value);
3461 if (URI != NULL) xmlFree(URI);
3462 if (literal != NULL) xmlFree(literal);
3463 if (ndata != NULL) xmlFree(ndata);
3464 }
3465}
3466
3467/**
3468 * xmlParseDefaultDecl:
3469 * @ctxt: an XML parser context
3470 * @value: Receive a possible fixed default value for the attribute
3471 *
3472 * Parse an attribute default declaration
3473 *
3474 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3475 *
3476 * [ VC: Required Attribute ]
3477 * if the default declaration is the keyword #REQUIRED, then the
3478 * attribute must be specified for all elements of the type in the
3479 * attribute-list declaration.
3480 *
3481 * [ VC: Attribute Default Legal ]
3482 * The declared default value must meet the lexical constraints of
3483 * the declared attribute type c.f. xmlValidateAttributeDecl()
3484 *
3485 * [ VC: Fixed Attribute Default ]
3486 * if an attribute has a default value declared with the #FIXED
3487 * keyword, instances of that attribute must match the default value.
3488 *
3489 * [ WFC: No < in Attribute Values ]
3490 * handled in xmlParseAttValue()
3491 *
3492 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3493 * or XML_ATTRIBUTE_FIXED.
3494 */
3495
3496int
3497xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3498 int val;
3499 xmlChar *ret;
3500
3501 *value = NULL;
3502 if ((RAW == '#') && (NXT(1) == 'R') &&
3503 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3504 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3505 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3506 (NXT(8) == 'D')) {
3507 SKIP(9);
3508 return(XML_ATTRIBUTE_REQUIRED);
3509 }
3510 if ((RAW == '#') && (NXT(1) == 'I') &&
3511 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3512 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3513 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3514 SKIP(8);
3515 return(XML_ATTRIBUTE_IMPLIED);
3516 }
3517 val = XML_ATTRIBUTE_NONE;
3518 if ((RAW == '#') && (NXT(1) == 'F') &&
3519 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3520 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3521 SKIP(6);
3522 val = XML_ATTRIBUTE_FIXED;
3523 if (!IS_BLANK(CUR)) {
3524 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "Space required after '#FIXED'\n");
3528 ctxt->wellFormed = 0;
3529 ctxt->disableSAX = 1;
3530 }
3531 SKIP_BLANKS;
3532 }
3533 ret = xmlParseAttValue(ctxt);
3534 ctxt->instate = XML_PARSER_DTD;
3535 if (ret == NULL) {
3536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3537 ctxt->sax->error(ctxt->userData,
3538 "Attribute default value declaration error\n");
3539 ctxt->wellFormed = 0;
3540 ctxt->disableSAX = 1;
3541 } else
3542 *value = ret;
3543 return(val);
3544}
3545
3546/**
3547 * xmlParseNotationType:
3548 * @ctxt: an XML parser context
3549 *
3550 * parse an Notation attribute type.
3551 *
3552 * Note: the leading 'NOTATION' S part has already being parsed...
3553 *
3554 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3555 *
3556 * [ VC: Notation Attributes ]
3557 * Values of this type must match one of the notation names included
3558 * in the declaration; all notation names in the declaration must be declared.
3559 *
3560 * Returns: the notation attribute tree built while parsing
3561 */
3562
3563xmlEnumerationPtr
3564xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3565 xmlChar *name;
3566 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3567
3568 if (RAW != '(') {
3569 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3571 ctxt->sax->error(ctxt->userData,
3572 "'(' required to start 'NOTATION'\n");
3573 ctxt->wellFormed = 0;
3574 ctxt->disableSAX = 1;
3575 return(NULL);
3576 }
3577 SHRINK;
3578 do {
3579 NEXT;
3580 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003581 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003582 if (name == NULL) {
3583 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3585 ctxt->sax->error(ctxt->userData,
3586 "Name expected in NOTATION declaration\n");
3587 ctxt->wellFormed = 0;
3588 ctxt->disableSAX = 1;
3589 return(ret);
3590 }
3591 cur = xmlCreateEnumeration(name);
3592 xmlFree(name);
3593 if (cur == NULL) return(ret);
3594 if (last == NULL) ret = last = cur;
3595 else {
3596 last->next = cur;
3597 last = cur;
3598 }
3599 SKIP_BLANKS;
3600 } while (RAW == '|');
3601 if (RAW != ')') {
3602 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3604 ctxt->sax->error(ctxt->userData,
3605 "')' required to finish NOTATION declaration\n");
3606 ctxt->wellFormed = 0;
3607 ctxt->disableSAX = 1;
3608 if ((last != NULL) && (last != ret))
3609 xmlFreeEnumeration(last);
3610 return(ret);
3611 }
3612 NEXT;
3613 return(ret);
3614}
3615
3616/**
3617 * xmlParseEnumerationType:
3618 * @ctxt: an XML parser context
3619 *
3620 * parse an Enumeration attribute type.
3621 *
3622 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3623 *
3624 * [ VC: Enumeration ]
3625 * Values of this type must match one of the Nmtoken tokens in
3626 * the declaration
3627 *
3628 * Returns: the enumeration attribute tree built while parsing
3629 */
3630
3631xmlEnumerationPtr
3632xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3633 xmlChar *name;
3634 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3635
3636 if (RAW != '(') {
3637 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3639 ctxt->sax->error(ctxt->userData,
3640 "'(' required to start ATTLIST enumeration\n");
3641 ctxt->wellFormed = 0;
3642 ctxt->disableSAX = 1;
3643 return(NULL);
3644 }
3645 SHRINK;
3646 do {
3647 NEXT;
3648 SKIP_BLANKS;
3649 name = xmlParseNmtoken(ctxt);
3650 if (name == NULL) {
3651 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3653 ctxt->sax->error(ctxt->userData,
3654 "NmToken expected in ATTLIST enumeration\n");
3655 ctxt->wellFormed = 0;
3656 ctxt->disableSAX = 1;
3657 return(ret);
3658 }
3659 cur = xmlCreateEnumeration(name);
3660 xmlFree(name);
3661 if (cur == NULL) return(ret);
3662 if (last == NULL) ret = last = cur;
3663 else {
3664 last->next = cur;
3665 last = cur;
3666 }
3667 SKIP_BLANKS;
3668 } while (RAW == '|');
3669 if (RAW != ')') {
3670 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3672 ctxt->sax->error(ctxt->userData,
3673 "')' required to finish ATTLIST enumeration\n");
3674 ctxt->wellFormed = 0;
3675 ctxt->disableSAX = 1;
3676 return(ret);
3677 }
3678 NEXT;
3679 return(ret);
3680}
3681
3682/**
3683 * xmlParseEnumeratedType:
3684 * @ctxt: an XML parser context
3685 * @tree: the enumeration tree built while parsing
3686 *
3687 * parse an Enumerated attribute type.
3688 *
3689 * [57] EnumeratedType ::= NotationType | Enumeration
3690 *
3691 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3692 *
3693 *
3694 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3695 */
3696
3697int
3698xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3699 if ((RAW == 'N') && (NXT(1) == 'O') &&
3700 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3701 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3702 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3703 SKIP(8);
3704 if (!IS_BLANK(CUR)) {
3705 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3707 ctxt->sax->error(ctxt->userData,
3708 "Space required after 'NOTATION'\n");
3709 ctxt->wellFormed = 0;
3710 ctxt->disableSAX = 1;
3711 return(0);
3712 }
3713 SKIP_BLANKS;
3714 *tree = xmlParseNotationType(ctxt);
3715 if (*tree == NULL) return(0);
3716 return(XML_ATTRIBUTE_NOTATION);
3717 }
3718 *tree = xmlParseEnumerationType(ctxt);
3719 if (*tree == NULL) return(0);
3720 return(XML_ATTRIBUTE_ENUMERATION);
3721}
3722
3723/**
3724 * xmlParseAttributeType:
3725 * @ctxt: an XML parser context
3726 * @tree: the enumeration tree built while parsing
3727 *
3728 * parse the Attribute list def for an element
3729 *
3730 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3731 *
3732 * [55] StringType ::= 'CDATA'
3733 *
3734 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3735 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3736 *
3737 * Validity constraints for attribute values syntax are checked in
3738 * xmlValidateAttributeValue()
3739 *
3740 * [ VC: ID ]
3741 * Values of type ID must match the Name production. A name must not
3742 * appear more than once in an XML document as a value of this type;
3743 * i.e., ID values must uniquely identify the elements which bear them.
3744 *
3745 * [ VC: One ID per Element Type ]
3746 * No element type may have more than one ID attribute specified.
3747 *
3748 * [ VC: ID Attribute Default ]
3749 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3750 *
3751 * [ VC: IDREF ]
3752 * Values of type IDREF must match the Name production, and values
3753 * of type IDREFS must match Names; each IDREF Name must match the value
3754 * of an ID attribute on some element in the XML document; i.e. IDREF
3755 * values must match the value of some ID attribute.
3756 *
3757 * [ VC: Entity Name ]
3758 * Values of type ENTITY must match the Name production, values
3759 * of type ENTITIES must match Names; each Entity Name must match the
3760 * name of an unparsed entity declared in the DTD.
3761 *
3762 * [ VC: Name Token ]
3763 * Values of type NMTOKEN must match the Nmtoken production; values
3764 * of type NMTOKENS must match Nmtokens.
3765 *
3766 * Returns the attribute type
3767 */
3768int
3769xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3770 SHRINK;
3771 if ((RAW == 'C') && (NXT(1) == 'D') &&
3772 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3773 (NXT(4) == 'A')) {
3774 SKIP(5);
3775 return(XML_ATTRIBUTE_CDATA);
3776 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3777 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3778 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3779 SKIP(6);
3780 return(XML_ATTRIBUTE_IDREFS);
3781 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3782 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3783 (NXT(4) == 'F')) {
3784 SKIP(5);
3785 return(XML_ATTRIBUTE_IDREF);
3786 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3787 SKIP(2);
3788 return(XML_ATTRIBUTE_ID);
3789 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3790 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3791 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3792 SKIP(6);
3793 return(XML_ATTRIBUTE_ENTITY);
3794 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3795 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3796 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3797 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3798 SKIP(8);
3799 return(XML_ATTRIBUTE_ENTITIES);
3800 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3801 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3802 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3803 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3804 SKIP(8);
3805 return(XML_ATTRIBUTE_NMTOKENS);
3806 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3807 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3808 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3809 (NXT(6) == 'N')) {
3810 SKIP(7);
3811 return(XML_ATTRIBUTE_NMTOKEN);
3812 }
3813 return(xmlParseEnumeratedType(ctxt, tree));
3814}
3815
3816/**
3817 * xmlParseAttributeListDecl:
3818 * @ctxt: an XML parser context
3819 *
3820 * : parse the Attribute list def for an element
3821 *
3822 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3823 *
3824 * [53] AttDef ::= S Name S AttType S DefaultDecl
3825 *
3826 */
3827void
3828xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3829 xmlChar *elemName;
3830 xmlChar *attrName;
3831 xmlEnumerationPtr tree;
3832
3833 if ((RAW == '<') && (NXT(1) == '!') &&
3834 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3835 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3836 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3837 (NXT(8) == 'T')) {
3838 xmlParserInputPtr input = ctxt->input;
3839
3840 SKIP(9);
3841 if (!IS_BLANK(CUR)) {
3842 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3844 ctxt->sax->error(ctxt->userData,
3845 "Space required after '<!ATTLIST'\n");
3846 ctxt->wellFormed = 0;
3847 ctxt->disableSAX = 1;
3848 }
3849 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003850 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003851 if (elemName == NULL) {
3852 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3854 ctxt->sax->error(ctxt->userData,
3855 "ATTLIST: no name for Element\n");
3856 ctxt->wellFormed = 0;
3857 ctxt->disableSAX = 1;
3858 return;
3859 }
3860 SKIP_BLANKS;
3861 GROW;
3862 while (RAW != '>') {
3863 const xmlChar *check = CUR_PTR;
3864 int type;
3865 int def;
3866 xmlChar *defaultValue = NULL;
3867
3868 GROW;
3869 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003870 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003871 if (attrName == NULL) {
3872 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3874 ctxt->sax->error(ctxt->userData,
3875 "ATTLIST: no name for Attribute\n");
3876 ctxt->wellFormed = 0;
3877 ctxt->disableSAX = 1;
3878 break;
3879 }
3880 GROW;
3881 if (!IS_BLANK(CUR)) {
3882 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3884 ctxt->sax->error(ctxt->userData,
3885 "Space required after the attribute name\n");
3886 ctxt->wellFormed = 0;
3887 ctxt->disableSAX = 1;
3888 if (attrName != NULL)
3889 xmlFree(attrName);
3890 if (defaultValue != NULL)
3891 xmlFree(defaultValue);
3892 break;
3893 }
3894 SKIP_BLANKS;
3895
3896 type = xmlParseAttributeType(ctxt, &tree);
3897 if (type <= 0) {
3898 if (attrName != NULL)
3899 xmlFree(attrName);
3900 if (defaultValue != NULL)
3901 xmlFree(defaultValue);
3902 break;
3903 }
3904
3905 GROW;
3906 if (!IS_BLANK(CUR)) {
3907 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3909 ctxt->sax->error(ctxt->userData,
3910 "Space required after the attribute type\n");
3911 ctxt->wellFormed = 0;
3912 ctxt->disableSAX = 1;
3913 if (attrName != NULL)
3914 xmlFree(attrName);
3915 if (defaultValue != NULL)
3916 xmlFree(defaultValue);
3917 if (tree != NULL)
3918 xmlFreeEnumeration(tree);
3919 break;
3920 }
3921 SKIP_BLANKS;
3922
3923 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3924 if (def <= 0) {
3925 if (attrName != NULL)
3926 xmlFree(attrName);
3927 if (defaultValue != NULL)
3928 xmlFree(defaultValue);
3929 if (tree != NULL)
3930 xmlFreeEnumeration(tree);
3931 break;
3932 }
3933
3934 GROW;
3935 if (RAW != '>') {
3936 if (!IS_BLANK(CUR)) {
3937 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3939 ctxt->sax->error(ctxt->userData,
3940 "Space required after the attribute default value\n");
3941 ctxt->wellFormed = 0;
3942 ctxt->disableSAX = 1;
3943 if (attrName != NULL)
3944 xmlFree(attrName);
3945 if (defaultValue != NULL)
3946 xmlFree(defaultValue);
3947 if (tree != NULL)
3948 xmlFreeEnumeration(tree);
3949 break;
3950 }
3951 SKIP_BLANKS;
3952 }
3953 if (check == CUR_PTR) {
3954 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3956 ctxt->sax->error(ctxt->userData,
3957 "xmlParseAttributeListDecl: detected internal error\n");
3958 if (attrName != NULL)
3959 xmlFree(attrName);
3960 if (defaultValue != NULL)
3961 xmlFree(defaultValue);
3962 if (tree != NULL)
3963 xmlFreeEnumeration(tree);
3964 break;
3965 }
3966 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3967 (ctxt->sax->attributeDecl != NULL))
3968 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3969 type, def, defaultValue, tree);
3970 if (attrName != NULL)
3971 xmlFree(attrName);
3972 if (defaultValue != NULL)
3973 xmlFree(defaultValue);
3974 GROW;
3975 }
3976 if (RAW == '>') {
3977 if (input != ctxt->input) {
3978 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3980 ctxt->sax->error(ctxt->userData,
3981"Attribute list declaration doesn't start and stop in the same entity\n");
3982 ctxt->wellFormed = 0;
3983 ctxt->disableSAX = 1;
3984 }
3985 NEXT;
3986 }
3987
3988 xmlFree(elemName);
3989 }
3990}
3991
3992/**
3993 * xmlParseElementMixedContentDecl:
3994 * @ctxt: an XML parser context
3995 *
3996 * parse the declaration for a Mixed Element content
3997 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3998 *
3999 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4000 * '(' S? '#PCDATA' S? ')'
4001 *
4002 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4003 *
4004 * [ VC: No Duplicate Types ]
4005 * The same name must not appear more than once in a single
4006 * mixed-content declaration.
4007 *
4008 * returns: the list of the xmlElementContentPtr describing the element choices
4009 */
4010xmlElementContentPtr
4011xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4012 xmlElementContentPtr ret = NULL, cur = NULL, n;
4013 xmlChar *elem = NULL;
4014
4015 GROW;
4016 if ((RAW == '#') && (NXT(1) == 'P') &&
4017 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4018 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4019 (NXT(6) == 'A')) {
4020 SKIP(7);
4021 SKIP_BLANKS;
4022 SHRINK;
4023 if (RAW == ')') {
4024 ctxt->entity = ctxt->input;
4025 NEXT;
4026 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4027 if (RAW == '*') {
4028 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4029 NEXT;
4030 }
4031 return(ret);
4032 }
4033 if ((RAW == '(') || (RAW == '|')) {
4034 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4035 if (ret == NULL) return(NULL);
4036 }
4037 while (RAW == '|') {
4038 NEXT;
4039 if (elem == NULL) {
4040 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4041 if (ret == NULL) return(NULL);
4042 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004043 if (cur != NULL)
4044 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004045 cur = ret;
4046 } else {
4047 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4048 if (n == NULL) return(NULL);
4049 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004050 if (n->c1 != NULL)
4051 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004052 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004053 if (n != NULL)
4054 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004055 cur = n;
4056 xmlFree(elem);
4057 }
4058 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004059 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 if (elem == NULL) {
4061 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "xmlParseElementMixedContentDecl : Name expected\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 xmlFreeElementContent(cur);
4068 return(NULL);
4069 }
4070 SKIP_BLANKS;
4071 GROW;
4072 }
4073 if ((RAW == ')') && (NXT(1) == '*')) {
4074 if (elem != NULL) {
4075 cur->c2 = xmlNewElementContent(elem,
4076 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004077 if (cur->c2 != NULL)
4078 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004079 xmlFree(elem);
4080 }
4081 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4082 ctxt->entity = ctxt->input;
4083 SKIP(2);
4084 } else {
4085 if (elem != NULL) xmlFree(elem);
4086 xmlFreeElementContent(ret);
4087 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4089 ctxt->sax->error(ctxt->userData,
4090 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4091 ctxt->wellFormed = 0;
4092 ctxt->disableSAX = 1;
4093 return(NULL);
4094 }
4095
4096 } else {
4097 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4099 ctxt->sax->error(ctxt->userData,
4100 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4101 ctxt->wellFormed = 0;
4102 ctxt->disableSAX = 1;
4103 }
4104 return(ret);
4105}
4106
/**
 * xmlParseElementChildrenContentD:
 * @ctxt:  an XML parser context
 *
 * VMS version of xmlParseElementChildrenContentDecl()
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
/**
 * xmlParseElementChildrenContentDecl:
 * @ctxt:  an XML parser context
 *
 * parse the declaration for an element children content model
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy.
 */
4146xmlElementContentPtr
4147#ifdef VMS
4148xmlParseElementChildrenContentD
4149#else
4150xmlParseElementChildrenContentDecl
4151#endif
4152(xmlParserCtxtPtr ctxt) {
4153 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4154 xmlChar *elem;
4155 xmlChar type = 0;
4156
4157 SKIP_BLANKS;
4158 GROW;
4159 if (RAW == '(') {
4160 /* Recurse on first child */
4161 NEXT;
4162 SKIP_BLANKS;
4163 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4164 SKIP_BLANKS;
4165 GROW;
4166 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004167 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004168 if (elem == NULL) {
4169 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4171 ctxt->sax->error(ctxt->userData,
4172 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4173 ctxt->wellFormed = 0;
4174 ctxt->disableSAX = 1;
4175 return(NULL);
4176 }
4177 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4178 GROW;
4179 if (RAW == '?') {
4180 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4181 NEXT;
4182 } else if (RAW == '*') {
4183 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4184 NEXT;
4185 } else if (RAW == '+') {
4186 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4187 NEXT;
4188 } else {
4189 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4190 }
4191 xmlFree(elem);
4192 GROW;
4193 }
4194 SKIP_BLANKS;
4195 SHRINK;
4196 while (RAW != ')') {
4197 /*
4198 * Each loop we parse one separator and one element.
4199 */
4200 if (RAW == ',') {
4201 if (type == 0) type = CUR;
4202
4203 /*
4204 * Detect "Name | Name , Name" error
4205 */
4206 else if (type != CUR) {
4207 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4209 ctxt->sax->error(ctxt->userData,
4210 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4211 type);
4212 ctxt->wellFormed = 0;
4213 ctxt->disableSAX = 1;
4214 if ((op != NULL) && (op != ret))
4215 xmlFreeElementContent(op);
4216 if ((last != NULL) && (last != ret) &&
4217 (last != ret->c1) && (last != ret->c2))
4218 xmlFreeElementContent(last);
4219 if (ret != NULL)
4220 xmlFreeElementContent(ret);
4221 return(NULL);
4222 }
4223 NEXT;
4224
4225 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4226 if (op == NULL) {
4227 xmlFreeElementContent(ret);
4228 return(NULL);
4229 }
4230 if (last == NULL) {
4231 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004232 if (ret != NULL)
4233 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004234 ret = cur = op;
4235 } else {
4236 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004237 if (op != NULL)
4238 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004239 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004240 if (last != NULL)
4241 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 cur =op;
4243 last = NULL;
4244 }
4245 } else if (RAW == '|') {
4246 if (type == 0) type = CUR;
4247
4248 /*
4249 * Detect "Name , Name | Name" error
4250 */
4251 else if (type != CUR) {
4252 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4254 ctxt->sax->error(ctxt->userData,
4255 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4256 type);
4257 ctxt->wellFormed = 0;
4258 ctxt->disableSAX = 1;
4259 if ((op != NULL) && (op != ret) && (op != last))
4260 xmlFreeElementContent(op);
4261 if ((last != NULL) && (last != ret) &&
4262 (last != ret->c1) && (last != ret->c2))
4263 xmlFreeElementContent(last);
4264 if (ret != NULL)
4265 xmlFreeElementContent(ret);
4266 return(NULL);
4267 }
4268 NEXT;
4269
4270 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4271 if (op == NULL) {
4272 if ((op != NULL) && (op != ret))
4273 xmlFreeElementContent(op);
4274 if ((last != NULL) && (last != ret) &&
4275 (last != ret->c1) && (last != ret->c2))
4276 xmlFreeElementContent(last);
4277 if (ret != NULL)
4278 xmlFreeElementContent(ret);
4279 return(NULL);
4280 }
4281 if (last == NULL) {
4282 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004283 if (ret != NULL)
4284 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004285 ret = cur = op;
4286 } else {
4287 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004288 if (op != NULL)
4289 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004290 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004291 if (last != NULL)
4292 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004293 cur =op;
4294 last = NULL;
4295 }
4296 } else {
4297 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4299 ctxt->sax->error(ctxt->userData,
4300 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4301 ctxt->wellFormed = 0;
4302 ctxt->disableSAX = 1;
4303 if ((op != NULL) && (op != ret))
4304 xmlFreeElementContent(op);
4305 if ((last != NULL) && (last != ret) &&
4306 (last != ret->c1) && (last != ret->c2))
4307 xmlFreeElementContent(last);
4308 if (ret != NULL)
4309 xmlFreeElementContent(ret);
4310 return(NULL);
4311 }
4312 GROW;
4313 SKIP_BLANKS;
4314 GROW;
4315 if (RAW == '(') {
4316 /* Recurse on second child */
4317 NEXT;
4318 SKIP_BLANKS;
4319 last = xmlParseElementChildrenContentDecl(ctxt);
4320 SKIP_BLANKS;
4321 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004322 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 if (elem == NULL) {
4324 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4326 ctxt->sax->error(ctxt->userData,
4327 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4328 ctxt->wellFormed = 0;
4329 ctxt->disableSAX = 1;
4330 if ((op != NULL) && (op != ret))
4331 xmlFreeElementContent(op);
4332 if ((last != NULL) && (last != ret) &&
4333 (last != ret->c1) && (last != ret->c2))
4334 xmlFreeElementContent(last);
4335 if (ret != NULL)
4336 xmlFreeElementContent(ret);
4337 return(NULL);
4338 }
4339 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4340 xmlFree(elem);
4341 if (RAW == '?') {
4342 last->ocur = XML_ELEMENT_CONTENT_OPT;
4343 NEXT;
4344 } else if (RAW == '*') {
4345 last->ocur = XML_ELEMENT_CONTENT_MULT;
4346 NEXT;
4347 } else if (RAW == '+') {
4348 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4349 NEXT;
4350 } else {
4351 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4352 }
4353 }
4354 SKIP_BLANKS;
4355 GROW;
4356 }
4357 if ((cur != NULL) && (last != NULL)) {
4358 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004359 if (last != NULL)
4360 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004361 }
4362 ctxt->entity = ctxt->input;
4363 NEXT;
4364 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004365 if (ret != NULL)
4366 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004367 NEXT;
4368 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004369 if (ret != NULL)
4370 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 NEXT;
4372 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004373 if (ret != NULL)
4374 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004375 NEXT;
4376 }
4377 return(ret);
4378}
4379
4380/**
4381 * xmlParseElementContentDecl:
4382 * @ctxt: an XML parser context
4383 * @name: the name of the element being defined.
4384 * @result: the Element Content pointer will be stored here if any
4385 *
4386 * parse the declaration for an Element content either Mixed or Children,
4387 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4388 *
4389 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4390 *
4391 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4392 */
4393
4394int
4395xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4396 xmlElementContentPtr *result) {
4397
4398 xmlElementContentPtr tree = NULL;
4399 xmlParserInputPtr input = ctxt->input;
4400 int res;
4401
4402 *result = NULL;
4403
4404 if (RAW != '(') {
4405 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4407 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004408 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004409 ctxt->wellFormed = 0;
4410 ctxt->disableSAX = 1;
4411 return(-1);
4412 }
4413 NEXT;
4414 GROW;
4415 SKIP_BLANKS;
4416 if ((RAW == '#') && (NXT(1) == 'P') &&
4417 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4418 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4419 (NXT(6) == 'A')) {
4420 tree = xmlParseElementMixedContentDecl(ctxt);
4421 res = XML_ELEMENT_TYPE_MIXED;
4422 } else {
4423 tree = xmlParseElementChildrenContentDecl(ctxt);
4424 res = XML_ELEMENT_TYPE_ELEMENT;
4425 }
4426 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4427 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4429 ctxt->sax->error(ctxt->userData,
4430"Element content declaration doesn't start and stop in the same entity\n");
4431 ctxt->wellFormed = 0;
4432 ctxt->disableSAX = 1;
4433 }
4434 SKIP_BLANKS;
4435 *result = tree;
4436 return(res);
4437}
4438
4439/**
4440 * xmlParseElementDecl:
4441 * @ctxt: an XML parser context
4442 *
4443 * parse an Element declaration.
4444 *
4445 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4446 *
4447 * [ VC: Unique Element Type Declaration ]
4448 * No element type may be declared more than once
4449 *
4450 * Returns the type of the element, or -1 in case of error
4451 */
4452int
4453xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4454 xmlChar *name;
4455 int ret = -1;
4456 xmlElementContentPtr content = NULL;
4457
4458 GROW;
4459 if ((RAW == '<') && (NXT(1) == '!') &&
4460 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4461 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4462 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4463 (NXT(8) == 'T')) {
4464 xmlParserInputPtr input = ctxt->input;
4465
4466 SKIP(9);
4467 if (!IS_BLANK(CUR)) {
4468 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4470 ctxt->sax->error(ctxt->userData,
4471 "Space required after 'ELEMENT'\n");
4472 ctxt->wellFormed = 0;
4473 ctxt->disableSAX = 1;
4474 }
4475 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004476 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004477 if (name == NULL) {
4478 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4480 ctxt->sax->error(ctxt->userData,
4481 "xmlParseElementDecl: no name for Element\n");
4482 ctxt->wellFormed = 0;
4483 ctxt->disableSAX = 1;
4484 return(-1);
4485 }
4486 while ((RAW == 0) && (ctxt->inputNr > 1))
4487 xmlPopInput(ctxt);
4488 if (!IS_BLANK(CUR)) {
4489 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4491 ctxt->sax->error(ctxt->userData,
4492 "Space required after the element name\n");
4493 ctxt->wellFormed = 0;
4494 ctxt->disableSAX = 1;
4495 }
4496 SKIP_BLANKS;
4497 if ((RAW == 'E') && (NXT(1) == 'M') &&
4498 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4499 (NXT(4) == 'Y')) {
4500 SKIP(5);
4501 /*
4502 * Element must always be empty.
4503 */
4504 ret = XML_ELEMENT_TYPE_EMPTY;
4505 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4506 (NXT(2) == 'Y')) {
4507 SKIP(3);
4508 /*
4509 * Element is a generic container.
4510 */
4511 ret = XML_ELEMENT_TYPE_ANY;
4512 } else if (RAW == '(') {
4513 ret = xmlParseElementContentDecl(ctxt, name, &content);
4514 } else {
4515 /*
4516 * [ WFC: PEs in Internal Subset ] error handling.
4517 */
4518 if ((RAW == '%') && (ctxt->external == 0) &&
4519 (ctxt->inputNr == 1)) {
4520 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4522 ctxt->sax->error(ctxt->userData,
4523 "PEReference: forbidden within markup decl in internal subset\n");
4524 } else {
4525 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4527 ctxt->sax->error(ctxt->userData,
4528 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4529 }
4530 ctxt->wellFormed = 0;
4531 ctxt->disableSAX = 1;
4532 if (name != NULL) xmlFree(name);
4533 return(-1);
4534 }
4535
4536 SKIP_BLANKS;
4537 /*
4538 * Pop-up of finished entities.
4539 */
4540 while ((RAW == 0) && (ctxt->inputNr > 1))
4541 xmlPopInput(ctxt);
4542 SKIP_BLANKS;
4543
4544 if (RAW != '>') {
4545 ctxt->errNo = XML_ERR_GT_REQUIRED;
4546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4547 ctxt->sax->error(ctxt->userData,
4548 "xmlParseElementDecl: expected '>' at the end\n");
4549 ctxt->wellFormed = 0;
4550 ctxt->disableSAX = 1;
4551 } else {
4552 if (input != ctxt->input) {
4553 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4555 ctxt->sax->error(ctxt->userData,
4556"Element declaration doesn't start and stop in the same entity\n");
4557 ctxt->wellFormed = 0;
4558 ctxt->disableSAX = 1;
4559 }
4560
4561 NEXT;
4562 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4563 (ctxt->sax->elementDecl != NULL))
4564 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4565 content);
4566 }
4567 if (content != NULL) {
4568 xmlFreeElementContent(content);
4569 }
4570 if (name != NULL) {
4571 xmlFree(name);
4572 }
4573 }
4574 return(ret);
4575}
4576
4577/**
4578 * xmlParseMarkupDecl:
4579 * @ctxt: an XML parser context
4580 *
4581 * parse Markup declarations
4582 *
4583 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4584 * NotationDecl | PI | Comment
4585 *
4586 * [ VC: Proper Declaration/PE Nesting ]
4587 * Parameter-entity replacement text must be properly nested with
4588 * markup declarations. That is to say, if either the first character
4589 * or the last character of a markup declaration (markupdecl above) is
4590 * contained in the replacement text for a parameter-entity reference,
4591 * both must be contained in the same replacement text.
4592 *
4593 * [ WFC: PEs in Internal Subset ]
4594 * In the internal DTD subset, parameter-entity references can occur
4595 * only where markup declarations can occur, not within markup declarations.
4596 * (This does not apply to references that occur in external parameter
4597 * entities or to the external subset.)
4598 */
4599void
4600xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4601 GROW;
4602 xmlParseElementDecl(ctxt);
4603 xmlParseAttributeListDecl(ctxt);
4604 xmlParseEntityDecl(ctxt);
4605 xmlParseNotationDecl(ctxt);
4606 xmlParsePI(ctxt);
4607 xmlParseComment(ctxt);
4608 /*
4609 * This is only for internal subset. On external entities,
4610 * the replacement is done before parsing stage
4611 */
4612 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4613 xmlParsePEReference(ctxt);
4614 ctxt->instate = XML_PARSER_DTD;
4615}
4616
4617/**
4618 * xmlParseTextDecl:
4619 * @ctxt: an XML parser context
4620 *
4621 * parse an XML declaration header for external entities
4622 *
4623 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4624 *
4625 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4626 */
4627
4628void
4629xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4630 xmlChar *version;
4631
4632 /*
4633 * We know that '<?xml' is here.
4634 */
4635 if ((RAW == '<') && (NXT(1) == '?') &&
4636 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4637 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4638 SKIP(5);
4639 } else {
4640 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "Text declaration '<?xml' required\n");
4644 ctxt->wellFormed = 0;
4645 ctxt->disableSAX = 1;
4646
4647 return;
4648 }
4649
4650 if (!IS_BLANK(CUR)) {
4651 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4653 ctxt->sax->error(ctxt->userData,
4654 "Space needed after '<?xml'\n");
4655 ctxt->wellFormed = 0;
4656 ctxt->disableSAX = 1;
4657 }
4658 SKIP_BLANKS;
4659
4660 /*
4661 * We may have the VersionInfo here.
4662 */
4663 version = xmlParseVersionInfo(ctxt);
4664 if (version == NULL)
4665 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4666 ctxt->input->version = version;
4667
4668 /*
4669 * We must have the encoding declaration
4670 */
4671 if (!IS_BLANK(CUR)) {
4672 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4674 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4675 ctxt->wellFormed = 0;
4676 ctxt->disableSAX = 1;
4677 }
4678 xmlParseEncodingDecl(ctxt);
4679 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4680 /*
4681 * The XML REC instructs us to stop parsing right here
4682 */
4683 return;
4684 }
4685
4686 SKIP_BLANKS;
4687 if ((RAW == '?') && (NXT(1) == '>')) {
4688 SKIP(2);
4689 } else if (RAW == '>') {
4690 /* Deprecated old WD ... */
4691 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "XML declaration must end-up with '?>'\n");
4695 ctxt->wellFormed = 0;
4696 ctxt->disableSAX = 1;
4697 NEXT;
4698 } else {
4699 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4701 ctxt->sax->error(ctxt->userData,
4702 "parsing XML declaration: '?>' expected\n");
4703 ctxt->wellFormed = 0;
4704 ctxt->disableSAX = 1;
4705 MOVETO_ENDTAG(CUR_PTR);
4706 NEXT;
4707 }
4708}
4709
4710/*
4711 * xmlParseConditionalSections
4712 * @ctxt: an XML parser context
4713 *
4714 * [61] conditionalSect ::= includeSect | ignoreSect
4715 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4716 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4717 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4718 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4719 */
4720
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004721static void
Owen Taylor3473f882001-02-23 17:55:21 +00004722xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4723 SKIP(3);
4724 SKIP_BLANKS;
4725 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4726 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4727 (NXT(6) == 'E')) {
4728 SKIP(7);
4729 SKIP_BLANKS;
4730 if (RAW != '[') {
4731 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4733 ctxt->sax->error(ctxt->userData,
4734 "XML conditional section '[' expected\n");
4735 ctxt->wellFormed = 0;
4736 ctxt->disableSAX = 1;
4737 } else {
4738 NEXT;
4739 }
4740 if (xmlParserDebugEntities) {
4741 if ((ctxt->input != NULL) && (ctxt->input->filename))
4742 xmlGenericError(xmlGenericErrorContext,
4743 "%s(%d): ", ctxt->input->filename,
4744 ctxt->input->line);
4745 xmlGenericError(xmlGenericErrorContext,
4746 "Entering INCLUDE Conditional Section\n");
4747 }
4748
4749 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4750 (NXT(2) != '>'))) {
4751 const xmlChar *check = CUR_PTR;
4752 int cons = ctxt->input->consumed;
4753 int tok = ctxt->token;
4754
4755 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4756 xmlParseConditionalSections(ctxt);
4757 } else if (IS_BLANK(CUR)) {
4758 NEXT;
4759 } else if (RAW == '%') {
4760 xmlParsePEReference(ctxt);
4761 } else
4762 xmlParseMarkupDecl(ctxt);
4763
4764 /*
4765 * Pop-up of finished entities.
4766 */
4767 while ((RAW == 0) && (ctxt->inputNr > 1))
4768 xmlPopInput(ctxt);
4769
4770 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4771 (tok == ctxt->token)) {
4772 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4774 ctxt->sax->error(ctxt->userData,
4775 "Content error in the external subset\n");
4776 ctxt->wellFormed = 0;
4777 ctxt->disableSAX = 1;
4778 break;
4779 }
4780 }
4781 if (xmlParserDebugEntities) {
4782 if ((ctxt->input != NULL) && (ctxt->input->filename))
4783 xmlGenericError(xmlGenericErrorContext,
4784 "%s(%d): ", ctxt->input->filename,
4785 ctxt->input->line);
4786 xmlGenericError(xmlGenericErrorContext,
4787 "Leaving INCLUDE Conditional Section\n");
4788 }
4789
4790 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4791 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4792 int state;
4793 int instate;
4794 int depth = 0;
4795
4796 SKIP(6);
4797 SKIP_BLANKS;
4798 if (RAW != '[') {
4799 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801 ctxt->sax->error(ctxt->userData,
4802 "XML conditional section '[' expected\n");
4803 ctxt->wellFormed = 0;
4804 ctxt->disableSAX = 1;
4805 } else {
4806 NEXT;
4807 }
4808 if (xmlParserDebugEntities) {
4809 if ((ctxt->input != NULL) && (ctxt->input->filename))
4810 xmlGenericError(xmlGenericErrorContext,
4811 "%s(%d): ", ctxt->input->filename,
4812 ctxt->input->line);
4813 xmlGenericError(xmlGenericErrorContext,
4814 "Entering IGNORE Conditional Section\n");
4815 }
4816
4817 /*
4818 * Parse up to the end of the conditionnal section
4819 * But disable SAX event generating DTD building in the meantime
4820 */
4821 state = ctxt->disableSAX;
4822 instate = ctxt->instate;
4823 ctxt->disableSAX = 1;
4824 ctxt->instate = XML_PARSER_IGNORE;
4825
4826 while (depth >= 0) {
4827 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4828 depth++;
4829 SKIP(3);
4830 continue;
4831 }
4832 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4833 if (--depth >= 0) SKIP(3);
4834 continue;
4835 }
4836 NEXT;
4837 continue;
4838 }
4839
4840 ctxt->disableSAX = state;
4841 ctxt->instate = instate;
4842
4843 if (xmlParserDebugEntities) {
4844 if ((ctxt->input != NULL) && (ctxt->input->filename))
4845 xmlGenericError(xmlGenericErrorContext,
4846 "%s(%d): ", ctxt->input->filename,
4847 ctxt->input->line);
4848 xmlGenericError(xmlGenericErrorContext,
4849 "Leaving IGNORE Conditional Section\n");
4850 }
4851
4852 } else {
4853 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4855 ctxt->sax->error(ctxt->userData,
4856 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4857 ctxt->wellFormed = 0;
4858 ctxt->disableSAX = 1;
4859 }
4860
4861 if (RAW == 0)
4862 SHRINK;
4863
4864 if (RAW == 0) {
4865 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4867 ctxt->sax->error(ctxt->userData,
4868 "XML conditional section not closed\n");
4869 ctxt->wellFormed = 0;
4870 ctxt->disableSAX = 1;
4871 } else {
4872 SKIP(3);
4873 }
4874}
4875
4876/**
4877 * xmlParseExternalSubset:
4878 * @ctxt: an XML parser context
4879 * @ExternalID: the external identifier
4880 * @SystemID: the system identifier (or URL)
4881 *
4882 * parse Markup declarations from an external subset
4883 *
4884 * [30] extSubset ::= textDecl? extSubsetDecl
4885 *
4886 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4887 */
4888void
4889xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4890 const xmlChar *SystemID) {
4891 GROW;
4892 if ((RAW == '<') && (NXT(1) == '?') &&
4893 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4894 (NXT(4) == 'l')) {
4895 xmlParseTextDecl(ctxt);
4896 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4897 /*
4898 * The XML REC instructs us to stop parsing right here
4899 */
4900 ctxt->instate = XML_PARSER_EOF;
4901 return;
4902 }
4903 }
4904 if (ctxt->myDoc == NULL) {
4905 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4906 }
4907 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4908 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4909
4910 ctxt->instate = XML_PARSER_DTD;
4911 ctxt->external = 1;
4912 while (((RAW == '<') && (NXT(1) == '?')) ||
4913 ((RAW == '<') && (NXT(1) == '!')) ||
4914 IS_BLANK(CUR)) {
4915 const xmlChar *check = CUR_PTR;
4916 int cons = ctxt->input->consumed;
4917 int tok = ctxt->token;
4918
4919 GROW;
4920 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4921 xmlParseConditionalSections(ctxt);
4922 } else if (IS_BLANK(CUR)) {
4923 NEXT;
4924 } else if (RAW == '%') {
4925 xmlParsePEReference(ctxt);
4926 } else
4927 xmlParseMarkupDecl(ctxt);
4928
4929 /*
4930 * Pop-up of finished entities.
4931 */
4932 while ((RAW == 0) && (ctxt->inputNr > 1))
4933 xmlPopInput(ctxt);
4934
4935 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4936 (tok == ctxt->token)) {
4937 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4939 ctxt->sax->error(ctxt->userData,
4940 "Content error in the external subset\n");
4941 ctxt->wellFormed = 0;
4942 ctxt->disableSAX = 1;
4943 break;
4944 }
4945 }
4946
4947 if (RAW != 0) {
4948 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "Extra content at the end of the document\n");
4952 ctxt->wellFormed = 0;
4953 ctxt->disableSAX = 1;
4954 }
4955
4956}
4957
4958/**
4959 * xmlParseReference:
4960 * @ctxt: an XML parser context
4961 *
4962 * parse and handle entity references in content, depending on the SAX
4963 * interface, this may end-up in a call to character() if this is a
4964 * CharRef, a predefined entity, if there is no reference() callback.
4965 * or if the parser was asked to switch to that mode.
4966 *
4967 * [67] Reference ::= EntityRef | CharRef
4968 */
4969void
4970xmlParseReference(xmlParserCtxtPtr ctxt) {
4971 xmlEntityPtr ent;
4972 xmlChar *val;
4973 if (RAW != '&') return;
4974
4975 if (NXT(1) == '#') {
4976 int i = 0;
4977 xmlChar out[10];
4978 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004979 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004980
4981 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4982 /*
4983 * So we are using non-UTF-8 buffers
4984 * Check that the char fit on 8bits, if not
4985 * generate a CharRef.
4986 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004987 if (value <= 0xFF) {
4988 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 out[1] = 0;
4990 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4991 (!ctxt->disableSAX))
4992 ctxt->sax->characters(ctxt->userData, out, 1);
4993 } else {
4994 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004995 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004996 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004997 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004998 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4999 (!ctxt->disableSAX))
5000 ctxt->sax->reference(ctxt->userData, out);
5001 }
5002 } else {
5003 /*
5004 * Just encode the value in UTF-8
5005 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005006 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005007 out[i] = 0;
5008 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5009 (!ctxt->disableSAX))
5010 ctxt->sax->characters(ctxt->userData, out, i);
5011 }
5012 } else {
5013 ent = xmlParseEntityRef(ctxt);
5014 if (ent == NULL) return;
5015 if ((ent->name != NULL) &&
5016 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5017 xmlNodePtr list = NULL;
5018 int ret;
5019
5020
5021 /*
5022 * The first reference to the entity trigger a parsing phase
5023 * where the ent->children is filled with the result from
5024 * the parsing.
5025 */
5026 if (ent->children == NULL) {
5027 xmlChar *value;
5028 value = ent->content;
5029
5030 /*
5031 * Check that this entity is well formed
5032 */
5033 if ((value != NULL) &&
5034 (value[1] == 0) && (value[0] == '<') &&
5035 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5036 /*
5037 * DONE: get definite answer on this !!!
5038 * Lots of entity decls are used to declare a single
5039 * char
5040 * <!ENTITY lt "<">
5041 * Which seems to be valid since
5042 * 2.4: The ampersand character (&) and the left angle
5043 * bracket (<) may appear in their literal form only
5044 * when used ... They are also legal within the literal
5045 * entity value of an internal entity declaration;i
5046 * see "4.3.2 Well-Formed Parsed Entities".
5047 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5048 * Looking at the OASIS test suite and James Clark
5049 * tests, this is broken. However the XML REC uses
5050 * it. Is the XML REC not well-formed ????
5051 * This is a hack to avoid this problem
5052 *
5053 * ANSWER: since lt gt amp .. are already defined,
5054 * this is a redefinition and hence the fact that the
5055 * contentis not well balanced is not a Wf error, this
5056 * is lousy but acceptable.
5057 */
5058 list = xmlNewDocText(ctxt->myDoc, value);
5059 if (list != NULL) {
5060 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5061 (ent->children == NULL)) {
5062 ent->children = list;
5063 ent->last = list;
5064 list->parent = (xmlNodePtr) ent;
5065 } else {
5066 xmlFreeNodeList(list);
5067 }
5068 } else if (list != NULL) {
5069 xmlFreeNodeList(list);
5070 }
5071 } else {
5072 /*
5073 * 4.3.2: An internal general parsed entity is well-formed
5074 * if its replacement text matches the production labeled
5075 * content.
5076 */
5077 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5078 ctxt->depth++;
5079 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5080 ctxt->sax, NULL, ctxt->depth,
5081 value, &list);
5082 ctxt->depth--;
5083 } else if (ent->etype ==
5084 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5085 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005086 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005087 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005088 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005089 ctxt->depth--;
5090 } else {
5091 ret = -1;
5092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5093 ctxt->sax->error(ctxt->userData,
5094 "Internal: invalid entity type\n");
5095 }
5096 if (ret == XML_ERR_ENTITY_LOOP) {
5097 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5099 ctxt->sax->error(ctxt->userData,
5100 "Detected entity reference loop\n");
5101 ctxt->wellFormed = 0;
5102 ctxt->disableSAX = 1;
5103 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005104 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5105 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005106 (ent->children == NULL)) {
5107 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005108 if (ctxt->replaceEntities) {
5109 /*
5110 * Prune it directly in the generated document
5111 * except for single text nodes.
5112 */
5113 if ((list->type == XML_TEXT_NODE) &&
5114 (list->next == NULL)) {
5115 list->parent = (xmlNodePtr) ent;
5116 list = NULL;
5117 } else {
5118 while (list != NULL) {
5119 list->parent = (xmlNodePtr) ctxt->node;
5120 if (list->next == NULL)
5121 ent->last = list;
5122 list = list->next;
5123 }
5124 list = ent->children;
5125 }
5126 } else {
5127 while (list != NULL) {
5128 list->parent = (xmlNodePtr) ent;
5129 if (list->next == NULL)
5130 ent->last = list;
5131 list = list->next;
5132 }
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 } else {
5135 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005136 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005137 }
5138 } else if (ret > 0) {
5139 ctxt->errNo = ret;
5140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5141 ctxt->sax->error(ctxt->userData,
5142 "Entity value required\n");
5143 ctxt->wellFormed = 0;
5144 ctxt->disableSAX = 1;
5145 } else if (list != NULL) {
5146 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005147 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005148 }
5149 }
5150 }
5151 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5152 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5153 /*
5154 * Create a node.
5155 */
5156 ctxt->sax->reference(ctxt->userData, ent->name);
5157 return;
5158 } else if (ctxt->replaceEntities) {
5159 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5160 /*
5161 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005162 * a simple tree copy for all references except the first
5163 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005164 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005165 if (list == NULL) {
5166 xmlNodePtr new, cur;
5167 cur = ent->children;
5168 while (cur != NULL) {
5169 new = xmlCopyNode(cur, 1);
5170 xmlAddChild(ctxt->node, new);
5171 if (cur == ent->last)
5172 break;
5173 cur = cur->next;
5174 }
5175 } else {
5176 /*
5177 * the name change is to avoid coalescing of the
5178 * node with a prossible previous text one which
5179 * would make ent->children a dandling pointer
5180 */
5181 if (ent->children->type == XML_TEXT_NODE)
5182 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5183 if ((ent->last != ent->children) &&
5184 (ent->last->type == XML_TEXT_NODE))
5185 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5186 xmlAddChildList(ctxt->node, ent->children);
5187 }
5188
Owen Taylor3473f882001-02-23 17:55:21 +00005189 /*
5190 * This is to avoid a nasty side effect, see
5191 * characters() in SAX.c
5192 */
5193 ctxt->nodemem = 0;
5194 ctxt->nodelen = 0;
5195 return;
5196 } else {
5197 /*
5198 * Probably running in SAX mode
5199 */
5200 xmlParserInputPtr input;
5201
5202 input = xmlNewEntityInputStream(ctxt, ent);
5203 xmlPushInput(ctxt, input);
5204 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5205 (RAW == '<') && (NXT(1) == '?') &&
5206 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5207 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5208 xmlParseTextDecl(ctxt);
5209 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5210 /*
5211 * The XML REC instructs us to stop parsing right here
5212 */
5213 ctxt->instate = XML_PARSER_EOF;
5214 return;
5215 }
5216 if (input->standalone == 1) {
5217 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5219 ctxt->sax->error(ctxt->userData,
5220 "external parsed entities cannot be standalone\n");
5221 ctxt->wellFormed = 0;
5222 ctxt->disableSAX = 1;
5223 }
5224 }
5225 return;
5226 }
5227 }
5228 } else {
5229 val = ent->content;
5230 if (val == NULL) return;
5231 /*
5232 * inline the entity.
5233 */
5234 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5235 (!ctxt->disableSAX))
5236 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5237 }
5238 }
5239}
5240
5241/**
5242 * xmlParseEntityRef:
5243 * @ctxt: an XML parser context
5244 *
5245 * parse ENTITY references declarations
5246 *
5247 * [68] EntityRef ::= '&' Name ';'
5248 *
5249 * [ WFC: Entity Declared ]
5250 * In a document without any DTD, a document with only an internal DTD
5251 * subset which contains no parameter entity references, or a document
5252 * with "standalone='yes'", the Name given in the entity reference
5253 * must match that in an entity declaration, except that well-formed
5254 * documents need not declare any of the following entities: amp, lt,
5255 * gt, apos, quot. The declaration of a parameter entity must precede
5256 * any reference to it. Similarly, the declaration of a general entity
5257 * must precede any reference to it which appears in a default value in an
5258 * attribute-list declaration. Note that if entities are declared in the
5259 * external subset or in external parameter entities, a non-validating
5260 * processor is not obligated to read and process their declarations;
5261 * for such documents, the rule that an entity must be declared is a
5262 * well-formedness constraint only if standalone='yes'.
5263 *
5264 * [ WFC: Parsed Entity ]
5265 * An entity reference must not contain the name of an unparsed entity
5266 *
5267 * Returns the xmlEntityPtr if found, or NULL otherwise.
5268 */
5269xmlEntityPtr
5270xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5271 xmlChar *name;
5272 xmlEntityPtr ent = NULL;
5273
5274 GROW;
5275
5276 if (RAW == '&') {
5277 NEXT;
5278 name = xmlParseName(ctxt);
5279 if (name == NULL) {
5280 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5282 ctxt->sax->error(ctxt->userData,
5283 "xmlParseEntityRef: no name\n");
5284 ctxt->wellFormed = 0;
5285 ctxt->disableSAX = 1;
5286 } else {
5287 if (RAW == ';') {
5288 NEXT;
5289 /*
5290 * Ask first SAX for entity resolution, otherwise try the
5291 * predefined set.
5292 */
5293 if (ctxt->sax != NULL) {
5294 if (ctxt->sax->getEntity != NULL)
5295 ent = ctxt->sax->getEntity(ctxt->userData, name);
5296 if (ent == NULL)
5297 ent = xmlGetPredefinedEntity(name);
5298 }
5299 /*
5300 * [ WFC: Entity Declared ]
5301 * In a document without any DTD, a document with only an
5302 * internal DTD subset which contains no parameter entity
5303 * references, or a document with "standalone='yes'", the
5304 * Name given in the entity reference must match that in an
5305 * entity declaration, except that well-formed documents
5306 * need not declare any of the following entities: amp, lt,
5307 * gt, apos, quot.
5308 * The declaration of a parameter entity must precede any
5309 * reference to it.
5310 * Similarly, the declaration of a general entity must
5311 * precede any reference to it which appears in a default
5312 * value in an attribute-list declaration. Note that if
5313 * entities are declared in the external subset or in
5314 * external parameter entities, a non-validating processor
5315 * is not obligated to read and process their declarations;
5316 * for such documents, the rule that an entity must be
5317 * declared is a well-formedness constraint only if
5318 * standalone='yes'.
5319 */
5320 if (ent == NULL) {
5321 if ((ctxt->standalone == 1) ||
5322 ((ctxt->hasExternalSubset == 0) &&
5323 (ctxt->hasPErefs == 0))) {
5324 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5326 ctxt->sax->error(ctxt->userData,
5327 "Entity '%s' not defined\n", name);
5328 ctxt->wellFormed = 0;
5329 ctxt->disableSAX = 1;
5330 } else {
5331 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5332 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005333 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005334 "Entity '%s' not defined\n", name);
5335 }
5336 }
5337
5338 /*
5339 * [ WFC: Parsed Entity ]
5340 * An entity reference must not contain the name of an
5341 * unparsed entity
5342 */
5343 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5344 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5346 ctxt->sax->error(ctxt->userData,
5347 "Entity reference to unparsed entity %s\n", name);
5348 ctxt->wellFormed = 0;
5349 ctxt->disableSAX = 1;
5350 }
5351
5352 /*
5353 * [ WFC: No External Entity References ]
5354 * Attribute values cannot contain direct or indirect
5355 * entity references to external entities.
5356 */
5357 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5358 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5359 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "Attribute references external entity '%s'\n", name);
5363 ctxt->wellFormed = 0;
5364 ctxt->disableSAX = 1;
5365 }
5366 /*
5367 * [ WFC: No < in Attribute Values ]
5368 * The replacement text of any entity referred to directly or
5369 * indirectly in an attribute value (other than "&lt;") must
5370 * not contain a <.
5371 */
5372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5373 (ent != NULL) &&
5374 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5375 (ent->content != NULL) &&
5376 (xmlStrchr(ent->content, '<'))) {
5377 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5379 ctxt->sax->error(ctxt->userData,
5380 "'<' in entity '%s' is not allowed in attributes values\n", name);
5381 ctxt->wellFormed = 0;
5382 ctxt->disableSAX = 1;
5383 }
5384
5385 /*
5386 * Internal check, no parameter entities here ...
5387 */
5388 else {
5389 switch (ent->etype) {
5390 case XML_INTERNAL_PARAMETER_ENTITY:
5391 case XML_EXTERNAL_PARAMETER_ENTITY:
5392 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5394 ctxt->sax->error(ctxt->userData,
5395 "Attempt to reference the parameter entity '%s'\n", name);
5396 ctxt->wellFormed = 0;
5397 ctxt->disableSAX = 1;
5398 break;
5399 default:
5400 break;
5401 }
5402 }
5403
5404 /*
5405 * [ WFC: No Recursion ]
5406 * A parsed entity must not contain a recursive reference
5407 * to itself, either directly or indirectly.
5408 * Done somewhere else
5409 */
5410
5411 } else {
5412 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5414 ctxt->sax->error(ctxt->userData,
5415 "xmlParseEntityRef: expecting ';'\n");
5416 ctxt->wellFormed = 0;
5417 ctxt->disableSAX = 1;
5418 }
5419 xmlFree(name);
5420 }
5421 }
5422 return(ent);
5423}
5424
5425/**
5426 * xmlParseStringEntityRef:
5427 * @ctxt: an XML parser context
5428 * @str: a pointer to an index in the string
5429 *
5430 * parse ENTITY references declarations, but this version parses it from
5431 * a string value.
5432 *
5433 * [68] EntityRef ::= '&' Name ';'
5434 *
5435 * [ WFC: Entity Declared ]
5436 * In a document without any DTD, a document with only an internal DTD
5437 * subset which contains no parameter entity references, or a document
5438 * with "standalone='yes'", the Name given in the entity reference
5439 * must match that in an entity declaration, except that well-formed
5440 * documents need not declare any of the following entities: amp, lt,
5441 * gt, apos, quot. The declaration of a parameter entity must precede
5442 * any reference to it. Similarly, the declaration of a general entity
5443 * must precede any reference to it which appears in a default value in an
5444 * attribute-list declaration. Note that if entities are declared in the
5445 * external subset or in external parameter entities, a non-validating
5446 * processor is not obligated to read and process their declarations;
5447 * for such documents, the rule that an entity must be declared is a
5448 * well-formedness constraint only if standalone='yes'.
5449 *
5450 * [ WFC: Parsed Entity ]
5451 * An entity reference must not contain the name of an unparsed entity
5452 *
5453 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5454 * is updated to the current location in the string.
5455 */
5456xmlEntityPtr
5457xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5458 xmlChar *name;
5459 const xmlChar *ptr;
5460 xmlChar cur;
5461 xmlEntityPtr ent = NULL;
5462
5463 if ((str == NULL) || (*str == NULL))
5464 return(NULL);
5465 ptr = *str;
5466 cur = *ptr;
5467 if (cur == '&') {
5468 ptr++;
5469 cur = *ptr;
5470 name = xmlParseStringName(ctxt, &ptr);
5471 if (name == NULL) {
5472 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5474 ctxt->sax->error(ctxt->userData,
5475 "xmlParseEntityRef: no name\n");
5476 ctxt->wellFormed = 0;
5477 ctxt->disableSAX = 1;
5478 } else {
5479 if (*ptr == ';') {
5480 ptr++;
5481 /*
5482 * Ask first SAX for entity resolution, otherwise try the
5483 * predefined set.
5484 */
5485 if (ctxt->sax != NULL) {
5486 if (ctxt->sax->getEntity != NULL)
5487 ent = ctxt->sax->getEntity(ctxt->userData, name);
5488 if (ent == NULL)
5489 ent = xmlGetPredefinedEntity(name);
5490 }
5491 /*
5492 * [ WFC: Entity Declared ]
5493 * In a document without any DTD, a document with only an
5494 * internal DTD subset which contains no parameter entity
5495 * references, or a document with "standalone='yes'", the
5496 * Name given in the entity reference must match that in an
5497 * entity declaration, except that well-formed documents
5498 * need not declare any of the following entities: amp, lt,
5499 * gt, apos, quot.
5500 * The declaration of a parameter entity must precede any
5501 * reference to it.
5502 * Similarly, the declaration of a general entity must
5503 * precede any reference to it which appears in a default
5504 * value in an attribute-list declaration. Note that if
5505 * entities are declared in the external subset or in
5506 * external parameter entities, a non-validating processor
5507 * is not obligated to read and process their declarations;
5508 * for such documents, the rule that an entity must be
5509 * declared is a well-formedness constraint only if
5510 * standalone='yes'.
5511 */
5512 if (ent == NULL) {
5513 if ((ctxt->standalone == 1) ||
5514 ((ctxt->hasExternalSubset == 0) &&
5515 (ctxt->hasPErefs == 0))) {
5516 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "Entity '%s' not defined\n", name);
5520 ctxt->wellFormed = 0;
5521 ctxt->disableSAX = 1;
5522 } else {
5523 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5524 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5525 ctxt->sax->warning(ctxt->userData,
5526 "Entity '%s' not defined\n", name);
5527 }
5528 }
5529
5530 /*
5531 * [ WFC: Parsed Entity ]
5532 * An entity reference must not contain the name of an
5533 * unparsed entity
5534 */
5535 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5536 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538 ctxt->sax->error(ctxt->userData,
5539 "Entity reference to unparsed entity %s\n", name);
5540 ctxt->wellFormed = 0;
5541 ctxt->disableSAX = 1;
5542 }
5543
5544 /*
5545 * [ WFC: No External Entity References ]
5546 * Attribute values cannot contain direct or indirect
5547 * entity references to external entities.
5548 */
5549 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5550 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5551 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5553 ctxt->sax->error(ctxt->userData,
5554 "Attribute references external entity '%s'\n", name);
5555 ctxt->wellFormed = 0;
5556 ctxt->disableSAX = 1;
5557 }
5558 /*
5559 * [ WFC: No < in Attribute Values ]
5560 * The replacement text of any entity referred to directly or
5561 * indirectly in an attribute value (other than "&lt;") must
5562 * not contain a <.
5563 */
5564 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5565 (ent != NULL) &&
5566 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5567 (ent->content != NULL) &&
5568 (xmlStrchr(ent->content, '<'))) {
5569 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5571 ctxt->sax->error(ctxt->userData,
5572 "'<' in entity '%s' is not allowed in attributes values\n", name);
5573 ctxt->wellFormed = 0;
5574 ctxt->disableSAX = 1;
5575 }
5576
5577 /*
5578 * Internal check, no parameter entities here ...
5579 */
5580 else {
5581 switch (ent->etype) {
5582 case XML_INTERNAL_PARAMETER_ENTITY:
5583 case XML_EXTERNAL_PARAMETER_ENTITY:
5584 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5586 ctxt->sax->error(ctxt->userData,
5587 "Attempt to reference the parameter entity '%s'\n", name);
5588 ctxt->wellFormed = 0;
5589 ctxt->disableSAX = 1;
5590 break;
5591 default:
5592 break;
5593 }
5594 }
5595
5596 /*
5597 * [ WFC: No Recursion ]
5598 * A parsed entity must not contain a recursive reference
5599 * to itself, either directly or indirectly.
5600 * Done somewhwere else
5601 */
5602
5603 } else {
5604 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5606 ctxt->sax->error(ctxt->userData,
5607 "xmlParseEntityRef: expecting ';'\n");
5608 ctxt->wellFormed = 0;
5609 ctxt->disableSAX = 1;
5610 }
5611 xmlFree(name);
5612 }
5613 }
5614 *str = ptr;
5615 return(ent);
5616}
5617
5618/**
5619 * xmlParsePEReference:
5620 * @ctxt: an XML parser context
5621 *
5622 * parse PEReference declarations
5623 * The entity content is handled directly by pushing it's content as
5624 * a new input stream.
5625 *
5626 * [69] PEReference ::= '%' Name ';'
5627 *
5628 * [ WFC: No Recursion ]
5629 * A parsed entity must not contain a recursive
5630 * reference to itself, either directly or indirectly.
5631 *
5632 * [ WFC: Entity Declared ]
5633 * In a document without any DTD, a document with only an internal DTD
5634 * subset which contains no parameter entity references, or a document
5635 * with "standalone='yes'", ... ... The declaration of a parameter
5636 * entity must precede any reference to it...
5637 *
5638 * [ VC: Entity Declared ]
5639 * In a document with an external subset or external parameter entities
5640 * with "standalone='no'", ... ... The declaration of a parameter entity
5641 * must precede any reference to it...
5642 *
5643 * [ WFC: In DTD ]
5644 * Parameter-entity references may only appear in the DTD.
5645 * NOTE: misleading but this is handled.
5646 */
5647void
5648xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5649 xmlChar *name;
5650 xmlEntityPtr entity = NULL;
5651 xmlParserInputPtr input;
5652
5653 if (RAW == '%') {
5654 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005655 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005656 if (name == NULL) {
5657 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5659 ctxt->sax->error(ctxt->userData,
5660 "xmlParsePEReference: no name\n");
5661 ctxt->wellFormed = 0;
5662 ctxt->disableSAX = 1;
5663 } else {
5664 if (RAW == ';') {
5665 NEXT;
5666 if ((ctxt->sax != NULL) &&
5667 (ctxt->sax->getParameterEntity != NULL))
5668 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5669 name);
5670 if (entity == NULL) {
5671 /*
5672 * [ WFC: Entity Declared ]
5673 * In a document without any DTD, a document with only an
5674 * internal DTD subset which contains no parameter entity
5675 * references, or a document with "standalone='yes'", ...
5676 * ... The declaration of a parameter entity must precede
5677 * any reference to it...
5678 */
5679 if ((ctxt->standalone == 1) ||
5680 ((ctxt->hasExternalSubset == 0) &&
5681 (ctxt->hasPErefs == 0))) {
5682 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5683 if ((!ctxt->disableSAX) &&
5684 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5685 ctxt->sax->error(ctxt->userData,
5686 "PEReference: %%%s; not found\n", name);
5687 ctxt->wellFormed = 0;
5688 ctxt->disableSAX = 1;
5689 } else {
5690 /*
5691 * [ VC: Entity Declared ]
5692 * In a document with an external subset or external
5693 * parameter entities with "standalone='no'", ...
5694 * ... The declaration of a parameter entity must precede
5695 * any reference to it...
5696 */
5697 if ((!ctxt->disableSAX) &&
5698 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5699 ctxt->sax->warning(ctxt->userData,
5700 "PEReference: %%%s; not found\n", name);
5701 ctxt->valid = 0;
5702 }
5703 } else {
5704 /*
5705 * Internal checking in case the entity quest barfed
5706 */
5707 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5708 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5709 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5710 ctxt->sax->warning(ctxt->userData,
5711 "Internal: %%%s; is not a parameter entity\n", name);
5712 } else {
5713 /*
5714 * TODO !!!
5715 * handle the extra spaces added before and after
5716 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5717 */
5718 input = xmlNewEntityInputStream(ctxt, entity);
5719 xmlPushInput(ctxt, input);
5720 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5721 (RAW == '<') && (NXT(1) == '?') &&
5722 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5723 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5724 xmlParseTextDecl(ctxt);
5725 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5726 /*
5727 * The XML REC instructs us to stop parsing
5728 * right here
5729 */
5730 ctxt->instate = XML_PARSER_EOF;
5731 xmlFree(name);
5732 return;
5733 }
5734 }
5735 if (ctxt->token == 0)
5736 ctxt->token = ' ';
5737 }
5738 }
5739 ctxt->hasPErefs = 1;
5740 } else {
5741 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5743 ctxt->sax->error(ctxt->userData,
5744 "xmlParsePEReference: expecting ';'\n");
5745 ctxt->wellFormed = 0;
5746 ctxt->disableSAX = 1;
5747 }
5748 xmlFree(name);
5749 }
5750 }
5751}
5752
5753/**
5754 * xmlParseStringPEReference:
5755 * @ctxt: an XML parser context
5756 * @str: a pointer to an index in the string
5757 *
5758 * parse PEReference declarations
5759 *
5760 * [69] PEReference ::= '%' Name ';'
5761 *
5762 * [ WFC: No Recursion ]
5763 * A parsed entity must not contain a recursive
5764 * reference to itself, either directly or indirectly.
5765 *
5766 * [ WFC: Entity Declared ]
5767 * In a document without any DTD, a document with only an internal DTD
5768 * subset which contains no parameter entity references, or a document
5769 * with "standalone='yes'", ... ... The declaration of a parameter
5770 * entity must precede any reference to it...
5771 *
5772 * [ VC: Entity Declared ]
5773 * In a document with an external subset or external parameter entities
5774 * with "standalone='no'", ... ... The declaration of a parameter entity
5775 * must precede any reference to it...
5776 *
5777 * [ WFC: In DTD ]
5778 * Parameter-entity references may only appear in the DTD.
5779 * NOTE: misleading but this is handled.
5780 *
5781 * Returns the string of the entity content.
5782 * str is updated to the current value of the index
5783 */
5784xmlEntityPtr
5785xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5786 const xmlChar *ptr;
5787 xmlChar cur;
5788 xmlChar *name;
5789 xmlEntityPtr entity = NULL;
5790
5791 if ((str == NULL) || (*str == NULL)) return(NULL);
5792 ptr = *str;
5793 cur = *ptr;
5794 if (cur == '%') {
5795 ptr++;
5796 cur = *ptr;
5797 name = xmlParseStringName(ctxt, &ptr);
5798 if (name == NULL) {
5799 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5801 ctxt->sax->error(ctxt->userData,
5802 "xmlParseStringPEReference: no name\n");
5803 ctxt->wellFormed = 0;
5804 ctxt->disableSAX = 1;
5805 } else {
5806 cur = *ptr;
5807 if (cur == ';') {
5808 ptr++;
5809 cur = *ptr;
5810 if ((ctxt->sax != NULL) &&
5811 (ctxt->sax->getParameterEntity != NULL))
5812 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5813 name);
5814 if (entity == NULL) {
5815 /*
5816 * [ WFC: Entity Declared ]
5817 * In a document without any DTD, a document with only an
5818 * internal DTD subset which contains no parameter entity
5819 * references, or a document with "standalone='yes'", ...
5820 * ... The declaration of a parameter entity must precede
5821 * any reference to it...
5822 */
5823 if ((ctxt->standalone == 1) ||
5824 ((ctxt->hasExternalSubset == 0) &&
5825 (ctxt->hasPErefs == 0))) {
5826 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5828 ctxt->sax->error(ctxt->userData,
5829 "PEReference: %%%s; not found\n", name);
5830 ctxt->wellFormed = 0;
5831 ctxt->disableSAX = 1;
5832 } else {
5833 /*
5834 * [ VC: Entity Declared ]
5835 * In a document with an external subset or external
5836 * parameter entities with "standalone='no'", ...
5837 * ... The declaration of a parameter entity must
5838 * precede any reference to it...
5839 */
5840 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5841 ctxt->sax->warning(ctxt->userData,
5842 "PEReference: %%%s; not found\n", name);
5843 ctxt->valid = 0;
5844 }
5845 } else {
5846 /*
5847 * Internal checking in case the entity quest barfed
5848 */
5849 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5850 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5851 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5852 ctxt->sax->warning(ctxt->userData,
5853 "Internal: %%%s; is not a parameter entity\n", name);
5854 }
5855 }
5856 ctxt->hasPErefs = 1;
5857 } else {
5858 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5860 ctxt->sax->error(ctxt->userData,
5861 "xmlParseStringPEReference: expecting ';'\n");
5862 ctxt->wellFormed = 0;
5863 ctxt->disableSAX = 1;
5864 }
5865 xmlFree(name);
5866 }
5867 }
5868 *str = ptr;
5869 return(entity);
5870}
5871
5872/**
5873 * xmlParseDocTypeDecl:
5874 * @ctxt: an XML parser context
5875 *
5876 * parse a DOCTYPE declaration
5877 *
5878 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5879 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5880 *
5881 * [ VC: Root Element Type ]
5882 * The Name in the document type declaration must match the element
5883 * type of the root element.
5884 */
5885
5886void
5887xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5888 xmlChar *name = NULL;
5889 xmlChar *ExternalID = NULL;
5890 xmlChar *URI = NULL;
5891
5892 /*
5893 * We know that '<!DOCTYPE' has been detected.
5894 */
5895 SKIP(9);
5896
5897 SKIP_BLANKS;
5898
5899 /*
5900 * Parse the DOCTYPE name.
5901 */
5902 name = xmlParseName(ctxt);
5903 if (name == NULL) {
5904 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5906 ctxt->sax->error(ctxt->userData,
5907 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5908 ctxt->wellFormed = 0;
5909 ctxt->disableSAX = 1;
5910 }
5911 ctxt->intSubName = name;
5912
5913 SKIP_BLANKS;
5914
5915 /*
5916 * Check for SystemID and ExternalID
5917 */
5918 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5919
5920 if ((URI != NULL) || (ExternalID != NULL)) {
5921 ctxt->hasExternalSubset = 1;
5922 }
5923 ctxt->extSubURI = URI;
5924 ctxt->extSubSystem = ExternalID;
5925
5926 SKIP_BLANKS;
5927
5928 /*
5929 * Create and update the internal subset.
5930 */
5931 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5932 (!ctxt->disableSAX))
5933 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5934
5935 /*
5936 * Is there any internal subset declarations ?
5937 * they are handled separately in xmlParseInternalSubset()
5938 */
5939 if (RAW == '[')
5940 return;
5941
5942 /*
5943 * We should be at the end of the DOCTYPE declaration.
5944 */
5945 if (RAW != '>') {
5946 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5948 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5949 ctxt->wellFormed = 0;
5950 ctxt->disableSAX = 1;
5951 }
5952 NEXT;
5953}
5954
5955/**
5956 * xmlParseInternalsubset:
5957 * @ctxt: an XML parser context
5958 *
5959 * parse the internal subset declaration
5960 *
5961 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5962 */
5963
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005964static void
Owen Taylor3473f882001-02-23 17:55:21 +00005965xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5966 /*
5967 * Is there any DTD definition ?
5968 */
5969 if (RAW == '[') {
5970 ctxt->instate = XML_PARSER_DTD;
5971 NEXT;
5972 /*
5973 * Parse the succession of Markup declarations and
5974 * PEReferences.
5975 * Subsequence (markupdecl | PEReference | S)*
5976 */
5977 while (RAW != ']') {
5978 const xmlChar *check = CUR_PTR;
5979 int cons = ctxt->input->consumed;
5980
5981 SKIP_BLANKS;
5982 xmlParseMarkupDecl(ctxt);
5983 xmlParsePEReference(ctxt);
5984
5985 /*
5986 * Pop-up of finished entities.
5987 */
5988 while ((RAW == 0) && (ctxt->inputNr > 1))
5989 xmlPopInput(ctxt);
5990
5991 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5992 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5994 ctxt->sax->error(ctxt->userData,
5995 "xmlParseInternalSubset: error detected in Markup declaration\n");
5996 ctxt->wellFormed = 0;
5997 ctxt->disableSAX = 1;
5998 break;
5999 }
6000 }
6001 if (RAW == ']') {
6002 NEXT;
6003 SKIP_BLANKS;
6004 }
6005 }
6006
6007 /*
6008 * We should be at the end of the DOCTYPE declaration.
6009 */
6010 if (RAW != '>') {
6011 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6013 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6014 ctxt->wellFormed = 0;
6015 ctxt->disableSAX = 1;
6016 }
6017 NEXT;
6018}
6019
6020/**
6021 * xmlParseAttribute:
6022 * @ctxt: an XML parser context
6023 * @value: a xmlChar ** used to store the value of the attribute
6024 *
6025 * parse an attribute
6026 *
6027 * [41] Attribute ::= Name Eq AttValue
6028 *
6029 * [ WFC: No External Entity References ]
6030 * Attribute values cannot contain direct or indirect entity references
6031 * to external entities.
6032 *
6033 * [ WFC: No < in Attribute Values ]
6034 * The replacement text of any entity referred to directly or indirectly in
6035 * an attribute value (other than "&lt;") must not contain a <.
6036 *
6037 * [ VC: Attribute Value Type ]
6038 * The attribute must have been declared; the value must be of the type
6039 * declared for it.
6040 *
6041 * [25] Eq ::= S? '=' S?
6042 *
6043 * With namespace:
6044 *
6045 * [NS 11] Attribute ::= QName Eq AttValue
6046 *
6047 * Also the case QName == xmlns:??? is handled independently as a namespace
6048 * definition.
6049 *
6050 * Returns the attribute name, and the value in *value.
6051 */
6052
6053xmlChar *
6054xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6055 xmlChar *name, *val;
6056
6057 *value = NULL;
6058 name = xmlParseName(ctxt);
6059 if (name == NULL) {
6060 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6062 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6063 ctxt->wellFormed = 0;
6064 ctxt->disableSAX = 1;
6065 return(NULL);
6066 }
6067
6068 /*
6069 * read the value
6070 */
6071 SKIP_BLANKS;
6072 if (RAW == '=') {
6073 NEXT;
6074 SKIP_BLANKS;
6075 val = xmlParseAttValue(ctxt);
6076 ctxt->instate = XML_PARSER_CONTENT;
6077 } else {
6078 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6080 ctxt->sax->error(ctxt->userData,
6081 "Specification mandate value for attribute %s\n", name);
6082 ctxt->wellFormed = 0;
6083 ctxt->disableSAX = 1;
6084 xmlFree(name);
6085 return(NULL);
6086 }
6087
6088 /*
6089 * Check that xml:lang conforms to the specification
6090 * No more registered as an error, just generate a warning now
6091 * since this was deprecated in XML second edition
6092 */
6093 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6094 if (!xmlCheckLanguageID(val)) {
6095 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6096 ctxt->sax->warning(ctxt->userData,
6097 "Malformed value for xml:lang : %s\n", val);
6098 }
6099 }
6100
6101 /*
6102 * Check that xml:space conforms to the specification
6103 */
6104 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6105 if (xmlStrEqual(val, BAD_CAST "default"))
6106 *(ctxt->space) = 0;
6107 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6108 *(ctxt->space) = 1;
6109 else {
6110 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6112 ctxt->sax->error(ctxt->userData,
6113"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6114 val);
6115 ctxt->wellFormed = 0;
6116 ctxt->disableSAX = 1;
6117 }
6118 }
6119
6120 *value = val;
6121 return(name);
6122}
6123
6124/**
6125 * xmlParseStartTag:
6126 * @ctxt: an XML parser context
6127 *
6128 * parse a start of tag either for rule element or
6129 * EmptyElement. In both case we don't parse the tag closing chars.
6130 *
6131 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6132 *
6133 * [ WFC: Unique Att Spec ]
6134 * No attribute name may appear more than once in the same start-tag or
6135 * empty-element tag.
6136 *
6137 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6138 *
6139 * [ WFC: Unique Att Spec ]
6140 * No attribute name may appear more than once in the same start-tag or
6141 * empty-element tag.
6142 *
6143 * With namespace:
6144 *
6145 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6146 *
6147 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6148 *
6149 * Returns the element name parsed
6150 */
6151
6152xmlChar *
6153xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6154 xmlChar *name;
6155 xmlChar *attname;
6156 xmlChar *attvalue;
6157 const xmlChar **atts = NULL;
6158 int nbatts = 0;
6159 int maxatts = 0;
6160 int i;
6161
6162 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006163 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006164
6165 name = xmlParseName(ctxt);
6166 if (name == NULL) {
6167 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6169 ctxt->sax->error(ctxt->userData,
6170 "xmlParseStartTag: invalid element name\n");
6171 ctxt->wellFormed = 0;
6172 ctxt->disableSAX = 1;
6173 return(NULL);
6174 }
6175
6176 /*
6177 * Now parse the attributes, it ends up with the ending
6178 *
6179 * (S Attribute)* S?
6180 */
6181 SKIP_BLANKS;
6182 GROW;
6183
Daniel Veillard21a0f912001-02-25 19:54:14 +00006184 while ((RAW != '>') &&
6185 ((RAW != '/') || (NXT(1) != '>')) &&
6186 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006187 const xmlChar *q = CUR_PTR;
6188 int cons = ctxt->input->consumed;
6189
6190 attname = xmlParseAttribute(ctxt, &attvalue);
6191 if ((attname != NULL) && (attvalue != NULL)) {
6192 /*
6193 * [ WFC: Unique Att Spec ]
6194 * No attribute name may appear more than once in the same
6195 * start-tag or empty-element tag.
6196 */
6197 for (i = 0; i < nbatts;i += 2) {
6198 if (xmlStrEqual(atts[i], attname)) {
6199 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6201 ctxt->sax->error(ctxt->userData,
6202 "Attribute %s redefined\n",
6203 attname);
6204 ctxt->wellFormed = 0;
6205 ctxt->disableSAX = 1;
6206 xmlFree(attname);
6207 xmlFree(attvalue);
6208 goto failed;
6209 }
6210 }
6211
6212 /*
6213 * Add the pair to atts
6214 */
6215 if (atts == NULL) {
6216 maxatts = 10;
6217 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6218 if (atts == NULL) {
6219 xmlGenericError(xmlGenericErrorContext,
6220 "malloc of %ld byte failed\n",
6221 maxatts * (long)sizeof(xmlChar *));
6222 return(NULL);
6223 }
6224 } else if (nbatts + 4 > maxatts) {
6225 maxatts *= 2;
6226 atts = (const xmlChar **) xmlRealloc((void *) atts,
6227 maxatts * sizeof(xmlChar *));
6228 if (atts == NULL) {
6229 xmlGenericError(xmlGenericErrorContext,
6230 "realloc of %ld byte failed\n",
6231 maxatts * (long)sizeof(xmlChar *));
6232 return(NULL);
6233 }
6234 }
6235 atts[nbatts++] = attname;
6236 atts[nbatts++] = attvalue;
6237 atts[nbatts] = NULL;
6238 atts[nbatts + 1] = NULL;
6239 } else {
6240 if (attname != NULL)
6241 xmlFree(attname);
6242 if (attvalue != NULL)
6243 xmlFree(attvalue);
6244 }
6245
6246failed:
6247
6248 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6249 break;
6250 if (!IS_BLANK(RAW)) {
6251 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6253 ctxt->sax->error(ctxt->userData,
6254 "attributes construct error\n");
6255 ctxt->wellFormed = 0;
6256 ctxt->disableSAX = 1;
6257 }
6258 SKIP_BLANKS;
6259 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6260 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6262 ctxt->sax->error(ctxt->userData,
6263 "xmlParseStartTag: problem parsing attributes\n");
6264 ctxt->wellFormed = 0;
6265 ctxt->disableSAX = 1;
6266 break;
6267 }
6268 GROW;
6269 }
6270
6271 /*
6272 * SAX: Start of Element !
6273 */
6274 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6275 (!ctxt->disableSAX))
6276 ctxt->sax->startElement(ctxt->userData, name, atts);
6277
6278 if (atts != NULL) {
6279 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6280 xmlFree((void *) atts);
6281 }
6282 return(name);
6283}
6284
6285/**
6286 * xmlParseEndTag:
6287 * @ctxt: an XML parser context
6288 *
6289 * parse an end of tag
6290 *
6291 * [42] ETag ::= '</' Name S? '>'
6292 *
6293 * With namespace
6294 *
6295 * [NS 9] ETag ::= '</' QName S? '>'
6296 */
6297
6298void
6299xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6300 xmlChar *name;
6301 xmlChar *oldname;
6302
6303 GROW;
6304 if ((RAW != '<') || (NXT(1) != '/')) {
6305 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6307 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6308 ctxt->wellFormed = 0;
6309 ctxt->disableSAX = 1;
6310 return;
6311 }
6312 SKIP(2);
6313
6314 name = xmlParseName(ctxt);
6315
6316 /*
6317 * We should definitely be at the ending "S? '>'" part
6318 */
6319 GROW;
6320 SKIP_BLANKS;
6321 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6322 ctxt->errNo = XML_ERR_GT_REQUIRED;
6323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6324 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6325 ctxt->wellFormed = 0;
6326 ctxt->disableSAX = 1;
6327 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006328 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006329
6330 /*
6331 * [ WFC: Element Type Match ]
6332 * The Name in an element's end-tag must match the element type in the
6333 * start-tag.
6334 *
6335 */
6336 if ((name == NULL) || (ctxt->name == NULL) ||
6337 (!xmlStrEqual(name, ctxt->name))) {
6338 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6340 if ((name != NULL) && (ctxt->name != NULL)) {
6341 ctxt->sax->error(ctxt->userData,
6342 "Opening and ending tag mismatch: %s and %s\n",
6343 ctxt->name, name);
6344 } else if (ctxt->name != NULL) {
6345 ctxt->sax->error(ctxt->userData,
6346 "Ending tag eror for: %s\n", ctxt->name);
6347 } else {
6348 ctxt->sax->error(ctxt->userData,
6349 "Ending tag error: internal error ???\n");
6350 }
6351
6352 }
6353 ctxt->wellFormed = 0;
6354 ctxt->disableSAX = 1;
6355 }
6356
6357 /*
6358 * SAX: End of Tag
6359 */
6360 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6361 (!ctxt->disableSAX))
6362 ctxt->sax->endElement(ctxt->userData, name);
6363
6364 if (name != NULL)
6365 xmlFree(name);
6366 oldname = namePop(ctxt);
6367 spacePop(ctxt);
6368 if (oldname != NULL) {
6369#ifdef DEBUG_STACK
6370 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6371#endif
6372 xmlFree(oldname);
6373 }
6374 return;
6375}
6376
6377/**
6378 * xmlParseCDSect:
6379 * @ctxt: an XML parser context
6380 *
6381 * Parse escaped pure raw content.
6382 *
6383 * [18] CDSect ::= CDStart CData CDEnd
6384 *
6385 * [19] CDStart ::= '<![CDATA['
6386 *
6387 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6388 *
6389 * [21] CDEnd ::= ']]>'
6390 */
6391void
6392xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6393 xmlChar *buf = NULL;
6394 int len = 0;
6395 int size = XML_PARSER_BUFFER_SIZE;
6396 int r, rl;
6397 int s, sl;
6398 int cur, l;
6399 int count = 0;
6400
6401 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6402 (NXT(2) == '[') && (NXT(3) == 'C') &&
6403 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6404 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6405 (NXT(8) == '[')) {
6406 SKIP(9);
6407 } else
6408 return;
6409
6410 ctxt->instate = XML_PARSER_CDATA_SECTION;
6411 r = CUR_CHAR(rl);
6412 if (!IS_CHAR(r)) {
6413 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415 ctxt->sax->error(ctxt->userData,
6416 "CData section not finished\n");
6417 ctxt->wellFormed = 0;
6418 ctxt->disableSAX = 1;
6419 ctxt->instate = XML_PARSER_CONTENT;
6420 return;
6421 }
6422 NEXTL(rl);
6423 s = CUR_CHAR(sl);
6424 if (!IS_CHAR(s)) {
6425 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6427 ctxt->sax->error(ctxt->userData,
6428 "CData section not finished\n");
6429 ctxt->wellFormed = 0;
6430 ctxt->disableSAX = 1;
6431 ctxt->instate = XML_PARSER_CONTENT;
6432 return;
6433 }
6434 NEXTL(sl);
6435 cur = CUR_CHAR(l);
6436 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6437 if (buf == NULL) {
6438 xmlGenericError(xmlGenericErrorContext,
6439 "malloc of %d byte failed\n", size);
6440 return;
6441 }
6442 while (IS_CHAR(cur) &&
6443 ((r != ']') || (s != ']') || (cur != '>'))) {
6444 if (len + 5 >= size) {
6445 size *= 2;
6446 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6447 if (buf == NULL) {
6448 xmlGenericError(xmlGenericErrorContext,
6449 "realloc of %d byte failed\n", size);
6450 return;
6451 }
6452 }
6453 COPY_BUF(rl,buf,len,r);
6454 r = s;
6455 rl = sl;
6456 s = cur;
6457 sl = l;
6458 count++;
6459 if (count > 50) {
6460 GROW;
6461 count = 0;
6462 }
6463 NEXTL(l);
6464 cur = CUR_CHAR(l);
6465 }
6466 buf[len] = 0;
6467 ctxt->instate = XML_PARSER_CONTENT;
6468 if (cur != '>') {
6469 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6471 ctxt->sax->error(ctxt->userData,
6472 "CData section not finished\n%.50s\n", buf);
6473 ctxt->wellFormed = 0;
6474 ctxt->disableSAX = 1;
6475 xmlFree(buf);
6476 return;
6477 }
6478 NEXTL(l);
6479
6480 /*
6481 * Ok the buffer is to be consumed as cdata.
6482 */
6483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6484 if (ctxt->sax->cdataBlock != NULL)
6485 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006486 else if (ctxt->sax->characters != NULL)
6487 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006488 }
6489 xmlFree(buf);
6490}
6491
6492/**
6493 * xmlParseContent:
6494 * @ctxt: an XML parser context
6495 *
6496 * Parse a content:
6497 *
6498 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6499 */
6500
6501void
6502xmlParseContent(xmlParserCtxtPtr ctxt) {
6503 GROW;
6504 while (((RAW != 0) || (ctxt->token != 0)) &&
6505 ((RAW != '<') || (NXT(1) != '/'))) {
6506 const xmlChar *test = CUR_PTR;
6507 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006508 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006509 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006510
6511 /*
6512 * Handle possible processed charrefs.
6513 */
6514 if (ctxt->token != 0) {
6515 xmlParseCharData(ctxt, 0);
6516 }
6517 /*
6518 * First case : a Processing Instruction.
6519 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006520 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006521 xmlParsePI(ctxt);
6522 }
6523
6524 /*
6525 * Second case : a CDSection
6526 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006527 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006528 (NXT(2) == '[') && (NXT(3) == 'C') &&
6529 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6530 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6531 (NXT(8) == '[')) {
6532 xmlParseCDSect(ctxt);
6533 }
6534
6535 /*
6536 * Third case : a comment
6537 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006538 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006539 (NXT(2) == '-') && (NXT(3) == '-')) {
6540 xmlParseComment(ctxt);
6541 ctxt->instate = XML_PARSER_CONTENT;
6542 }
6543
6544 /*
6545 * Fourth case : a sub-element.
6546 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006547 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006548 xmlParseElement(ctxt);
6549 }
6550
6551 /*
6552 * Fifth case : a reference. If if has not been resolved,
6553 * parsing returns it's Name, create the node
6554 */
6555
Daniel Veillard21a0f912001-02-25 19:54:14 +00006556 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006557 xmlParseReference(ctxt);
6558 }
6559
6560 /*
6561 * Last case, text. Note that References are handled directly.
6562 */
6563 else {
6564 xmlParseCharData(ctxt, 0);
6565 }
6566
6567 GROW;
6568 /*
6569 * Pop-up of finished entities.
6570 */
6571 while ((RAW == 0) && (ctxt->inputNr > 1))
6572 xmlPopInput(ctxt);
6573 SHRINK;
6574
6575 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6576 (tok == ctxt->token)) {
6577 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6579 ctxt->sax->error(ctxt->userData,
6580 "detected an error in element content\n");
6581 ctxt->wellFormed = 0;
6582 ctxt->disableSAX = 1;
6583 ctxt->instate = XML_PARSER_EOF;
6584 break;
6585 }
6586 }
6587}
6588
6589/**
6590 * xmlParseElement:
6591 * @ctxt: an XML parser context
6592 *
6593 * parse an XML element, this is highly recursive
6594 *
6595 * [39] element ::= EmptyElemTag | STag content ETag
6596 *
6597 * [ WFC: Element Type Match ]
6598 * The Name in an element's end-tag must match the element type in the
6599 * start-tag.
6600 *
6601 * [ VC: Element Valid ]
6602 * An element is valid if there is a declaration matching elementdecl
6603 * where the Name matches the element type and one of the following holds:
6604 * - The declaration matches EMPTY and the element has no content.
6605 * - The declaration matches children and the sequence of child elements
6606 * belongs to the language generated by the regular expression in the
6607 * content model, with optional white space (characters matching the
6608 * nonterminal S) between each pair of child elements.
6609 * - The declaration matches Mixed and the content consists of character
6610 * data and child elements whose types match names in the content model.
6611 * - The declaration matches ANY, and the types of any child elements have
6612 * been declared.
6613 */
6614
6615void
6616xmlParseElement(xmlParserCtxtPtr ctxt) {
6617 const xmlChar *openTag = CUR_PTR;
6618 xmlChar *name;
6619 xmlChar *oldname;
6620 xmlParserNodeInfo node_info;
6621 xmlNodePtr ret;
6622
6623 /* Capture start position */
6624 if (ctxt->record_info) {
6625 node_info.begin_pos = ctxt->input->consumed +
6626 (CUR_PTR - ctxt->input->base);
6627 node_info.begin_line = ctxt->input->line;
6628 }
6629
6630 if (ctxt->spaceNr == 0)
6631 spacePush(ctxt, -1);
6632 else
6633 spacePush(ctxt, *ctxt->space);
6634
6635 name = xmlParseStartTag(ctxt);
6636 if (name == NULL) {
6637 spacePop(ctxt);
6638 return;
6639 }
6640 namePush(ctxt, name);
6641 ret = ctxt->node;
6642
6643 /*
6644 * [ VC: Root Element Type ]
6645 * The Name in the document type declaration must match the element
6646 * type of the root element.
6647 */
6648 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6649 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6650 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6651
6652 /*
6653 * Check for an Empty Element.
6654 */
6655 if ((RAW == '/') && (NXT(1) == '>')) {
6656 SKIP(2);
6657 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6658 (!ctxt->disableSAX))
6659 ctxt->sax->endElement(ctxt->userData, name);
6660 oldname = namePop(ctxt);
6661 spacePop(ctxt);
6662 if (oldname != NULL) {
6663#ifdef DEBUG_STACK
6664 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6665#endif
6666 xmlFree(oldname);
6667 }
6668 if ( ret != NULL && ctxt->record_info ) {
6669 node_info.end_pos = ctxt->input->consumed +
6670 (CUR_PTR - ctxt->input->base);
6671 node_info.end_line = ctxt->input->line;
6672 node_info.node = ret;
6673 xmlParserAddNodeInfo(ctxt, &node_info);
6674 }
6675 return;
6676 }
6677 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006678 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006679 } else {
6680 ctxt->errNo = XML_ERR_GT_REQUIRED;
6681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6682 ctxt->sax->error(ctxt->userData,
6683 "Couldn't find end of Start Tag\n%.30s\n",
6684 openTag);
6685 ctxt->wellFormed = 0;
6686 ctxt->disableSAX = 1;
6687
6688 /*
6689 * end of parsing of this node.
6690 */
6691 nodePop(ctxt);
6692 oldname = namePop(ctxt);
6693 spacePop(ctxt);
6694 if (oldname != NULL) {
6695#ifdef DEBUG_STACK
6696 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6697#endif
6698 xmlFree(oldname);
6699 }
6700
6701 /*
6702 * Capture end position and add node
6703 */
6704 if ( ret != NULL && ctxt->record_info ) {
6705 node_info.end_pos = ctxt->input->consumed +
6706 (CUR_PTR - ctxt->input->base);
6707 node_info.end_line = ctxt->input->line;
6708 node_info.node = ret;
6709 xmlParserAddNodeInfo(ctxt, &node_info);
6710 }
6711 return;
6712 }
6713
6714 /*
6715 * Parse the content of the element:
6716 */
6717 xmlParseContent(ctxt);
6718 if (!IS_CHAR(RAW)) {
6719 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6721 ctxt->sax->error(ctxt->userData,
6722 "Premature end of data in tag %.30s\n", openTag);
6723 ctxt->wellFormed = 0;
6724 ctxt->disableSAX = 1;
6725
6726 /*
6727 * end of parsing of this node.
6728 */
6729 nodePop(ctxt);
6730 oldname = namePop(ctxt);
6731 spacePop(ctxt);
6732 if (oldname != NULL) {
6733#ifdef DEBUG_STACK
6734 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6735#endif
6736 xmlFree(oldname);
6737 }
6738 return;
6739 }
6740
6741 /*
6742 * parse the end of tag: '</' should be here.
6743 */
6744 xmlParseEndTag(ctxt);
6745
6746 /*
6747 * Capture end position and add node
6748 */
6749 if ( ret != NULL && ctxt->record_info ) {
6750 node_info.end_pos = ctxt->input->consumed +
6751 (CUR_PTR - ctxt->input->base);
6752 node_info.end_line = ctxt->input->line;
6753 node_info.node = ret;
6754 xmlParserAddNodeInfo(ctxt, &node_info);
6755 }
6756}
6757
6758/**
6759 * xmlParseVersionNum:
6760 * @ctxt: an XML parser context
6761 *
6762 * parse the XML version value.
6763 *
6764 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6765 *
6766 * Returns the string giving the XML version number, or NULL
6767 */
6768xmlChar *
6769xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6770 xmlChar *buf = NULL;
6771 int len = 0;
6772 int size = 10;
6773 xmlChar cur;
6774
6775 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6776 if (buf == NULL) {
6777 xmlGenericError(xmlGenericErrorContext,
6778 "malloc of %d byte failed\n", size);
6779 return(NULL);
6780 }
6781 cur = CUR;
6782 while (((cur >= 'a') && (cur <= 'z')) ||
6783 ((cur >= 'A') && (cur <= 'Z')) ||
6784 ((cur >= '0') && (cur <= '9')) ||
6785 (cur == '_') || (cur == '.') ||
6786 (cur == ':') || (cur == '-')) {
6787 if (len + 1 >= size) {
6788 size *= 2;
6789 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6790 if (buf == NULL) {
6791 xmlGenericError(xmlGenericErrorContext,
6792 "realloc of %d byte failed\n", size);
6793 return(NULL);
6794 }
6795 }
6796 buf[len++] = cur;
6797 NEXT;
6798 cur=CUR;
6799 }
6800 buf[len] = 0;
6801 return(buf);
6802}
6803
6804/**
6805 * xmlParseVersionInfo:
6806 * @ctxt: an XML parser context
6807 *
6808 * parse the XML version.
6809 *
6810 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6811 *
6812 * [25] Eq ::= S? '=' S?
6813 *
6814 * Returns the version string, e.g. "1.0"
6815 */
6816
6817xmlChar *
6818xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6819 xmlChar *version = NULL;
6820 const xmlChar *q;
6821
6822 if ((RAW == 'v') && (NXT(1) == 'e') &&
6823 (NXT(2) == 'r') && (NXT(3) == 's') &&
6824 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6825 (NXT(6) == 'n')) {
6826 SKIP(7);
6827 SKIP_BLANKS;
6828 if (RAW != '=') {
6829 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6831 ctxt->sax->error(ctxt->userData,
6832 "xmlParseVersionInfo : expected '='\n");
6833 ctxt->wellFormed = 0;
6834 ctxt->disableSAX = 1;
6835 return(NULL);
6836 }
6837 NEXT;
6838 SKIP_BLANKS;
6839 if (RAW == '"') {
6840 NEXT;
6841 q = CUR_PTR;
6842 version = xmlParseVersionNum(ctxt);
6843 if (RAW != '"') {
6844 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6846 ctxt->sax->error(ctxt->userData,
6847 "String not closed\n%.50s\n", q);
6848 ctxt->wellFormed = 0;
6849 ctxt->disableSAX = 1;
6850 } else
6851 NEXT;
6852 } else if (RAW == '\''){
6853 NEXT;
6854 q = CUR_PTR;
6855 version = xmlParseVersionNum(ctxt);
6856 if (RAW != '\'') {
6857 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6859 ctxt->sax->error(ctxt->userData,
6860 "String not closed\n%.50s\n", q);
6861 ctxt->wellFormed = 0;
6862 ctxt->disableSAX = 1;
6863 } else
6864 NEXT;
6865 } else {
6866 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6868 ctxt->sax->error(ctxt->userData,
6869 "xmlParseVersionInfo : expected ' or \"\n");
6870 ctxt->wellFormed = 0;
6871 ctxt->disableSAX = 1;
6872 }
6873 }
6874 return(version);
6875}
6876
6877/**
6878 * xmlParseEncName:
6879 * @ctxt: an XML parser context
6880 *
6881 * parse the XML encoding name
6882 *
6883 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6884 *
6885 * Returns the encoding name value or NULL
6886 */
6887xmlChar *
6888xmlParseEncName(xmlParserCtxtPtr ctxt) {
6889 xmlChar *buf = NULL;
6890 int len = 0;
6891 int size = 10;
6892 xmlChar cur;
6893
6894 cur = CUR;
6895 if (((cur >= 'a') && (cur <= 'z')) ||
6896 ((cur >= 'A') && (cur <= 'Z'))) {
6897 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6898 if (buf == NULL) {
6899 xmlGenericError(xmlGenericErrorContext,
6900 "malloc of %d byte failed\n", size);
6901 return(NULL);
6902 }
6903
6904 buf[len++] = cur;
6905 NEXT;
6906 cur = CUR;
6907 while (((cur >= 'a') && (cur <= 'z')) ||
6908 ((cur >= 'A') && (cur <= 'Z')) ||
6909 ((cur >= '0') && (cur <= '9')) ||
6910 (cur == '.') || (cur == '_') ||
6911 (cur == '-')) {
6912 if (len + 1 >= size) {
6913 size *= 2;
6914 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6915 if (buf == NULL) {
6916 xmlGenericError(xmlGenericErrorContext,
6917 "realloc of %d byte failed\n", size);
6918 return(NULL);
6919 }
6920 }
6921 buf[len++] = cur;
6922 NEXT;
6923 cur = CUR;
6924 if (cur == 0) {
6925 SHRINK;
6926 GROW;
6927 cur = CUR;
6928 }
6929 }
6930 buf[len] = 0;
6931 } else {
6932 ctxt->errNo = XML_ERR_ENCODING_NAME;
6933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6934 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6935 ctxt->wellFormed = 0;
6936 ctxt->disableSAX = 1;
6937 }
6938 return(buf);
6939}
6940
6941/**
6942 * xmlParseEncodingDecl:
6943 * @ctxt: an XML parser context
6944 *
6945 * parse the XML encoding declaration
6946 *
6947 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6948 *
6949 * this setups the conversion filters.
6950 *
6951 * Returns the encoding value or NULL
6952 */
6953
6954xmlChar *
6955xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6956 xmlChar *encoding = NULL;
6957 const xmlChar *q;
6958
6959 SKIP_BLANKS;
6960 if ((RAW == 'e') && (NXT(1) == 'n') &&
6961 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6962 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6963 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6964 SKIP(8);
6965 SKIP_BLANKS;
6966 if (RAW != '=') {
6967 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6969 ctxt->sax->error(ctxt->userData,
6970 "xmlParseEncodingDecl : expected '='\n");
6971 ctxt->wellFormed = 0;
6972 ctxt->disableSAX = 1;
6973 return(NULL);
6974 }
6975 NEXT;
6976 SKIP_BLANKS;
6977 if (RAW == '"') {
6978 NEXT;
6979 q = CUR_PTR;
6980 encoding = xmlParseEncName(ctxt);
6981 if (RAW != '"') {
6982 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6984 ctxt->sax->error(ctxt->userData,
6985 "String not closed\n%.50s\n", q);
6986 ctxt->wellFormed = 0;
6987 ctxt->disableSAX = 1;
6988 } else
6989 NEXT;
6990 } else if (RAW == '\''){
6991 NEXT;
6992 q = CUR_PTR;
6993 encoding = xmlParseEncName(ctxt);
6994 if (RAW != '\'') {
6995 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6997 ctxt->sax->error(ctxt->userData,
6998 "String not closed\n%.50s\n", q);
6999 ctxt->wellFormed = 0;
7000 ctxt->disableSAX = 1;
7001 } else
7002 NEXT;
7003 } else if (RAW == '"'){
7004 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7006 ctxt->sax->error(ctxt->userData,
7007 "xmlParseEncodingDecl : expected ' or \"\n");
7008 ctxt->wellFormed = 0;
7009 ctxt->disableSAX = 1;
7010 }
7011 if (encoding != NULL) {
7012 xmlCharEncoding enc;
7013 xmlCharEncodingHandlerPtr handler;
7014
7015 if (ctxt->input->encoding != NULL)
7016 xmlFree((xmlChar *) ctxt->input->encoding);
7017 ctxt->input->encoding = encoding;
7018
7019 enc = xmlParseCharEncoding((const char *) encoding);
7020 /*
7021 * registered set of known encodings
7022 */
7023 if (enc != XML_CHAR_ENCODING_ERROR) {
7024 xmlSwitchEncoding(ctxt, enc);
7025 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7026 xmlFree(encoding);
7027 return(NULL);
7028 }
7029 } else {
7030 /*
7031 * fallback for unknown encodings
7032 */
7033 handler = xmlFindCharEncodingHandler((const char *) encoding);
7034 if (handler != NULL) {
7035 xmlSwitchToEncoding(ctxt, handler);
7036 } else {
7037 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7039 ctxt->sax->error(ctxt->userData,
7040 "Unsupported encoding %s\n", encoding);
7041 return(NULL);
7042 }
7043 }
7044 }
7045 }
7046 return(encoding);
7047}
7048
7049/**
7050 * xmlParseSDDecl:
7051 * @ctxt: an XML parser context
7052 *
7053 * parse the XML standalone declaration
7054 *
7055 * [32] SDDecl ::= S 'standalone' Eq
7056 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7057 *
7058 * [ VC: Standalone Document Declaration ]
7059 * TODO The standalone document declaration must have the value "no"
7060 * if any external markup declarations contain declarations of:
7061 * - attributes with default values, if elements to which these
7062 * attributes apply appear in the document without specifications
7063 * of values for these attributes, or
7064 * - entities (other than amp, lt, gt, apos, quot), if references
7065 * to those entities appear in the document, or
7066 * - attributes with values subject to normalization, where the
7067 * attribute appears in the document with a value which will change
7068 * as a result of normalization, or
7069 * - element types with element content, if white space occurs directly
7070 * within any instance of those types.
7071 *
7072 * Returns 1 if standalone, 0 otherwise
7073 */
7074
7075int
7076xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7077 int standalone = -1;
7078
7079 SKIP_BLANKS;
7080 if ((RAW == 's') && (NXT(1) == 't') &&
7081 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7082 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7083 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7084 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7085 SKIP(10);
7086 SKIP_BLANKS;
7087 if (RAW != '=') {
7088 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7090 ctxt->sax->error(ctxt->userData,
7091 "XML standalone declaration : expected '='\n");
7092 ctxt->wellFormed = 0;
7093 ctxt->disableSAX = 1;
7094 return(standalone);
7095 }
7096 NEXT;
7097 SKIP_BLANKS;
7098 if (RAW == '\''){
7099 NEXT;
7100 if ((RAW == 'n') && (NXT(1) == 'o')) {
7101 standalone = 0;
7102 SKIP(2);
7103 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7104 (NXT(2) == 's')) {
7105 standalone = 1;
7106 SKIP(3);
7107 } else {
7108 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7110 ctxt->sax->error(ctxt->userData,
7111 "standalone accepts only 'yes' or 'no'\n");
7112 ctxt->wellFormed = 0;
7113 ctxt->disableSAX = 1;
7114 }
7115 if (RAW != '\'') {
7116 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7118 ctxt->sax->error(ctxt->userData, "String not closed\n");
7119 ctxt->wellFormed = 0;
7120 ctxt->disableSAX = 1;
7121 } else
7122 NEXT;
7123 } else if (RAW == '"'){
7124 NEXT;
7125 if ((RAW == 'n') && (NXT(1) == 'o')) {
7126 standalone = 0;
7127 SKIP(2);
7128 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7129 (NXT(2) == 's')) {
7130 standalone = 1;
7131 SKIP(3);
7132 } else {
7133 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7135 ctxt->sax->error(ctxt->userData,
7136 "standalone accepts only 'yes' or 'no'\n");
7137 ctxt->wellFormed = 0;
7138 ctxt->disableSAX = 1;
7139 }
7140 if (RAW != '"') {
7141 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7143 ctxt->sax->error(ctxt->userData, "String not closed\n");
7144 ctxt->wellFormed = 0;
7145 ctxt->disableSAX = 1;
7146 } else
7147 NEXT;
7148 } else {
7149 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7151 ctxt->sax->error(ctxt->userData,
7152 "Standalone value not found\n");
7153 ctxt->wellFormed = 0;
7154 ctxt->disableSAX = 1;
7155 }
7156 }
7157 return(standalone);
7158}
7159
7160/**
7161 * xmlParseXMLDecl:
7162 * @ctxt: an XML parser context
7163 *
7164 * parse an XML declaration header
7165 *
7166 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7167 */
7168
7169void
7170xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7171 xmlChar *version;
7172
7173 /*
7174 * We know that '<?xml' is here.
7175 */
7176 SKIP(5);
7177
7178 if (!IS_BLANK(RAW)) {
7179 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7181 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7182 ctxt->wellFormed = 0;
7183 ctxt->disableSAX = 1;
7184 }
7185 SKIP_BLANKS;
7186
7187 /*
7188 * We should have the VersionInfo here.
7189 */
7190 version = xmlParseVersionInfo(ctxt);
7191 if (version == NULL)
7192 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7193 ctxt->version = xmlStrdup(version);
7194 xmlFree(version);
7195
7196 /*
7197 * We may have the encoding declaration
7198 */
7199 if (!IS_BLANK(RAW)) {
7200 if ((RAW == '?') && (NXT(1) == '>')) {
7201 SKIP(2);
7202 return;
7203 }
7204 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7206 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7207 ctxt->wellFormed = 0;
7208 ctxt->disableSAX = 1;
7209 }
7210 xmlParseEncodingDecl(ctxt);
7211 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7212 /*
7213 * The XML REC instructs us to stop parsing right here
7214 */
7215 return;
7216 }
7217
7218 /*
7219 * We may have the standalone status.
7220 */
7221 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7222 if ((RAW == '?') && (NXT(1) == '>')) {
7223 SKIP(2);
7224 return;
7225 }
7226 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7228 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7229 ctxt->wellFormed = 0;
7230 ctxt->disableSAX = 1;
7231 }
7232 SKIP_BLANKS;
7233 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7234
7235 SKIP_BLANKS;
7236 if ((RAW == '?') && (NXT(1) == '>')) {
7237 SKIP(2);
7238 } else if (RAW == '>') {
7239 /* Deprecated old WD ... */
7240 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
7243 "XML declaration must end-up with '?>'\n");
7244 ctxt->wellFormed = 0;
7245 ctxt->disableSAX = 1;
7246 NEXT;
7247 } else {
7248 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7250 ctxt->sax->error(ctxt->userData,
7251 "parsing XML declaration: '?>' expected\n");
7252 ctxt->wellFormed = 0;
7253 ctxt->disableSAX = 1;
7254 MOVETO_ENDTAG(CUR_PTR);
7255 NEXT;
7256 }
7257}
7258
7259/**
7260 * xmlParseMisc:
7261 * @ctxt: an XML parser context
7262 *
7263 * parse an XML Misc* optionnal field.
7264 *
7265 * [27] Misc ::= Comment | PI | S
7266 */
7267
7268void
7269xmlParseMisc(xmlParserCtxtPtr ctxt) {
7270 while (((RAW == '<') && (NXT(1) == '?')) ||
7271 ((RAW == '<') && (NXT(1) == '!') &&
7272 (NXT(2) == '-') && (NXT(3) == '-')) ||
7273 IS_BLANK(CUR)) {
7274 if ((RAW == '<') && (NXT(1) == '?')) {
7275 xmlParsePI(ctxt);
7276 } else if (IS_BLANK(CUR)) {
7277 NEXT;
7278 } else
7279 xmlParseComment(ctxt);
7280 }
7281}
7282
7283/**
7284 * xmlParseDocument:
7285 * @ctxt: an XML parser context
7286 *
7287 * parse an XML document (and build a tree if using the standard SAX
7288 * interface).
7289 *
7290 * [1] document ::= prolog element Misc*
7291 *
7292 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7293 *
7294 * Returns 0, -1 in case of error. the parser context is augmented
7295 * as a result of the parsing.
7296 */
7297
7298int
7299xmlParseDocument(xmlParserCtxtPtr ctxt) {
7300 xmlChar start[4];
7301 xmlCharEncoding enc;
7302
7303 xmlInitParser();
7304
7305 GROW;
7306
7307 /*
7308 * SAX: beginning of the document processing.
7309 */
7310 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7311 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7312
7313 /*
7314 * Get the 4 first bytes and decode the charset
7315 * if enc != XML_CHAR_ENCODING_NONE
7316 * plug some encoding conversion routines.
7317 */
7318 start[0] = RAW;
7319 start[1] = NXT(1);
7320 start[2] = NXT(2);
7321 start[3] = NXT(3);
7322 enc = xmlDetectCharEncoding(start, 4);
7323 if (enc != XML_CHAR_ENCODING_NONE) {
7324 xmlSwitchEncoding(ctxt, enc);
7325 }
7326
7327
7328 if (CUR == 0) {
7329 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7331 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7332 ctxt->wellFormed = 0;
7333 ctxt->disableSAX = 1;
7334 }
7335
7336 /*
7337 * Check for the XMLDecl in the Prolog.
7338 */
7339 GROW;
7340 if ((RAW == '<') && (NXT(1) == '?') &&
7341 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7342 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7343
7344 /*
7345 * Note that we will switch encoding on the fly.
7346 */
7347 xmlParseXMLDecl(ctxt);
7348 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7349 /*
7350 * The XML REC instructs us to stop parsing right here
7351 */
7352 return(-1);
7353 }
7354 ctxt->standalone = ctxt->input->standalone;
7355 SKIP_BLANKS;
7356 } else {
7357 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7358 }
7359 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7360 ctxt->sax->startDocument(ctxt->userData);
7361
7362 /*
7363 * The Misc part of the Prolog
7364 */
7365 GROW;
7366 xmlParseMisc(ctxt);
7367
7368 /*
7369 * Then possibly doc type declaration(s) and more Misc
7370 * (doctypedecl Misc*)?
7371 */
7372 GROW;
7373 if ((RAW == '<') && (NXT(1) == '!') &&
7374 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7375 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7376 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7377 (NXT(8) == 'E')) {
7378
7379 ctxt->inSubset = 1;
7380 xmlParseDocTypeDecl(ctxt);
7381 if (RAW == '[') {
7382 ctxt->instate = XML_PARSER_DTD;
7383 xmlParseInternalSubset(ctxt);
7384 }
7385
7386 /*
7387 * Create and update the external subset.
7388 */
7389 ctxt->inSubset = 2;
7390 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7391 (!ctxt->disableSAX))
7392 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7393 ctxt->extSubSystem, ctxt->extSubURI);
7394 ctxt->inSubset = 0;
7395
7396
7397 ctxt->instate = XML_PARSER_PROLOG;
7398 xmlParseMisc(ctxt);
7399 }
7400
7401 /*
7402 * Time to start parsing the tree itself
7403 */
7404 GROW;
7405 if (RAW != '<') {
7406 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7408 ctxt->sax->error(ctxt->userData,
7409 "Start tag expected, '<' not found\n");
7410 ctxt->wellFormed = 0;
7411 ctxt->disableSAX = 1;
7412 ctxt->instate = XML_PARSER_EOF;
7413 } else {
7414 ctxt->instate = XML_PARSER_CONTENT;
7415 xmlParseElement(ctxt);
7416 ctxt->instate = XML_PARSER_EPILOG;
7417
7418
7419 /*
7420 * The Misc part at the end
7421 */
7422 xmlParseMisc(ctxt);
7423
7424 if (RAW != 0) {
7425 ctxt->errNo = XML_ERR_DOCUMENT_END;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData,
7428 "Extra content at the end of the document\n");
7429 ctxt->wellFormed = 0;
7430 ctxt->disableSAX = 1;
7431 }
7432 ctxt->instate = XML_PARSER_EOF;
7433 }
7434
7435 /*
7436 * SAX: end of the document processing.
7437 */
7438 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7439 (!ctxt->disableSAX))
7440 ctxt->sax->endDocument(ctxt->userData);
7441
7442 if (! ctxt->wellFormed) return(-1);
7443 return(0);
7444}
7445
7446/**
7447 * xmlParseExtParsedEnt:
7448 * @ctxt: an XML parser context
7449 *
7450 * parse a genreral parsed entity
7451 * An external general parsed entity is well-formed if it matches the
7452 * production labeled extParsedEnt.
7453 *
7454 * [78] extParsedEnt ::= TextDecl? content
7455 *
7456 * Returns 0, -1 in case of error. the parser context is augmented
7457 * as a result of the parsing.
7458 */
7459
7460int
7461xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7462 xmlChar start[4];
7463 xmlCharEncoding enc;
7464
7465 xmlDefaultSAXHandlerInit();
7466
7467 GROW;
7468
7469 /*
7470 * SAX: beginning of the document processing.
7471 */
7472 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7473 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7474
7475 /*
7476 * Get the 4 first bytes and decode the charset
7477 * if enc != XML_CHAR_ENCODING_NONE
7478 * plug some encoding conversion routines.
7479 */
7480 start[0] = RAW;
7481 start[1] = NXT(1);
7482 start[2] = NXT(2);
7483 start[3] = NXT(3);
7484 enc = xmlDetectCharEncoding(start, 4);
7485 if (enc != XML_CHAR_ENCODING_NONE) {
7486 xmlSwitchEncoding(ctxt, enc);
7487 }
7488
7489
7490 if (CUR == 0) {
7491 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7493 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7494 ctxt->wellFormed = 0;
7495 ctxt->disableSAX = 1;
7496 }
7497
7498 /*
7499 * Check for the XMLDecl in the Prolog.
7500 */
7501 GROW;
7502 if ((RAW == '<') && (NXT(1) == '?') &&
7503 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7504 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7505
7506 /*
7507 * Note that we will switch encoding on the fly.
7508 */
7509 xmlParseXMLDecl(ctxt);
7510 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7511 /*
7512 * The XML REC instructs us to stop parsing right here
7513 */
7514 return(-1);
7515 }
7516 SKIP_BLANKS;
7517 } else {
7518 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7519 }
7520 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7521 ctxt->sax->startDocument(ctxt->userData);
7522
7523 /*
7524 * Doing validity checking on chunk doesn't make sense
7525 */
7526 ctxt->instate = XML_PARSER_CONTENT;
7527 ctxt->validate = 0;
7528 ctxt->loadsubset = 0;
7529 ctxt->depth = 0;
7530
7531 xmlParseContent(ctxt);
7532
7533 if ((RAW == '<') && (NXT(1) == '/')) {
7534 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData,
7537 "chunk is not well balanced\n");
7538 ctxt->wellFormed = 0;
7539 ctxt->disableSAX = 1;
7540 } else if (RAW != 0) {
7541 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7543 ctxt->sax->error(ctxt->userData,
7544 "extra content at the end of well balanced chunk\n");
7545 ctxt->wellFormed = 0;
7546 ctxt->disableSAX = 1;
7547 }
7548
7549 /*
7550 * SAX: end of the document processing.
7551 */
7552 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7553 (!ctxt->disableSAX))
7554 ctxt->sax->endDocument(ctxt->userData);
7555
7556 if (! ctxt->wellFormed) return(-1);
7557 return(0);
7558}
7559
7560/************************************************************************
7561 * *
7562 * Progressive parsing interfaces *
7563 * *
7564 ************************************************************************/
7565
7566/**
7567 * xmlParseLookupSequence:
7568 * @ctxt: an XML parser context
7569 * @first: the first char to lookup
7570 * @next: the next char to lookup or zero
7571 * @third: the next char to lookup or zero
7572 *
7573 * Try to find if a sequence (first, next, third) or just (first next) or
7574 * (first) is available in the input stream.
7575 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7576 * to avoid rescanning sequences of bytes, it DOES change the state of the
7577 * parser, do not use liberally.
7578 *
7579 * Returns the index to the current parsing point if the full sequence
7580 * is available, -1 otherwise.
7581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007582static int
Owen Taylor3473f882001-02-23 17:55:21 +00007583xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7584 xmlChar next, xmlChar third) {
7585 int base, len;
7586 xmlParserInputPtr in;
7587 const xmlChar *buf;
7588
7589 in = ctxt->input;
7590 if (in == NULL) return(-1);
7591 base = in->cur - in->base;
7592 if (base < 0) return(-1);
7593 if (ctxt->checkIndex > base)
7594 base = ctxt->checkIndex;
7595 if (in->buf == NULL) {
7596 buf = in->base;
7597 len = in->length;
7598 } else {
7599 buf = in->buf->buffer->content;
7600 len = in->buf->buffer->use;
7601 }
7602 /* take into account the sequence length */
7603 if (third) len -= 2;
7604 else if (next) len --;
7605 for (;base < len;base++) {
7606 if (buf[base] == first) {
7607 if (third != 0) {
7608 if ((buf[base + 1] != next) ||
7609 (buf[base + 2] != third)) continue;
7610 } else if (next != 0) {
7611 if (buf[base + 1] != next) continue;
7612 }
7613 ctxt->checkIndex = 0;
7614#ifdef DEBUG_PUSH
7615 if (next == 0)
7616 xmlGenericError(xmlGenericErrorContext,
7617 "PP: lookup '%c' found at %d\n",
7618 first, base);
7619 else if (third == 0)
7620 xmlGenericError(xmlGenericErrorContext,
7621 "PP: lookup '%c%c' found at %d\n",
7622 first, next, base);
7623 else
7624 xmlGenericError(xmlGenericErrorContext,
7625 "PP: lookup '%c%c%c' found at %d\n",
7626 first, next, third, base);
7627#endif
7628 return(base - (in->cur - in->base));
7629 }
7630 }
7631 ctxt->checkIndex = base;
7632#ifdef DEBUG_PUSH
7633 if (next == 0)
7634 xmlGenericError(xmlGenericErrorContext,
7635 "PP: lookup '%c' failed\n", first);
7636 else if (third == 0)
7637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: lookup '%c%c' failed\n", first, next);
7639 else
7640 xmlGenericError(xmlGenericErrorContext,
7641 "PP: lookup '%c%c%c' failed\n", first, next, third);
7642#endif
7643 return(-1);
7644}
7645
/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing. This is the state machine of the progressive
 * parser: it loops on ctxt->instate, handling one construct per pass, and
 * leaves (goto done) when the buffered input is too short to decide or
 * when XML_PARSER_EOF is reached. When @terminate is zero, most constructs
 * are parsed only once xmlParseLookupSequence() has found their closing
 * delimiter in the buffered data; when it is non-zero those lookups are
 * skipped and parsing is attempted on whatever is available.
 *
 * Returns the local counter ret: it starts at zero and is only increased
 * (by 5) when an XML declaration is parsed during this call; the
 * unsupported-encoding exit returns 0.
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail;
    xmlChar cur, next;

#ifdef DEBUG_PUSH
    /* Trace the state the parser is in on entry. */
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CDATA_SECTION\n");break;
        case XML_PARSER_END_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try END_TAG\n");break;
        case XML_PARSER_ENTITY_DECL:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_DECL\n");break;
        case XML_PARSER_ENTITY_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ENTITY_VALUE\n");break;
        case XML_PARSER_ATTRIBUTE_VALUE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try ATTRIBUTE_VALUE\n");break;
        case XML_PARSER_DTD:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try DTD\n");break;
        case XML_PARSER_EPILOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EPILOG\n");break;
        case XML_PARSER_PI:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PI\n");break;
        case XML_PARSER_IGNORE:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try IGNORE\n");break;
    }
#endif

    while (1) {
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);

        if (ctxt->input ==NULL) break;
        /*
         * Number of bytes left after the cursor: inputs without an I/O
         * buffer use their recorded length, the others the amount of
         * data currently held in the buffer.
         */
        if (ctxt->input->buf == NULL)
            avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
        else
            avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
                    xmlChar start[4];
                    xmlCharEncoding enc;

                    /*
                     * Very first chars read from the document flow.
                     */
                    if (avail < 4)
                        goto done;

                    /*
                     * Get the 4 first bytes and decode the charset
                     * if enc != XML_CHAR_ENCODING_NONE
                     * plug some encoding conversion routines.
                     */
                    start[0] = RAW;
                    start[1] = NXT(1);
                    start[2] = NXT(2);
                    start[3] = NXT(3);
                    enc = xmlDetectCharEncoding(start, 4);
                    if (enc != XML_CHAR_ENCODING_NONE) {
                        xmlSwitchEncoding(ctxt, enc);
                    }
                    /*
                     * Go around the loop again, still in START state.
                     * NOTE(review): this relies on ctxt->charset no longer
                     * being XML_CHAR_ENCODING_NONE on the next pass,
                     * otherwise this branch would repeat - confirm.
                     */
                    break;
                }

                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if (cur == 0) {
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData, "Document is empty\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * Unlike the other error exits of this function, this
                     * call is not conditioned on disableSAX.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                if ((cur == '<') && (next == '?')) {
                    /* PI or XML decl */
                    /* these early returns bypass the trace at "done" */
                    if (avail < 5) return(ret);
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        return(ret);
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    if ((ctxt->input->cur[2] == 'x') &&
                        (ctxt->input->cur[3] == 'm') &&
                        (ctxt->input->cur[4] == 'l') &&
                        (IS_BLANK(ctxt->input->cur[5]))) {
                        /* only place where the returned counter changes */
                        ret += 5;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: Parsing XML Decl\n");
#endif
                        xmlParseXMLDecl(ctxt);
                        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
                            /*
                             * The XML REC instructs us to stop parsing right
                             * here
                             */
                            ctxt->instate = XML_PARSER_EOF;
                            return(0);
                        }
                        ctxt->standalone = ctxt->input->standalone;
                        if ((ctxt->encoding == NULL) &&
                            (ctxt->input->encoding != NULL))
                            ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    } else {
                        /*
                         * "<?" not followed by "xml" and a blank: no XML
                         * declaration, use the default version and leave
                         * the input for the MISC state.
                         */
                        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->startDocument(ctxt->userData);
                        ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering MISC\n");
#endif
                    }
                } else {
                    /* No XML declaration at all: same defaults as above. */
                    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
                        ctxt->sax->setDocumentLocator(ctxt->userData,
                                                      &xmlDefaultSAXLocator);
                    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->startDocument(ctxt->userData);
                    ctxt->instate = XML_PARSER_MISC;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering MISC\n");
#endif
                }
                break;
            case XML_PARSER_MISC:
                /*
                 * Before the document type declaration: PIs, comments,
                 * the DOCTYPE itself, or the start of the root element.
                 */
                SKIP_BLANKS;
                /* blanks were consumed, recompute what is left */
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_MISC;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
                    (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
                    (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
                    (ctxt->input->cur[8] == 'E')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing internal subset\n");
#endif
                    ctxt->inSubset = 1;
                    xmlParseDocTypeDecl(ctxt);
                    if (RAW == '[') {
                        /* an internal subset follows, handled in DTD state */
                        ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering DTD\n");
#endif
                    } else {
                        /*
                         * Create and update the external subset.
                         */
                        ctxt->inSubset = 2;
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                            (ctxt->sax->externalSubset != NULL))
                            ctxt->sax->externalSubset(ctxt->userData,
                                    ctxt->intSubName, ctxt->extSubSystem,
                                    ctxt->extSubURI);
                        ctxt->inSubset = 0;
                        ctxt->instate = XML_PARSER_PROLOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering PROLOG\n");
#endif
                    }
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* too short to tell a comment from a DOCTYPE yet */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_IGNORE:
                /* not expected here: report it and switch to the DTD state */
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == IGNORE");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_PROLOG:
                /*
                 * After the document type declaration: PIs, comments, or
                 * the start of the root element.
                 */
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_PROLOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    /* "<!" seen but not enough data to recognise "<!--" */
                    goto done;
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                }
                break;
            case XML_PARSER_EPILOG:
                /*
                 * After the root element: only PIs and comments are
                 * accepted, anything else is reported as extra content.
                 */
                SKIP_BLANKS;
                if (ctxt->input->buf == NULL)
                    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
                else
                    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
                if (avail < 2)
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_EPILOG;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 4)) {
                    goto done;
                } else {
                    ctxt->errNo = XML_ERR_DOCUMENT_END;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Extra content at the end of the document\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * disableSAX was set just above, so this call cannot
                     * fire on this path.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                break;
            case XML_PARSER_START_TAG: {
                xmlChar *name, *oldname;

                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                if (cur != '<') {
                    ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                "Start tag expect, '<' not found\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    /*
                     * disableSAX was set just above, so this call cannot
                     * fire on this path.
                     */
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /* wait until the whole start tag is buffered */
                if ((!terminate) &&
                    (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                    goto done;
                /*
                 * Push a space-handling entry for the new element: -1 when
                 * the stack is empty, otherwise a copy of the current top.
                 */
                if (ctxt->spaceNr == 0)
                    spacePush(ctxt, -1);
                else
                    spacePush(ctxt, *ctxt->space);
                name = xmlParseStartTag(ctxt);
                if (name == NULL) {
                    spacePop(ctxt);
                    ctxt->instate = XML_PARSER_EOF;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EOF\n");
#endif
                    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
                        (!ctxt->disableSAX))
                        ctxt->sax->endDocument(ctxt->userData);
                    goto done;
                }
                /* the name stack gets its own copy, freed when popped */
                namePush(ctxt, xmlStrdup(name));

                /*
                 * [ VC: Root Element Type ]
                 * The Name in the document type declaration must match
                 * the element type of the root element.
                 */
                if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
                    ctxt->node && (ctxt->node == ctxt->myDoc->children))
                    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);

                /*
                 * Check for an Empty Element.
                 */
                if ((RAW == '/') && (NXT(1) == '>')) {
                    SKIP(2);
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
                        ctxt->sax->endElement(ctxt->userData, name);
                    xmlFree(name);
                    oldname = namePop(ctxt);
                    spacePop(ctxt);
                    if (oldname != NULL) {
#ifdef DEBUG_STACK
                        xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
                        xmlFree(oldname);
                    }
                    /* an empty name stack means the root element is closed */
                    if (ctxt->name == NULL) {
                        ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering EPILOG\n");
#endif
                    } else {
                        ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "PP: entering CONTENT\n");
#endif
                    }
                    break;
                }
                if (RAW == '>') {
                    NEXT;
                } else {
                    ctxt->errNo = XML_ERR_GT_REQUIRED;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                                         "Couldn't find end of Start Tag %s\n",
                                         name);
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;

                    /*
                     * end of parsing of this node.
                     */
                    nodePop(ctxt);
                    oldname = namePop(ctxt);
                    spacePop(ctxt);
                    if (oldname != NULL) {
#ifdef DEBUG_STACK
                        xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
#endif
                        xmlFree(oldname);
                    }
                }
                /* both the normal and the error path continue in CONTENT */
                xmlFree(name);
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            }
            case XML_PARSER_CONTENT: {
                const xmlChar *test;
                int cons;
                int tok;

                /*
                 * Handle preparsed entities and charRef
                 */
                if (ctxt->token != 0) {
                    xmlChar current[2] = { 0 , 0 } ;

                    current[0] = (xmlChar) ctxt->token;
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                        (ctxt->sax->characters != NULL))
                        ctxt->sax->characters(ctxt->userData, current, 1);
                    ctxt->token = 0;
                }
                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];

                /*
                 * Snapshot of the position, used at the end of this case
                 * to detect a pass that consumed nothing.
                 */
                test = CUR_PTR;
                cons = ctxt->input->consumed;
                tok = ctxt->token;
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing PI\n");
#endif
                    xmlParsePI(ctxt);
                } else if ((cur == '<') && (next == '!') &&
                           (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Comment\n");
#endif
                    xmlParseComment(ctxt);
                    ctxt->instate = XML_PARSER_CONTENT;
                } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
                    (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
                    (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
                    (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
                    (ctxt->input->cur[8] == '[')) {
                    /* skip the "<![CDATA[" opener */
                    SKIP(9);
                    ctxt->instate = XML_PARSER_CDATA_SECTION;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CDATA_SECTION\n");
#endif
                    break;
                } else if ((cur == '<') && (next == '!') &&
                           (avail < 9)) {
                    /* too short to tell a comment from a CDATA section */
                    goto done;
                } else if ((cur == '<') && (next == '/')) {
                    ctxt->instate = XML_PARSER_END_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering END_TAG\n");
#endif
                    break;
                } else if (cur == '<') {
                    ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering START_TAG\n");
#endif
                    break;
                } else if (cur == '&') {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
                        goto done;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing Reference\n");
#endif
                    xmlParseReference(ctxt);
                } else {
                    /* TODO Avoid the extra copy, handle directly !!! */
                    /*
                     * Goal of the following test is:
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *  - handle a problem for isBlank when we only parse
                     *    a sequence of blank chars and the next one is
                     *    not available to check against '<' presence.
                     *  - tries to homogenize the differences in SAX
                     *    callbacks between the push and pull versions
                     *    of the parser.
                     */
                    if ((ctxt->inputNr == 1) &&
                        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
                        if ((!terminate) &&
                            (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
                            goto done;
                    }
                    ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: Parsing char data\n");
#endif
                    xmlParseCharData(ctxt, 0);
                }
                /*
                 * Pop-up of finished entities.
                 */
                while ((RAW == 0) && (ctxt->inputNr > 1))
                    xmlPopInput(ctxt);
                /*
                 * Nothing moved since the snapshot taken above: stop with
                 * an error rather than loop on the same input.
                 */
                if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
                    (tok == ctxt->token)) {
                    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                             "detected an error in element content\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                    ctxt->instate = XML_PARSER_EOF;
                    break;
                }
                break;
            }
            case XML_PARSER_CDATA_SECTION: {
                /*
                 * The Push mode need to have the SAX callback for
                 * cdataBlock merge back contiguous callbacks.
                 */
                int base;

                /* this lookup is done whatever the value of terminate */
                base = xmlParseLookupSequence(ctxt, ']', ']', '>');
                if (base < 0) {
                    /*
                     * End not buffered yet: when enough data is pending,
                     * deliver one XML_PARSER_BIG_BUFFER_SIZE block and
                     * keep the remainder for the next call.
                     */
                    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                            if (ctxt->sax->cdataBlock != NULL)
                                ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
                                      XML_PARSER_BIG_BUFFER_SIZE);
                        }
                        SKIP(XML_PARSER_BIG_BUFFER_SIZE);
                        ctxt->checkIndex = 0;
                    }
                    goto done;
                } else {
                    /* base bytes of data precede the closing "]]>" */
                    if ((ctxt->sax != NULL) && (base > 0) &&
                        (!ctxt->disableSAX)) {
                        if (ctxt->sax->cdataBlock != NULL)
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  ctxt->input->cur, base);
                    }
                    SKIP(base + 3);
                    ctxt->checkIndex = 0;
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            }
            case XML_PARSER_END_TAG:
                if (avail < 2)
                    goto done;
                if ((!terminate) &&
                    (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
                    goto done;
                xmlParseEndTag(ctxt);
                /* an empty name stack means the root element is closed */
                if (ctxt->name == NULL) {
                    ctxt->instate = XML_PARSER_EPILOG;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering EPILOG\n");
#endif
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: entering CONTENT\n");
#endif
                }
                break;
            case XML_PARSER_DTD: {
                /*
                 * Progressive parsing of the internal subset is not
                 * supported. We first check that the full content of the
                 * internal subset is available and the parsing is
                 * launched only at that point.
                 * Internal subset ends up with "']' S? '>'" in an unescaped
                 * section and not in a ']]>' sequence, which closes a
                 * conditional section.
                 */
                int base, i;
                xmlChar *buf;
                xmlChar quote = 0;

                base = ctxt->input->cur - ctxt->input->base;
                if (base < 0) return(0);
                if (ctxt->checkIndex > base)
                    base = ctxt->checkIndex;
                /*
                 * NOTE(review): unlike the avail computations above, this
                 * state dereferences ctxt->input->buf without a NULL check
                 * - confirm it is always set when this state is reached.
                 */
                buf = ctxt->input->buf->buffer->content;
                for (;(unsigned int) base < ctxt->input->buf->buffer->use;
                     base++) {
                    /* inside a quoted literal only the closing quote counts */
                    if (quote != 0) {
                        if (buf[base] == quote)
                            quote = 0;
                        continue;
                    }
                    if (buf[base] == '"') {
                        quote = '"';
                        continue;
                    }
                    if (buf[base] == '\'') {
                        quote = '\'';
                        continue;
                    }
                    if (buf[base] == ']') {
                        if ((unsigned int) base +1 >=
                            ctxt->input->buf->buffer->use)
                            break;
                        if (buf[base + 1] == ']') {
                            /* conditional section end, skip both ']' ! */
                            base++;
                            continue;
                        }
                        /*
                         * NOTE(review): any '>' found in the rest of the
                         * buffered data is accepted here, not only one
                         * separated from the ']' by optional blanks.
                         */
                        for (i = 0;
                             (unsigned int) base + i < ctxt->input->buf->buffer->use;
                             i++) {
                            if (buf[base + i] == '>')
                                goto found_end_int_subset;
                        }
                        break;
                    }
                }
                /*
                 * We did not find the end of the Internal subset.
                 * The scan position is only remembered when not stopped
                 * inside a quoted literal.
                 */
                if (quote == 0)
                    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
                /*
                 * NOTE(review): next may not have been assigned during
                 * this call when the function is entered in DTD state.
                 */
                if (next == 0)
                    xmlGenericError(xmlGenericErrorContext,
                            "PP: lookup of int subset end filed\n");
#endif
                goto done;

found_end_int_subset:
                xmlParseInternalSubset(ctxt);
                ctxt->inSubset = 2;
                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                    (ctxt->sax->externalSubset != NULL))
                    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                            ctxt->extSubSystem, ctxt->extSubURI);
                ctxt->inSubset = 0;
                ctxt->instate = XML_PARSER_PROLOG;
                ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering PROLOG\n");
#endif
                break;
            }
            /*
             * The remaining states are reported as internal errors when
             * seen here; each one is mapped to a state this loop handles.
             */
            case XML_PARSER_COMMENT:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == COMMENT\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_PI:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == PI\n");
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering CONTENT\n");
#endif
                break;
            case XML_PARSER_ENTITY_DECL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_DECL\n");
                ctxt->instate = XML_PARSER_DTD;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ENTITY_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ENTITY_VALUE\n");
                /*
                 * NOTE(review): the trace below announces DTD while the
                 * state set here is CONTENT - confirm which is intended.
                 */
                ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering DTD\n");
#endif
                break;
            case XML_PARSER_ATTRIBUTE_VALUE:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == ATTRIBUTE_VALUE\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
            case XML_PARSER_SYSTEM_LITERAL:
                xmlGenericError(xmlGenericErrorContext,
                        "PP: internal error, state == SYSTEM_LITERAL\n");
                ctxt->instate = XML_PARSER_START_TAG;
#ifdef DEBUG_PUSH
                xmlGenericError(xmlGenericErrorContext,
                        "PP: entering START_TAG\n");
#endif
                break;
        }
    }
done:
#ifdef DEBUG_PUSH
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
    return(ret);
}
8471
8472/**
Owen Taylor3473f882001-02-23 17:55:21 +00008473 * xmlParseChunk:
8474 * @ctxt: an XML parser context
8475 * @chunk: an char array
8476 * @size: the size in byte of the chunk
8477 * @terminate: last chunk indicator
8478 *
8479 * Parse a Chunk of memory
8480 *
8481 * Returns zero if no error, the xmlParserErrors otherwise.
8482 */
8483int
8484xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8485 int terminate) {
8486 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8487 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8488 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8489 int cur = ctxt->input->cur - ctxt->input->base;
8490
8491 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8492 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8493 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008494 ctxt->input->end =
8495 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008496#ifdef DEBUG_PUSH
8497 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8498#endif
8499
8500 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8501 xmlParseTryOrFinish(ctxt, terminate);
8502 } else if (ctxt->instate != XML_PARSER_EOF) {
8503 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8504 xmlParserInputBufferPtr in = ctxt->input->buf;
8505 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8506 (in->raw != NULL)) {
8507 int nbchars;
8508
8509 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8510 if (nbchars < 0) {
8511 xmlGenericError(xmlGenericErrorContext,
8512 "xmlParseChunk: encoder error\n");
8513 return(XML_ERR_INVALID_ENCODING);
8514 }
8515 }
8516 }
8517 }
8518 xmlParseTryOrFinish(ctxt, terminate);
8519 if (terminate) {
8520 /*
8521 * Check for termination
8522 */
8523 if ((ctxt->instate != XML_PARSER_EOF) &&
8524 (ctxt->instate != XML_PARSER_EPILOG)) {
8525 ctxt->errNo = XML_ERR_DOCUMENT_END;
8526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8527 ctxt->sax->error(ctxt->userData,
8528 "Extra content at the end of the document\n");
8529 ctxt->wellFormed = 0;
8530 ctxt->disableSAX = 1;
8531 }
8532 if (ctxt->instate != XML_PARSER_EOF) {
8533 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8534 (!ctxt->disableSAX))
8535 ctxt->sax->endDocument(ctxt->userData);
8536 }
8537 ctxt->instate = XML_PARSER_EOF;
8538 }
8539 return((xmlParserErrors) ctxt->errNo);
8540}
8541
8542/************************************************************************
8543 * *
8544 * I/O front end functions to the parser *
8545 * *
8546 ************************************************************************/
8547
8548/**
8549 * xmlStopParser:
8550 * @ctxt: an XML parser context
8551 *
8552 * Blocks further parser processing
8553 */
8554void
8555xmlStopParser(xmlParserCtxtPtr ctxt) {
8556 ctxt->instate = XML_PARSER_EOF;
8557 if (ctxt->input != NULL)
8558 ctxt->input->cur = BAD_CAST"";
8559}
8560
8561/**
8562 * xmlCreatePushParserCtxt:
8563 * @sax: a SAX handler
8564 * @user_data: The user data returned on SAX callbacks
8565 * @chunk: a pointer to an array of chars
8566 * @size: number of chars in the array
8567 * @filename: an optional file name or URI
8568 *
8569 * Create a parser context for using the XML parser in push mode
8570 * To allow content encoding detection, @size should be >= 4
8571 * The value of @filename is used for fetching external entities
8572 * and error/warning reports.
8573 *
8574 * Returns the new parser context or NULL
8575 */
8576xmlParserCtxtPtr
8577xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8578 const char *chunk, int size, const char *filename) {
8579 xmlParserCtxtPtr ctxt;
8580 xmlParserInputPtr inputStream;
8581 xmlParserInputBufferPtr buf;
8582 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8583
8584 /*
8585 * plug some encoding conversion routines
8586 */
8587 if ((chunk != NULL) && (size >= 4))
8588 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8589
8590 buf = xmlAllocParserInputBuffer(enc);
8591 if (buf == NULL) return(NULL);
8592
8593 ctxt = xmlNewParserCtxt();
8594 if (ctxt == NULL) {
8595 xmlFree(buf);
8596 return(NULL);
8597 }
8598 if (sax != NULL) {
8599 if (ctxt->sax != &xmlDefaultSAXHandler)
8600 xmlFree(ctxt->sax);
8601 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8602 if (ctxt->sax == NULL) {
8603 xmlFree(buf);
8604 xmlFree(ctxt);
8605 return(NULL);
8606 }
8607 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8608 if (user_data != NULL)
8609 ctxt->userData = user_data;
8610 }
8611 if (filename == NULL) {
8612 ctxt->directory = NULL;
8613 } else {
8614 ctxt->directory = xmlParserGetDirectory(filename);
8615 }
8616
8617 inputStream = xmlNewInputStream(ctxt);
8618 if (inputStream == NULL) {
8619 xmlFreeParserCtxt(ctxt);
8620 return(NULL);
8621 }
8622
8623 if (filename == NULL)
8624 inputStream->filename = NULL;
8625 else
8626 inputStream->filename = xmlMemStrdup(filename);
8627 inputStream->buf = buf;
8628 inputStream->base = inputStream->buf->buffer->content;
8629 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008630 inputStream->end =
8631 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008632
8633 inputPush(ctxt, inputStream);
8634
8635 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8636 (ctxt->input->buf != NULL)) {
8637 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8638#ifdef DEBUG_PUSH
8639 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8640#endif
8641 }
8642
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008643 if (enc != XML_CHAR_ENCODING_NONE) {
8644 xmlSwitchEncoding(ctxt, enc);
8645 }
8646
Owen Taylor3473f882001-02-23 17:55:21 +00008647 return(ctxt);
8648}
8649
8650/**
8651 * xmlCreateIOParserCtxt:
8652 * @sax: a SAX handler
8653 * @user_data: The user data returned on SAX callbacks
8654 * @ioread: an I/O read function
8655 * @ioclose: an I/O close function
8656 * @ioctx: an I/O handler
8657 * @enc: the charset encoding if known
8658 *
8659 * Create a parser context for using the XML parser with an existing
8660 * I/O stream
8661 *
8662 * Returns the new parser context or NULL
8663 */
8664xmlParserCtxtPtr
8665xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8666 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8667 void *ioctx, xmlCharEncoding enc) {
8668 xmlParserCtxtPtr ctxt;
8669 xmlParserInputPtr inputStream;
8670 xmlParserInputBufferPtr buf;
8671
8672 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8673 if (buf == NULL) return(NULL);
8674
8675 ctxt = xmlNewParserCtxt();
8676 if (ctxt == NULL) {
8677 xmlFree(buf);
8678 return(NULL);
8679 }
8680 if (sax != NULL) {
8681 if (ctxt->sax != &xmlDefaultSAXHandler)
8682 xmlFree(ctxt->sax);
8683 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8684 if (ctxt->sax == NULL) {
8685 xmlFree(buf);
8686 xmlFree(ctxt);
8687 return(NULL);
8688 }
8689 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8690 if (user_data != NULL)
8691 ctxt->userData = user_data;
8692 }
8693
8694 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8695 if (inputStream == NULL) {
8696 xmlFreeParserCtxt(ctxt);
8697 return(NULL);
8698 }
8699 inputPush(ctxt, inputStream);
8700
8701 return(ctxt);
8702}
8703
8704/************************************************************************
8705 * *
8706 * Front ends when parsing a Dtd *
8707 * *
8708 ************************************************************************/
8709
8710/**
8711 * xmlIOParseDTD:
8712 * @sax: the SAX handler block or NULL
8713 * @input: an Input Buffer
8714 * @enc: the charset encoding if known
8715 *
8716 * Load and parse a DTD
8717 *
8718 * Returns the resulting xmlDtdPtr or NULL in case of error.
8719 * @input will be freed at parsing end.
8720 */
8721
8722xmlDtdPtr
8723xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8724 xmlCharEncoding enc) {
8725 xmlDtdPtr ret = NULL;
8726 xmlParserCtxtPtr ctxt;
8727 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008728 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008729
8730 if (input == NULL)
8731 return(NULL);
8732
8733 ctxt = xmlNewParserCtxt();
8734 if (ctxt == NULL) {
8735 return(NULL);
8736 }
8737
8738 /*
8739 * Set-up the SAX context
8740 */
8741 if (sax != NULL) {
8742 if (ctxt->sax != NULL)
8743 xmlFree(ctxt->sax);
8744 ctxt->sax = sax;
8745 ctxt->userData = NULL;
8746 }
8747
8748 /*
8749 * generate a parser input from the I/O handler
8750 */
8751
8752 pinput = xmlNewIOInputStream(ctxt, input, enc);
8753 if (pinput == NULL) {
8754 if (sax != NULL) ctxt->sax = NULL;
8755 xmlFreeParserCtxt(ctxt);
8756 return(NULL);
8757 }
8758
8759 /*
8760 * plug some encoding conversion routines here.
8761 */
8762 xmlPushInput(ctxt, pinput);
8763
8764 pinput->filename = NULL;
8765 pinput->line = 1;
8766 pinput->col = 1;
8767 pinput->base = ctxt->input->cur;
8768 pinput->cur = ctxt->input->cur;
8769 pinput->free = NULL;
8770
8771 /*
8772 * let's parse that entity knowing it's an external subset.
8773 */
8774 ctxt->inSubset = 2;
8775 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8776 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8777 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008778
8779 if (enc == XML_CHAR_ENCODING_NONE) {
8780 /*
8781 * Get the 4 first bytes and decode the charset
8782 * if enc != XML_CHAR_ENCODING_NONE
8783 * plug some encoding conversion routines.
8784 */
8785 start[0] = RAW;
8786 start[1] = NXT(1);
8787 start[2] = NXT(2);
8788 start[3] = NXT(3);
8789 enc = xmlDetectCharEncoding(start, 4);
8790 if (enc != XML_CHAR_ENCODING_NONE) {
8791 xmlSwitchEncoding(ctxt, enc);
8792 }
8793 }
8794
Owen Taylor3473f882001-02-23 17:55:21 +00008795 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8796
8797 if (ctxt->myDoc != NULL) {
8798 if (ctxt->wellFormed) {
8799 ret = ctxt->myDoc->extSubset;
8800 ctxt->myDoc->extSubset = NULL;
8801 } else {
8802 ret = NULL;
8803 }
8804 xmlFreeDoc(ctxt->myDoc);
8805 ctxt->myDoc = NULL;
8806 }
8807 if (sax != NULL) ctxt->sax = NULL;
8808 xmlFreeParserCtxt(ctxt);
8809
8810 return(ret);
8811}
8812
8813/**
8814 * xmlSAXParseDTD:
8815 * @sax: the SAX handler block
8816 * @ExternalID: a NAME* containing the External ID of the DTD
8817 * @SystemID: a NAME* containing the URL to the DTD
8818 *
8819 * Load and parse an external subset.
8820 *
8821 * Returns the resulting xmlDtdPtr or NULL in case of error.
8822 */
8823
8824xmlDtdPtr
8825xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8826 const xmlChar *SystemID) {
8827 xmlDtdPtr ret = NULL;
8828 xmlParserCtxtPtr ctxt;
8829 xmlParserInputPtr input = NULL;
8830 xmlCharEncoding enc;
8831
8832 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8833
8834 ctxt = xmlNewParserCtxt();
8835 if (ctxt == NULL) {
8836 return(NULL);
8837 }
8838
8839 /*
8840 * Set-up the SAX context
8841 */
8842 if (sax != NULL) {
8843 if (ctxt->sax != NULL)
8844 xmlFree(ctxt->sax);
8845 ctxt->sax = sax;
8846 ctxt->userData = NULL;
8847 }
8848
8849 /*
8850 * Ask the Entity resolver to load the damn thing
8851 */
8852
8853 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8854 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8855 if (input == NULL) {
8856 if (sax != NULL) ctxt->sax = NULL;
8857 xmlFreeParserCtxt(ctxt);
8858 return(NULL);
8859 }
8860
8861 /*
8862 * plug some encoding conversion routines here.
8863 */
8864 xmlPushInput(ctxt, input);
8865 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8866 xmlSwitchEncoding(ctxt, enc);
8867
8868 if (input->filename == NULL)
8869 input->filename = (char *) xmlStrdup(SystemID);
8870 input->line = 1;
8871 input->col = 1;
8872 input->base = ctxt->input->cur;
8873 input->cur = ctxt->input->cur;
8874 input->free = NULL;
8875
8876 /*
8877 * let's parse that entity knowing it's an external subset.
8878 */
8879 ctxt->inSubset = 2;
8880 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8881 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8882 ExternalID, SystemID);
8883 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8884
8885 if (ctxt->myDoc != NULL) {
8886 if (ctxt->wellFormed) {
8887 ret = ctxt->myDoc->extSubset;
8888 ctxt->myDoc->extSubset = NULL;
8889 } else {
8890 ret = NULL;
8891 }
8892 xmlFreeDoc(ctxt->myDoc);
8893 ctxt->myDoc = NULL;
8894 }
8895 if (sax != NULL) ctxt->sax = NULL;
8896 xmlFreeParserCtxt(ctxt);
8897
8898 return(ret);
8899}
8900
8901/**
8902 * xmlParseDTD:
8903 * @ExternalID: a NAME* containing the External ID of the DTD
8904 * @SystemID: a NAME* containing the URL to the DTD
8905 *
8906 * Load and parse an external subset.
8907 *
8908 * Returns the resulting xmlDtdPtr or NULL in case of error.
8909 */
8910
8911xmlDtdPtr
8912xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8913 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8914}
8915
8916/************************************************************************
8917 * *
8918 * Front ends when parsing an Entity *
8919 * *
8920 ************************************************************************/
8921
8922/**
Owen Taylor3473f882001-02-23 17:55:21 +00008923 * xmlParseCtxtExternalEntity:
8924 * @ctx: the existing parsing context
8925 * @URL: the URL for the entity to load
8926 * @ID: the System ID for the entity to load
8927 * @list: the return value for the set of parsed nodes
8928 *
8929 * Parse an external general entity within an existing parsing context
8930 * An external general parsed entity is well-formed if it matches the
8931 * production labeled extParsedEnt.
8932 *
8933 * [78] extParsedEnt ::= TextDecl? content
8934 *
8935 * Returns 0 if the entity is well formed, -1 in case of args problem and
8936 * the parser error code otherwise
8937 */
8938
8939int
8940xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8941 const xmlChar *ID, xmlNodePtr *list) {
8942 xmlParserCtxtPtr ctxt;
8943 xmlDocPtr newDoc;
8944 xmlSAXHandlerPtr oldsax = NULL;
8945 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008946 xmlChar start[4];
8947 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008948
8949 if (ctx->depth > 40) {
8950 return(XML_ERR_ENTITY_LOOP);
8951 }
8952
8953 if (list != NULL)
8954 *list = NULL;
8955 if ((URL == NULL) && (ID == NULL))
8956 return(-1);
8957 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8958 return(-1);
8959
8960
8961 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8962 if (ctxt == NULL) return(-1);
8963 ctxt->userData = ctxt;
8964 oldsax = ctxt->sax;
8965 ctxt->sax = ctx->sax;
8966 newDoc = xmlNewDoc(BAD_CAST "1.0");
8967 if (newDoc == NULL) {
8968 xmlFreeParserCtxt(ctxt);
8969 return(-1);
8970 }
8971 if (ctx->myDoc != NULL) {
8972 newDoc->intSubset = ctx->myDoc->intSubset;
8973 newDoc->extSubset = ctx->myDoc->extSubset;
8974 }
8975 if (ctx->myDoc->URL != NULL) {
8976 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8977 }
8978 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8979 if (newDoc->children == NULL) {
8980 ctxt->sax = oldsax;
8981 xmlFreeParserCtxt(ctxt);
8982 newDoc->intSubset = NULL;
8983 newDoc->extSubset = NULL;
8984 xmlFreeDoc(newDoc);
8985 return(-1);
8986 }
8987 nodePush(ctxt, newDoc->children);
8988 if (ctx->myDoc == NULL) {
8989 ctxt->myDoc = newDoc;
8990 } else {
8991 ctxt->myDoc = ctx->myDoc;
8992 newDoc->children->doc = ctx->myDoc;
8993 }
8994
Daniel Veillard87a764e2001-06-20 17:41:10 +00008995 /*
8996 * Get the 4 first bytes and decode the charset
8997 * if enc != XML_CHAR_ENCODING_NONE
8998 * plug some encoding conversion routines.
8999 */
9000 GROW
9001 start[0] = RAW;
9002 start[1] = NXT(1);
9003 start[2] = NXT(2);
9004 start[3] = NXT(3);
9005 enc = xmlDetectCharEncoding(start, 4);
9006 if (enc != XML_CHAR_ENCODING_NONE) {
9007 xmlSwitchEncoding(ctxt, enc);
9008 }
9009
Owen Taylor3473f882001-02-23 17:55:21 +00009010 /*
9011 * Parse a possible text declaration first
9012 */
Owen Taylor3473f882001-02-23 17:55:21 +00009013 if ((RAW == '<') && (NXT(1) == '?') &&
9014 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9015 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9016 xmlParseTextDecl(ctxt);
9017 }
9018
9019 /*
9020 * Doing validity checking on chunk doesn't make sense
9021 */
9022 ctxt->instate = XML_PARSER_CONTENT;
9023 ctxt->validate = ctx->validate;
9024 ctxt->loadsubset = ctx->loadsubset;
9025 ctxt->depth = ctx->depth + 1;
9026 ctxt->replaceEntities = ctx->replaceEntities;
9027 if (ctxt->validate) {
9028 ctxt->vctxt.error = ctx->vctxt.error;
9029 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009030 } else {
9031 ctxt->vctxt.error = NULL;
9032 ctxt->vctxt.warning = NULL;
9033 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009034 ctxt->vctxt.nodeTab = NULL;
9035 ctxt->vctxt.nodeNr = 0;
9036 ctxt->vctxt.nodeMax = 0;
9037 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009038
9039 xmlParseContent(ctxt);
9040
9041 if ((RAW == '<') && (NXT(1) == '/')) {
9042 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9044 ctxt->sax->error(ctxt->userData,
9045 "chunk is not well balanced\n");
9046 ctxt->wellFormed = 0;
9047 ctxt->disableSAX = 1;
9048 } else if (RAW != 0) {
9049 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9051 ctxt->sax->error(ctxt->userData,
9052 "extra content at the end of well balanced chunk\n");
9053 ctxt->wellFormed = 0;
9054 ctxt->disableSAX = 1;
9055 }
9056 if (ctxt->node != newDoc->children) {
9057 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9059 ctxt->sax->error(ctxt->userData,
9060 "chunk is not well balanced\n");
9061 ctxt->wellFormed = 0;
9062 ctxt->disableSAX = 1;
9063 }
9064
9065 if (!ctxt->wellFormed) {
9066 if (ctxt->errNo == 0)
9067 ret = 1;
9068 else
9069 ret = ctxt->errNo;
9070 } else {
9071 if (list != NULL) {
9072 xmlNodePtr cur;
9073
9074 /*
9075 * Return the newly created nodeset after unlinking it from
9076 * they pseudo parent.
9077 */
9078 cur = newDoc->children->children;
9079 *list = cur;
9080 while (cur != NULL) {
9081 cur->parent = NULL;
9082 cur = cur->next;
9083 }
9084 newDoc->children->children = NULL;
9085 }
9086 ret = 0;
9087 }
9088 ctxt->sax = oldsax;
9089 xmlFreeParserCtxt(ctxt);
9090 newDoc->intSubset = NULL;
9091 newDoc->extSubset = NULL;
9092 xmlFreeDoc(newDoc);
9093
9094 return(ret);
9095}
9096
9097/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009098 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009099 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009100 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009101 * @sax: the SAX handler bloc (possibly NULL)
9102 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9103 * @depth: Used for loop detection, use 0
9104 * @URL: the URL for the entity to load
9105 * @ID: the System ID for the entity to load
9106 * @list: the return value for the set of parsed nodes
9107 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009108 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009109 *
9110 * Returns 0 if the entity is well formed, -1 in case of args problem and
9111 * the parser error code otherwise
9112 */
9113
Daniel Veillard257d9102001-05-08 10:41:44 +00009114static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009115xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9116 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009117 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009118 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009119 xmlParserCtxtPtr ctxt;
9120 xmlDocPtr newDoc;
9121 xmlSAXHandlerPtr oldsax = NULL;
9122 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009123 xmlChar start[4];
9124 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009125
9126 if (depth > 40) {
9127 return(XML_ERR_ENTITY_LOOP);
9128 }
9129
9130
9131
9132 if (list != NULL)
9133 *list = NULL;
9134 if ((URL == NULL) && (ID == NULL))
9135 return(-1);
9136 if (doc == NULL) /* @@ relax but check for dereferences */
9137 return(-1);
9138
9139
9140 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9141 if (ctxt == NULL) return(-1);
9142 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009143 if (oldctxt != NULL) {
9144 ctxt->_private = oldctxt->_private;
9145 ctxt->loadsubset = oldctxt->loadsubset;
9146 ctxt->validate = oldctxt->validate;
9147 ctxt->external = oldctxt->external;
9148 } else {
9149 /*
9150 * Doing validity checking on chunk without context
9151 * doesn't make sense
9152 */
9153 ctxt->_private = NULL;
9154 ctxt->validate = 0;
9155 ctxt->external = 2;
9156 ctxt->loadsubset = 0;
9157 }
Owen Taylor3473f882001-02-23 17:55:21 +00009158 if (sax != NULL) {
9159 oldsax = ctxt->sax;
9160 ctxt->sax = sax;
9161 if (user_data != NULL)
9162 ctxt->userData = user_data;
9163 }
9164 newDoc = xmlNewDoc(BAD_CAST "1.0");
9165 if (newDoc == NULL) {
9166 xmlFreeParserCtxt(ctxt);
9167 return(-1);
9168 }
9169 if (doc != NULL) {
9170 newDoc->intSubset = doc->intSubset;
9171 newDoc->extSubset = doc->extSubset;
9172 }
9173 if (doc->URL != NULL) {
9174 newDoc->URL = xmlStrdup(doc->URL);
9175 }
9176 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9177 if (newDoc->children == NULL) {
9178 if (sax != NULL)
9179 ctxt->sax = oldsax;
9180 xmlFreeParserCtxt(ctxt);
9181 newDoc->intSubset = NULL;
9182 newDoc->extSubset = NULL;
9183 xmlFreeDoc(newDoc);
9184 return(-1);
9185 }
9186 nodePush(ctxt, newDoc->children);
9187 if (doc == NULL) {
9188 ctxt->myDoc = newDoc;
9189 } else {
9190 ctxt->myDoc = doc;
9191 newDoc->children->doc = doc;
9192 }
9193
Daniel Veillard87a764e2001-06-20 17:41:10 +00009194 /*
9195 * Get the 4 first bytes and decode the charset
9196 * if enc != XML_CHAR_ENCODING_NONE
9197 * plug some encoding conversion routines.
9198 */
9199 GROW;
9200 start[0] = RAW;
9201 start[1] = NXT(1);
9202 start[2] = NXT(2);
9203 start[3] = NXT(3);
9204 enc = xmlDetectCharEncoding(start, 4);
9205 if (enc != XML_CHAR_ENCODING_NONE) {
9206 xmlSwitchEncoding(ctxt, enc);
9207 }
9208
Owen Taylor3473f882001-02-23 17:55:21 +00009209 /*
9210 * Parse a possible text declaration first
9211 */
Owen Taylor3473f882001-02-23 17:55:21 +00009212 if ((RAW == '<') && (NXT(1) == '?') &&
9213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9215 xmlParseTextDecl(ctxt);
9216 }
9217
Owen Taylor3473f882001-02-23 17:55:21 +00009218 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009219 ctxt->depth = depth;
9220
9221 xmlParseContent(ctxt);
9222
9223 if ((RAW == '<') && (NXT(1) == '/')) {
9224 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9226 ctxt->sax->error(ctxt->userData,
9227 "chunk is not well balanced\n");
9228 ctxt->wellFormed = 0;
9229 ctxt->disableSAX = 1;
9230 } else if (RAW != 0) {
9231 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9233 ctxt->sax->error(ctxt->userData,
9234 "extra content at the end of well balanced chunk\n");
9235 ctxt->wellFormed = 0;
9236 ctxt->disableSAX = 1;
9237 }
9238 if (ctxt->node != newDoc->children) {
9239 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9241 ctxt->sax->error(ctxt->userData,
9242 "chunk is not well balanced\n");
9243 ctxt->wellFormed = 0;
9244 ctxt->disableSAX = 1;
9245 }
9246
9247 if (!ctxt->wellFormed) {
9248 if (ctxt->errNo == 0)
9249 ret = 1;
9250 else
9251 ret = ctxt->errNo;
9252 } else {
9253 if (list != NULL) {
9254 xmlNodePtr cur;
9255
9256 /*
9257 * Return the newly created nodeset after unlinking it from
9258 * they pseudo parent.
9259 */
9260 cur = newDoc->children->children;
9261 *list = cur;
9262 while (cur != NULL) {
9263 cur->parent = NULL;
9264 cur = cur->next;
9265 }
9266 newDoc->children->children = NULL;
9267 }
9268 ret = 0;
9269 }
9270 if (sax != NULL)
9271 ctxt->sax = oldsax;
9272 xmlFreeParserCtxt(ctxt);
9273 newDoc->intSubset = NULL;
9274 newDoc->extSubset = NULL;
9275 xmlFreeDoc(newDoc);
9276
9277 return(ret);
9278}
9279
9280/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009281 * xmlParseExternalEntity:
9282 * @doc: the document the chunk pertains to
9283 * @sax: the SAX handler bloc (possibly NULL)
9284 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9285 * @depth: Used for loop detection, use 0
9286 * @URL: the URL for the entity to load
9287 * @ID: the System ID for the entity to load
9288 * @list: the return value for the set of parsed nodes
9289 *
9290 * Parse an external general entity
9291 * An external general parsed entity is well-formed if it matches the
9292 * production labeled extParsedEnt.
9293 *
9294 * [78] extParsedEnt ::= TextDecl? content
9295 *
9296 * Returns 0 if the entity is well formed, -1 in case of args problem and
9297 * the parser error code otherwise
9298 */
9299
9300int
9301xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9302 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009303 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9304 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009305}
9306
9307/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009308 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009309 * @doc: the document the chunk pertains to
9310 * @sax: the SAX handler bloc (possibly NULL)
9311 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9312 * @depth: Used for loop detection, use 0
9313 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9314 * @list: the return value for the set of parsed nodes
9315 *
9316 * Parse a well-balanced chunk of an XML document
9317 * called by the parser
9318 * The allowed sequence for the Well Balanced Chunk is the one defined by
9319 * the content production in the XML grammar:
9320 *
9321 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9322 *
9323 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9324 * the parser error code otherwise
9325 */
9326
9327int
9328xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9329 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9330 xmlParserCtxtPtr ctxt;
9331 xmlDocPtr newDoc;
9332 xmlSAXHandlerPtr oldsax = NULL;
9333 int size;
9334 int ret = 0;
9335
9336 if (depth > 40) {
9337 return(XML_ERR_ENTITY_LOOP);
9338 }
9339
9340
9341 if (list != NULL)
9342 *list = NULL;
9343 if (string == NULL)
9344 return(-1);
9345
9346 size = xmlStrlen(string);
9347
9348 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9349 if (ctxt == NULL) return(-1);
9350 ctxt->userData = ctxt;
9351 if (sax != NULL) {
9352 oldsax = ctxt->sax;
9353 ctxt->sax = sax;
9354 if (user_data != NULL)
9355 ctxt->userData = user_data;
9356 }
9357 newDoc = xmlNewDoc(BAD_CAST "1.0");
9358 if (newDoc == NULL) {
9359 xmlFreeParserCtxt(ctxt);
9360 return(-1);
9361 }
9362 if (doc != NULL) {
9363 newDoc->intSubset = doc->intSubset;
9364 newDoc->extSubset = doc->extSubset;
9365 }
9366 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9367 if (newDoc->children == NULL) {
9368 if (sax != NULL)
9369 ctxt->sax = oldsax;
9370 xmlFreeParserCtxt(ctxt);
9371 newDoc->intSubset = NULL;
9372 newDoc->extSubset = NULL;
9373 xmlFreeDoc(newDoc);
9374 return(-1);
9375 }
9376 nodePush(ctxt, newDoc->children);
9377 if (doc == NULL) {
9378 ctxt->myDoc = newDoc;
9379 } else {
9380 ctxt->myDoc = doc;
9381 newDoc->children->doc = doc;
9382 }
9383 ctxt->instate = XML_PARSER_CONTENT;
9384 ctxt->depth = depth;
9385
9386 /*
9387 * Doing validity checking on chunk doesn't make sense
9388 */
9389 ctxt->validate = 0;
9390 ctxt->loadsubset = 0;
9391
9392 xmlParseContent(ctxt);
9393
9394 if ((RAW == '<') && (NXT(1) == '/')) {
9395 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9397 ctxt->sax->error(ctxt->userData,
9398 "chunk is not well balanced\n");
9399 ctxt->wellFormed = 0;
9400 ctxt->disableSAX = 1;
9401 } else if (RAW != 0) {
9402 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9404 ctxt->sax->error(ctxt->userData,
9405 "extra content at the end of well balanced chunk\n");
9406 ctxt->wellFormed = 0;
9407 ctxt->disableSAX = 1;
9408 }
9409 if (ctxt->node != newDoc->children) {
9410 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9412 ctxt->sax->error(ctxt->userData,
9413 "chunk is not well balanced\n");
9414 ctxt->wellFormed = 0;
9415 ctxt->disableSAX = 1;
9416 }
9417
9418 if (!ctxt->wellFormed) {
9419 if (ctxt->errNo == 0)
9420 ret = 1;
9421 else
9422 ret = ctxt->errNo;
9423 } else {
9424 if (list != NULL) {
9425 xmlNodePtr cur;
9426
9427 /*
9428 * Return the newly created nodeset after unlinking it from
9429 * they pseudo parent.
9430 */
9431 cur = newDoc->children->children;
9432 *list = cur;
9433 while (cur != NULL) {
9434 cur->parent = NULL;
9435 cur = cur->next;
9436 }
9437 newDoc->children->children = NULL;
9438 }
9439 ret = 0;
9440 }
9441 if (sax != NULL)
9442 ctxt->sax = oldsax;
9443 xmlFreeParserCtxt(ctxt);
9444 newDoc->intSubset = NULL;
9445 newDoc->extSubset = NULL;
9446 xmlFreeDoc(newDoc);
9447
9448 return(ret);
9449}
9450
9451/**
9452 * xmlSAXParseEntity:
9453 * @sax: the SAX handler block
9454 * @filename: the filename
9455 *
9456 * parse an XML external entity out of context and build a tree.
9457 * It use the given SAX function block to handle the parsing callback.
9458 * If sax is NULL, fallback to the default DOM tree building routines.
9459 *
9460 * [78] extParsedEnt ::= TextDecl? content
9461 *
9462 * This correspond to a "Well Balanced" chunk
9463 *
9464 * Returns the resulting document tree
9465 */
9466
9467xmlDocPtr
9468xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9469 xmlDocPtr ret;
9470 xmlParserCtxtPtr ctxt;
9471 char *directory = NULL;
9472
9473 ctxt = xmlCreateFileParserCtxt(filename);
9474 if (ctxt == NULL) {
9475 return(NULL);
9476 }
9477 if (sax != NULL) {
9478 if (ctxt->sax != NULL)
9479 xmlFree(ctxt->sax);
9480 ctxt->sax = sax;
9481 ctxt->userData = NULL;
9482 }
9483
9484 if ((ctxt->directory == NULL) && (directory == NULL))
9485 directory = xmlParserGetDirectory(filename);
9486
9487 xmlParseExtParsedEnt(ctxt);
9488
9489 if (ctxt->wellFormed)
9490 ret = ctxt->myDoc;
9491 else {
9492 ret = NULL;
9493 xmlFreeDoc(ctxt->myDoc);
9494 ctxt->myDoc = NULL;
9495 }
9496 if (sax != NULL)
9497 ctxt->sax = NULL;
9498 xmlFreeParserCtxt(ctxt);
9499
9500 return(ret);
9501}
9502
9503/**
9504 * xmlParseEntity:
9505 * @filename: the filename
9506 *
9507 * parse an XML external entity out of context and build a tree.
9508 *
9509 * [78] extParsedEnt ::= TextDecl? content
9510 *
9511 * This correspond to a "Well Balanced" chunk
9512 *
9513 * Returns the resulting document tree
9514 */
9515
9516xmlDocPtr
9517xmlParseEntity(const char *filename) {
9518 return(xmlSAXParseEntity(NULL, filename));
9519}
9520
9521/**
9522 * xmlCreateEntityParserCtxt:
9523 * @URL: the entity URL
9524 * @ID: the entity PUBLIC ID
9525 * @base: a posible base for the target URI
9526 *
9527 * Create a parser context for an external entity
9528 * Automatic support for ZLIB/Compress compressed document is provided
9529 * by default if found at compile-time.
9530 *
9531 * Returns the new parser context or NULL
9532 */
9533xmlParserCtxtPtr
9534xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9535 const xmlChar *base) {
9536 xmlParserCtxtPtr ctxt;
9537 xmlParserInputPtr inputStream;
9538 char *directory = NULL;
9539 xmlChar *uri;
9540
9541 ctxt = xmlNewParserCtxt();
9542 if (ctxt == NULL) {
9543 return(NULL);
9544 }
9545
9546 uri = xmlBuildURI(URL, base);
9547
9548 if (uri == NULL) {
9549 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9550 if (inputStream == NULL) {
9551 xmlFreeParserCtxt(ctxt);
9552 return(NULL);
9553 }
9554
9555 inputPush(ctxt, inputStream);
9556
9557 if ((ctxt->directory == NULL) && (directory == NULL))
9558 directory = xmlParserGetDirectory((char *)URL);
9559 if ((ctxt->directory == NULL) && (directory != NULL))
9560 ctxt->directory = directory;
9561 } else {
9562 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9563 if (inputStream == NULL) {
9564 xmlFree(uri);
9565 xmlFreeParserCtxt(ctxt);
9566 return(NULL);
9567 }
9568
9569 inputPush(ctxt, inputStream);
9570
9571 if ((ctxt->directory == NULL) && (directory == NULL))
9572 directory = xmlParserGetDirectory((char *)uri);
9573 if ((ctxt->directory == NULL) && (directory != NULL))
9574 ctxt->directory = directory;
9575 xmlFree(uri);
9576 }
9577
9578 return(ctxt);
9579}
9580
9581/************************************************************************
9582 * *
9583 * Front ends when parsing from a file *
9584 * *
9585 ************************************************************************/
9586
9587/**
9588 * xmlCreateFileParserCtxt:
9589 * @filename: the filename
9590 *
9591 * Create a parser context for a file content.
9592 * Automatic support for ZLIB/Compress compressed document is provided
9593 * by default if found at compile-time.
9594 *
9595 * Returns the new parser context or NULL
9596 */
9597xmlParserCtxtPtr
9598xmlCreateFileParserCtxt(const char *filename)
9599{
9600 xmlParserCtxtPtr ctxt;
9601 xmlParserInputPtr inputStream;
9602 xmlParserInputBufferPtr buf;
9603 char *directory = NULL;
9604
9605 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9606 if (buf == NULL) {
9607 return(NULL);
9608 }
9609
9610 ctxt = xmlNewParserCtxt();
9611 if (ctxt == NULL) {
9612 if (xmlDefaultSAXHandler.error != NULL) {
9613 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9614 }
9615 return(NULL);
9616 }
9617
9618 inputStream = xmlNewInputStream(ctxt);
9619 if (inputStream == NULL) {
9620 xmlFreeParserCtxt(ctxt);
9621 return(NULL);
9622 }
9623
9624 inputStream->filename = xmlMemStrdup(filename);
9625 inputStream->buf = buf;
9626 inputStream->base = inputStream->buf->buffer->content;
9627 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009628 inputStream->end =
9629 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009630
9631 inputPush(ctxt, inputStream);
9632 if ((ctxt->directory == NULL) && (directory == NULL))
9633 directory = xmlParserGetDirectory(filename);
9634 if ((ctxt->directory == NULL) && (directory != NULL))
9635 ctxt->directory = directory;
9636
9637 return(ctxt);
9638}
9639
9640/**
9641 * xmlSAXParseFile:
9642 * @sax: the SAX handler block
9643 * @filename: the filename
9644 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9645 * documents
9646 *
9647 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9648 * compressed document is provided by default if found at compile-time.
9649 * It use the given SAX function block to handle the parsing callback.
9650 * If sax is NULL, fallback to the default DOM tree building routines.
9651 *
9652 * Returns the resulting document tree
9653 */
9654
9655xmlDocPtr
9656xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9657 int recovery) {
9658 xmlDocPtr ret;
9659 xmlParserCtxtPtr ctxt;
9660 char *directory = NULL;
9661
9662 ctxt = xmlCreateFileParserCtxt(filename);
9663 if (ctxt == NULL) {
9664 return(NULL);
9665 }
9666 if (sax != NULL) {
9667 if (ctxt->sax != NULL)
9668 xmlFree(ctxt->sax);
9669 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009670 }
9671
9672 if ((ctxt->directory == NULL) && (directory == NULL))
9673 directory = xmlParserGetDirectory(filename);
9674 if ((ctxt->directory == NULL) && (directory != NULL))
9675 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9676
9677 xmlParseDocument(ctxt);
9678
9679 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9680 else {
9681 ret = NULL;
9682 xmlFreeDoc(ctxt->myDoc);
9683 ctxt->myDoc = NULL;
9684 }
9685 if (sax != NULL)
9686 ctxt->sax = NULL;
9687 xmlFreeParserCtxt(ctxt);
9688
9689 return(ret);
9690}
9691
9692/**
9693 * xmlRecoverDoc:
9694 * @cur: a pointer to an array of xmlChar
9695 *
9696 * parse an XML in-memory document and build a tree.
9697 * In the case the document is not Well Formed, a tree is built anyway
9698 *
9699 * Returns the resulting document tree
9700 */
9701
9702xmlDocPtr
9703xmlRecoverDoc(xmlChar *cur) {
9704 return(xmlSAXParseDoc(NULL, cur, 1));
9705}
9706
9707/**
9708 * xmlParseFile:
9709 * @filename: the filename
9710 *
9711 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9712 * compressed document is provided by default if found at compile-time.
9713 *
9714 * Returns the resulting document tree
9715 */
9716
9717xmlDocPtr
9718xmlParseFile(const char *filename) {
9719 return(xmlSAXParseFile(NULL, filename, 0));
9720}
9721
9722/**
9723 * xmlRecoverFile:
9724 * @filename: the filename
9725 *
9726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9727 * compressed document is provided by default if found at compile-time.
9728 * In the case the document is not Well Formed, a tree is built anyway
9729 *
9730 * Returns the resulting document tree
9731 */
9732
9733xmlDocPtr
9734xmlRecoverFile(const char *filename) {
9735 return(xmlSAXParseFile(NULL, filename, 1));
9736}
9737
9738
9739/**
9740 * xmlSetupParserForBuffer:
9741 * @ctxt: an XML parser context
9742 * @buffer: a xmlChar * buffer
9743 * @filename: a file name
9744 *
9745 * Setup the parser context to parse a new buffer; Clears any prior
9746 * contents from the parser context. The buffer parameter must not be
9747 * NULL, but the filename parameter can be
9748 */
9749void
9750xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9751 const char* filename)
9752{
9753 xmlParserInputPtr input;
9754
9755 input = xmlNewInputStream(ctxt);
9756 if (input == NULL) {
9757 perror("malloc");
9758 xmlFree(ctxt);
9759 return;
9760 }
9761
9762 xmlClearParserCtxt(ctxt);
9763 if (filename != NULL)
9764 input->filename = xmlMemStrdup(filename);
9765 input->base = buffer;
9766 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009767 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009768 inputPush(ctxt, input);
9769}
9770
9771/**
9772 * xmlSAXUserParseFile:
9773 * @sax: a SAX handler
9774 * @user_data: The user data returned on SAX callbacks
9775 * @filename: a file name
9776 *
9777 * parse an XML file and call the given SAX handler routines.
9778 * Automatic support for ZLIB/Compress compressed document is provided
9779 *
9780 * Returns 0 in case of success or a error number otherwise
9781 */
9782int
9783xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9784 const char *filename) {
9785 int ret = 0;
9786 xmlParserCtxtPtr ctxt;
9787
9788 ctxt = xmlCreateFileParserCtxt(filename);
9789 if (ctxt == NULL) return -1;
9790 if (ctxt->sax != &xmlDefaultSAXHandler)
9791 xmlFree(ctxt->sax);
9792 ctxt->sax = sax;
9793 if (user_data != NULL)
9794 ctxt->userData = user_data;
9795
9796 xmlParseDocument(ctxt);
9797
9798 if (ctxt->wellFormed)
9799 ret = 0;
9800 else {
9801 if (ctxt->errNo != 0)
9802 ret = ctxt->errNo;
9803 else
9804 ret = -1;
9805 }
9806 if (sax != NULL)
9807 ctxt->sax = NULL;
9808 xmlFreeParserCtxt(ctxt);
9809
9810 return ret;
9811}
9812
9813/************************************************************************
9814 * *
9815 * Front ends when parsing from memory *
9816 * *
9817 ************************************************************************/
9818
9819/**
9820 * xmlCreateMemoryParserCtxt:
9821 * @buffer: a pointer to a char array
9822 * @size: the size of the array
9823 *
9824 * Create a parser context for an XML in-memory document.
9825 *
9826 * Returns the new parser context or NULL
9827 */
9828xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009829xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlParserCtxtPtr ctxt;
9831 xmlParserInputPtr input;
9832 xmlParserInputBufferPtr buf;
9833
9834 if (buffer == NULL)
9835 return(NULL);
9836 if (size <= 0)
9837 return(NULL);
9838
9839 ctxt = xmlNewParserCtxt();
9840 if (ctxt == NULL)
9841 return(NULL);
9842
9843 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9844 if (buf == NULL) return(NULL);
9845
9846 input = xmlNewInputStream(ctxt);
9847 if (input == NULL) {
9848 xmlFreeParserCtxt(ctxt);
9849 return(NULL);
9850 }
9851
9852 input->filename = NULL;
9853 input->buf = buf;
9854 input->base = input->buf->buffer->content;
9855 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009856 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009857
9858 inputPush(ctxt, input);
9859 return(ctxt);
9860}
9861
9862/**
9863 * xmlSAXParseMemory:
9864 * @sax: the SAX handler block
9865 * @buffer: an pointer to a char array
9866 * @size: the size of the array
9867 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9868 * documents
9869 *
9870 * parse an XML in-memory block and use the given SAX function block
9871 * to handle the parsing callback. If sax is NULL, fallback to the default
9872 * DOM tree building routines.
9873 *
9874 * Returns the resulting document tree
9875 */
9876xmlDocPtr
9877xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9878 xmlDocPtr ret;
9879 xmlParserCtxtPtr ctxt;
9880
9881 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9882 if (ctxt == NULL) return(NULL);
9883 if (sax != NULL) {
9884 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009885 }
9886
9887 xmlParseDocument(ctxt);
9888
9889 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9890 else {
9891 ret = NULL;
9892 xmlFreeDoc(ctxt->myDoc);
9893 ctxt->myDoc = NULL;
9894 }
9895 if (sax != NULL)
9896 ctxt->sax = NULL;
9897 xmlFreeParserCtxt(ctxt);
9898
9899 return(ret);
9900}
9901
9902/**
9903 * xmlParseMemory:
9904 * @buffer: an pointer to a char array
9905 * @size: the size of the array
9906 *
9907 * parse an XML in-memory block and build a tree.
9908 *
9909 * Returns the resulting document tree
9910 */
9911
9912xmlDocPtr xmlParseMemory(char *buffer, int size) {
9913 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9914}
9915
9916/**
9917 * xmlRecoverMemory:
9918 * @buffer: an pointer to a char array
9919 * @size: the size of the array
9920 *
9921 * parse an XML in-memory block and build a tree.
9922 * In the case the document is not Well Formed, a tree is built anyway
9923 *
9924 * Returns the resulting document tree
9925 */
9926
9927xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9928 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9929}
9930
9931/**
9932 * xmlSAXUserParseMemory:
9933 * @sax: a SAX handler
9934 * @user_data: The user data returned on SAX callbacks
9935 * @buffer: an in-memory XML document input
9936 * @size: the length of the XML document in bytes
9937 *
9938 * A better SAX parsing routine.
9939 * parse an XML in-memory buffer and call the given SAX handler routines.
9940 *
9941 * Returns 0 in case of success or a error number otherwise
9942 */
9943int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009944 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009945 int ret = 0;
9946 xmlParserCtxtPtr ctxt;
9947 xmlSAXHandlerPtr oldsax = NULL;
9948
9949 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9950 if (ctxt == NULL) return -1;
9951 if (sax != NULL) {
9952 oldsax = ctxt->sax;
9953 ctxt->sax = sax;
9954 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009955 if (user_data != NULL)
9956 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009957
9958 xmlParseDocument(ctxt);
9959
9960 if (ctxt->wellFormed)
9961 ret = 0;
9962 else {
9963 if (ctxt->errNo != 0)
9964 ret = ctxt->errNo;
9965 else
9966 ret = -1;
9967 }
9968 if (sax != NULL) {
9969 ctxt->sax = oldsax;
9970 }
9971 xmlFreeParserCtxt(ctxt);
9972
9973 return ret;
9974}
9975
9976/**
9977 * xmlCreateDocParserCtxt:
9978 * @cur: a pointer to an array of xmlChar
9979 *
9980 * Creates a parser context for an XML in-memory document.
9981 *
9982 * Returns the new parser context or NULL
9983 */
9984xmlParserCtxtPtr
9985xmlCreateDocParserCtxt(xmlChar *cur) {
9986 int len;
9987
9988 if (cur == NULL)
9989 return(NULL);
9990 len = xmlStrlen(cur);
9991 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9992}
9993
9994/**
9995 * xmlSAXParseDoc:
9996 * @sax: the SAX handler block
9997 * @cur: a pointer to an array of xmlChar
9998 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9999 * documents
10000 *
10001 * parse an XML in-memory document and build a tree.
10002 * It use the given SAX function block to handle the parsing callback.
10003 * If sax is NULL, fallback to the default DOM tree building routines.
10004 *
10005 * Returns the resulting document tree
10006 */
10007
10008xmlDocPtr
10009xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10010 xmlDocPtr ret;
10011 xmlParserCtxtPtr ctxt;
10012
10013 if (cur == NULL) return(NULL);
10014
10015
10016 ctxt = xmlCreateDocParserCtxt(cur);
10017 if (ctxt == NULL) return(NULL);
10018 if (sax != NULL) {
10019 ctxt->sax = sax;
10020 ctxt->userData = NULL;
10021 }
10022
10023 xmlParseDocument(ctxt);
10024 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10025 else {
10026 ret = NULL;
10027 xmlFreeDoc(ctxt->myDoc);
10028 ctxt->myDoc = NULL;
10029 }
10030 if (sax != NULL)
10031 ctxt->sax = NULL;
10032 xmlFreeParserCtxt(ctxt);
10033
10034 return(ret);
10035}
10036
10037/**
10038 * xmlParseDoc:
10039 * @cur: a pointer to an array of xmlChar
10040 *
10041 * parse an XML in-memory document and build a tree.
10042 *
10043 * Returns the resulting document tree
10044 */
10045
10046xmlDocPtr
10047xmlParseDoc(xmlChar *cur) {
10048 return(xmlSAXParseDoc(NULL, cur, 0));
10049}
10050
10051
10052/************************************************************************
10053 * *
10054 * Miscellaneous *
10055 * *
10056 ************************************************************************/
10057
10058#ifdef LIBXML_XPATH_ENABLED
10059#include <libxml/xpath.h>
10060#endif
10061
/*
 * Non-zero once xmlInitParser() has run; reset to 0 by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
10063
10064/**
10065 * xmlInitParser:
10066 *
10067 * Initialization function for the XML parser.
10068 * This is not reentrant. Call once before processing in case of
10069 * use in multithreaded programs.
10070 */
10071
10072void
10073xmlInitParser(void) {
10074 if (xmlParserInitialized) return;
10075
10076 xmlInitCharEncodingHandlers();
10077 xmlInitializePredefinedEntities();
10078 xmlDefaultSAXHandlerInit();
10079 xmlRegisterDefaultInputCallbacks();
10080 xmlRegisterDefaultOutputCallbacks();
10081#ifdef LIBXML_HTML_ENABLED
10082 htmlInitAutoClose();
10083 htmlDefaultSAXHandlerInit();
10084#endif
10085#ifdef LIBXML_XPATH_ENABLED
10086 xmlXPathInit();
10087#endif
10088 xmlParserInitialized = 1;
10089}
10090
10091/**
10092 * xmlCleanupParser:
10093 *
10094 * Cleanup function for the XML parser. It tries to reclaim all
10095 * parsing related global memory allocated for the parser processing.
10096 * It doesn't deallocate any document related memory. Calling this
10097 * function should not prevent reusing the parser.
10098 */
10099
10100void
10101xmlCleanupParser(void) {
10102 xmlParserInitialized = 0;
10103 xmlCleanupCharEncodingHandlers();
10104 xmlCleanupPredefinedEntities();
10105}
10106
10107/**
10108 * xmlPedanticParserDefault:
10109 * @val: int 0 or 1
10110 *
10111 * Set and return the previous value for enabling pedantic warnings.
10112 *
10113 * Returns the last value for 0 for no substitution, 1 for substitution.
10114 */
10115
10116int
10117xmlPedanticParserDefault(int val) {
10118 int old = xmlPedanticParserDefaultValue;
10119
10120 xmlPedanticParserDefaultValue = val;
10121 return(old);
10122}
10123
10124/**
10125 * xmlSubstituteEntitiesDefault:
10126 * @val: int 0 or 1
10127 *
10128 * Set and return the previous value for default entity support.
10129 * Initially the parser always keep entity references instead of substituting
10130 * entity values in the output. This function has to be used to change the
10131 * default parser behaviour
10132 * SAX::subtituteEntities() has to be used for changing that on a file by
10133 * file basis.
10134 *
10135 * Returns the last value for 0 for no substitution, 1 for substitution.
10136 */
10137
10138int
10139xmlSubstituteEntitiesDefault(int val) {
10140 int old = xmlSubstituteEntitiesDefaultValue;
10141
10142 xmlSubstituteEntitiesDefaultValue = val;
10143 return(old);
10144}
10145
10146/**
10147 * xmlKeepBlanksDefault:
10148 * @val: int 0 or 1
10149 *
10150 * Set and return the previous value for default blanks text nodes support.
10151 * The 1.x version of the parser used an heuristic to try to detect
10152 * ignorable white spaces. As a result the SAX callback was generating
10153 * ignorableWhitespace() callbacks instead of characters() one, and when
10154 * using the DOM output text nodes containing those blanks were not generated.
10155 * The 2.x and later version will switch to the XML standard way and
10156 * ignorableWhitespace() are only generated when running the parser in
10157 * validating mode and when the current element doesn't allow CDATA or
10158 * mixed content.
10159 * This function is provided as a way to force the standard behaviour
10160 * on 1.X libs and to switch back to the old mode for compatibility when
10161 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10162 * by using xmlIsBlankNode() commodity function to detect the "empty"
10163 * nodes generated.
10164 * This value also affect autogeneration of indentation when saving code
10165 * if blanks sections are kept, indentation is not generated.
10166 *
10167 * Returns the last value for 0 for no substitution, 1 for substitution.
10168 */
10169
10170int
10171xmlKeepBlanksDefault(int val) {
10172 int old = xmlKeepBlanksDefaultValue;
10173
10174 xmlKeepBlanksDefaultValue = val;
10175 xmlIndentTreeOutput = !val;
10176 return(old);
10177}
10178