blob: 95e7c0a65733b8d921a3562a0c5fbf7f02bc899a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file: the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file's size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizes (in xmlChar units) of the scratch buffers used in this file:
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * allocated by xmlStringDecodeEntities(), XML_PARSER_BUFFER_SIZE is the
 * headroom that routine keeps free before it grows the buffer.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
79
/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
/* when non-zero, entity push/pop and PE reference traces are sent to
 * xmlGenericError by the routines of this file */
int xmlParserDebugEntities = 0;
#ifdef VMS
/*
 * The two definitions below are truncated for VMS (see the note in the
 * file header); the macros keep the long names usable in the sources.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlSAXHandlerPtr sax,
114 void *user_data, int depth, const xmlChar *URL,
115 const xmlChar *ID, xmlNodePtr *list, void *private);
Owen Taylor3473f882001-02-23 17:55:21 +0000116
117/************************************************************************
118 * *
119 * Parser stacks related functions and macros *
120 * *
121 ************************************************************************/
122
123xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
124 const xmlChar ** str);
125
126/*
127 * Generic function for accessing stacks in the Parser Context
128 */
129
130#define PUSH_AND_POP(scope, type, name) \
131scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
132 if (ctxt->name##Nr >= ctxt->name##Max) { \
133 ctxt->name##Max *= 2; \
134 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
135 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
136 if (ctxt->name##Tab == NULL) { \
137 xmlGenericError(xmlGenericErrorContext, \
138 "realloc failed !\n"); \
139 return(0); \
140 } \
141 } \
142 ctxt->name##Tab[ctxt->name##Nr] = value; \
143 ctxt->name = value; \
144 return(ctxt->name##Nr++); \
145} \
146scope type name##Pop(xmlParserCtxtPtr ctxt) { \
147 type ret; \
148 if (ctxt->name##Nr <= 0) return(0); \
149 ctxt->name##Nr--; \
150 if (ctxt->name##Nr > 0) \
151 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
152 else \
153 ctxt->name = NULL; \
154 ret = ctxt->name##Tab[ctxt->name##Nr]; \
155 ctxt->name##Tab[ctxt->name##Nr] = 0; \
156 return(ret); \
157} \
158
159/*
160 * Those macros actually generate the functions
161 */
162PUSH_AND_POP(extern, xmlParserInputPtr, input)
163PUSH_AND_POP(extern, xmlNodePtr, node)
164PUSH_AND_POP(extern, xmlChar*, name)
165
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000166static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000167 if (ctxt->spaceNr >= ctxt->spaceMax) {
168 ctxt->spaceMax *= 2;
169 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
170 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
171 if (ctxt->spaceTab == NULL) {
172 xmlGenericError(xmlGenericErrorContext,
173 "realloc failed !\n");
174 return(0);
175 }
176 }
177 ctxt->spaceTab[ctxt->spaceNr] = val;
178 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
179 return(ctxt->spaceNr++);
180}
181
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000182static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000183 int ret;
184 if (ctxt->spaceNr <= 0) return(0);
185 ctxt->spaceNr--;
186 if (ctxt->spaceNr > 0)
187 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
188 else
189 ctxt->space = NULL;
190 ret = ctxt->spaceTab[ctxt->spaceNr];
191 ctxt->spaceTab[ctxt->spaceNr] = -1;
192 return(ret);
193}
194
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar of the input buffer.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but never yields the pending token: it evaluates
 *           to -1 while a token is pending, otherwise to the current
 *           xmlChar of the input buffer.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character (calls xmlNextChar).
 *   NEXT1   Skip exactly one xmlChar and refill the input buffer when its
 *           end is reached; no line/column accounting is done.
 *   NEXTL(l) Skip l xmlChars in the input buffer, updating line/col and
 *           clearing the pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding.
 *   CUR_SCHAR(s, l) same but operate on a string instead of the context
 *   COPY_BUF(l,b,i,v) copy the unicode char v to the target buffer b at
 *           index i and increment the index; a plain store is used when
 *           l is 1, xmlCopyCharMultiByte otherwise.
 *   GROW, SHRINK handling of input buffers
 *   SKIP_BLANKS skip blank characters (calls xmlSkipBlankChars).
 *
 * NOTE: SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare "if" statement or
 * a bare brace block (not a do { } while (0)), so they must be used as
 * standalone statements: inside an unbraced if/else they would capture the
 * "else" or break the syntax. Their shape is kept unchanged for the existing
 * call sites; only the macro arguments are now parenthesized so that
 * expression arguments expand safely.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
282/**
283 * xmlSkipBlankChars:
284 * @ctxt: the XML parser context
285 *
286 * skip all blanks character found at that point in the input streams.
287 * It pops up finished entities in the process if allowable at that point.
288 *
289 * Returns the number of space chars skipped
290 */
291
292int
293xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000294 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000295
Daniel Veillard02141ea2001-04-30 11:46:40 +0000296 if (ctxt->token != 0) {
297 if (!IS_BLANK(ctxt->token))
298 return(0);
299 ctxt->token = 0;
300 res++;
301 }
Owen Taylor3473f882001-02-23 17:55:21 +0000302 /*
303 * It's Okay to use CUR/NEXT here since all the blanks are on
304 * the ASCII range.
305 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000306 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
307 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000308 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000309 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000310 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000311 cur = ctxt->input->cur;
312 while (IS_BLANK(*cur)) {
313 if (*cur == '\n') {
314 ctxt->input->line++; ctxt->input->col = 1;
315 }
316 cur++;
317 res++;
318 if (*cur == 0) {
319 ctxt->input->cur = cur;
320 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
321 cur = ctxt->input->cur;
322 }
323 }
324 ctxt->input->cur = cur;
325 } else {
326 int cur;
327 do {
328 cur = CUR;
329 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
330 NEXT;
331 cur = CUR;
332 res++;
333 }
334 while ((cur == 0) && (ctxt->inputNr > 1) &&
335 (ctxt->instate != XML_PARSER_COMMENT)) {
336 xmlPopInput(ctxt);
337 cur = CUR;
338 }
339 /*
340 * Need to handle support of entities branching here
341 */
342 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
343 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
344 }
Owen Taylor3473f882001-02-23 17:55:21 +0000345 return(res);
346}
347
348/************************************************************************
349 * *
350 * Commodity functions to handle entities *
351 * *
352 ************************************************************************/
353
354/**
355 * xmlPopInput:
356 * @ctxt: an XML parser context
357 *
358 * xmlPopInput: the current input pointed by ctxt->input came to an end
359 * pop it and return the next char.
360 *
361 * Returns the current xmlChar in the parser context
362 */
363xmlChar
364xmlPopInput(xmlParserCtxtPtr ctxt) {
365 if (ctxt->inputNr == 1) return(0); /* End of main Input */
366 if (xmlParserDebugEntities)
367 xmlGenericError(xmlGenericErrorContext,
368 "Popping input %d\n", ctxt->inputNr);
369 xmlFreeInputStream(inputPop(ctxt));
370 if ((*ctxt->input->cur == 0) &&
371 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
372 return(xmlPopInput(ctxt));
373 return(CUR);
374}
375
376/**
377 * xmlPushInput:
378 * @ctxt: an XML parser context
379 * @input: an XML parser input fragment (entity, XML fragment ...).
380 *
381 * xmlPushInput: switch to a new input stream which is stacked on top
382 * of the previous one(s).
383 */
384void
385xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
386 if (input == NULL) return;
387
388 if (xmlParserDebugEntities) {
389 if ((ctxt->input != NULL) && (ctxt->input->filename))
390 xmlGenericError(xmlGenericErrorContext,
391 "%s(%d): ", ctxt->input->filename,
392 ctxt->input->line);
393 xmlGenericError(xmlGenericErrorContext,
394 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
395 }
396 inputPush(ctxt, input);
397 GROW;
398}
399
400/**
401 * xmlParseCharRef:
402 * @ctxt: an XML parser context
403 *
404 * parse Reference declarations
405 *
406 * [66] CharRef ::= '&#' [0-9]+ ';' |
407 * '&#x' [0-9a-fA-F]+ ';'
408 *
409 * [ WFC: Legal Character ]
410 * Characters referred to using character references must match the
411 * production for Char.
412 *
413 * Returns the value parsed (as an int), 0 in case of error
414 */
415int
416xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000417 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000418 int count = 0;
419
420 if (ctxt->token != 0) {
421 val = ctxt->token;
422 ctxt->token = 0;
423 return(val);
424 }
425 /*
426 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
427 */
428 if ((RAW == '&') && (NXT(1) == '#') &&
429 (NXT(2) == 'x')) {
430 SKIP(3);
431 GROW;
432 while (RAW != ';') { /* loop blocked by count */
433 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
434 val = val * 16 + (CUR - '0');
435 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
436 val = val * 16 + (CUR - 'a') + 10;
437 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
438 val = val * 16 + (CUR - 'A') + 10;
439 else {
440 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
442 ctxt->sax->error(ctxt->userData,
443 "xmlParseCharRef: invalid hexadecimal value\n");
444 ctxt->wellFormed = 0;
445 ctxt->disableSAX = 1;
446 val = 0;
447 break;
448 }
449 NEXT;
450 count++;
451 }
452 if (RAW == ';') {
453 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
454 ctxt->nbChars ++;
455 ctxt->input->cur++;
456 }
457 } else if ((RAW == '&') && (NXT(1) == '#')) {
458 SKIP(2);
459 GROW;
460 while (RAW != ';') { /* loop blocked by count */
461 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
462 val = val * 10 + (CUR - '0');
463 else {
464 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
466 ctxt->sax->error(ctxt->userData,
467 "xmlParseCharRef: invalid decimal value\n");
468 ctxt->wellFormed = 0;
469 ctxt->disableSAX = 1;
470 val = 0;
471 break;
472 }
473 NEXT;
474 count++;
475 }
476 if (RAW == ';') {
477 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
478 ctxt->nbChars ++;
479 ctxt->input->cur++;
480 }
481 } else {
482 ctxt->errNo = XML_ERR_INVALID_CHARREF;
483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
484 ctxt->sax->error(ctxt->userData,
485 "xmlParseCharRef: invalid value\n");
486 ctxt->wellFormed = 0;
487 ctxt->disableSAX = 1;
488 }
489
490 /*
491 * [ WFC: Legal Character ]
492 * Characters referred to using character references must match the
493 * production for Char.
494 */
495 if (IS_CHAR(val)) {
496 return(val);
497 } else {
498 ctxt->errNo = XML_ERR_INVALID_CHAR;
499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
500 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
501 val);
502 ctxt->wellFormed = 0;
503 ctxt->disableSAX = 1;
504 }
505 return(0);
506}
507
508/**
509 * xmlParseStringCharRef:
510 * @ctxt: an XML parser context
511 * @str: a pointer to an index in the string
512 *
513 * parse Reference declarations, variant parsing from a string rather
514 * than an an input flow.
515 *
516 * [66] CharRef ::= '&#' [0-9]+ ';' |
517 * '&#x' [0-9a-fA-F]+ ';'
518 *
519 * [ WFC: Legal Character ]
520 * Characters referred to using character references must match the
521 * production for Char.
522 *
523 * Returns the value parsed (as an int), 0 in case of error, str will be
524 * updated to the current value of the index
525 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000526static int
Owen Taylor3473f882001-02-23 17:55:21 +0000527xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
528 const xmlChar *ptr;
529 xmlChar cur;
530 int val = 0;
531
532 if ((str == NULL) || (*str == NULL)) return(0);
533 ptr = *str;
534 cur = *ptr;
535 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
536 ptr += 3;
537 cur = *ptr;
538 while (cur != ';') { /* Non input consuming loop */
539 if ((cur >= '0') && (cur <= '9'))
540 val = val * 16 + (cur - '0');
541 else if ((cur >= 'a') && (cur <= 'f'))
542 val = val * 16 + (cur - 'a') + 10;
543 else if ((cur >= 'A') && (cur <= 'F'))
544 val = val * 16 + (cur - 'A') + 10;
545 else {
546 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
548 ctxt->sax->error(ctxt->userData,
549 "xmlParseStringCharRef: invalid hexadecimal value\n");
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 val = 0;
553 break;
554 }
555 ptr++;
556 cur = *ptr;
557 }
558 if (cur == ';')
559 ptr++;
560 } else if ((cur == '&') && (ptr[1] == '#')){
561 ptr += 2;
562 cur = *ptr;
563 while (cur != ';') { /* Non input consuming loops */
564 if ((cur >= '0') && (cur <= '9'))
565 val = val * 10 + (cur - '0');
566 else {
567 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
569 ctxt->sax->error(ctxt->userData,
570 "xmlParseStringCharRef: invalid decimal value\n");
571 ctxt->wellFormed = 0;
572 ctxt->disableSAX = 1;
573 val = 0;
574 break;
575 }
576 ptr++;
577 cur = *ptr;
578 }
579 if (cur == ';')
580 ptr++;
581 } else {
582 ctxt->errNo = XML_ERR_INVALID_CHARREF;
583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
584 ctxt->sax->error(ctxt->userData,
585 "xmlParseCharRef: invalid value\n");
586 ctxt->wellFormed = 0;
587 ctxt->disableSAX = 1;
588 return(0);
589 }
590 *str = ptr;
591
592 /*
593 * [ WFC: Legal Character ]
594 * Characters referred to using character references must match the
595 * production for Char.
596 */
597 if (IS_CHAR(val)) {
598 return(val);
599 } else {
600 ctxt->errNo = XML_ERR_INVALID_CHAR;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "CharRef: invalid xmlChar value %d\n", val);
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 }
607 return(0);
608}
609
610/**
611 * xmlParserHandlePEReference:
612 * @ctxt: the parser context
613 *
614 * [69] PEReference ::= '%' Name ';'
615 *
616 * [ WFC: No Recursion ]
617 * A parsed entity must not contain a recursive
618 * reference to itself, either directly or indirectly.
619 *
620 * [ WFC: Entity Declared ]
621 * In a document without any DTD, a document with only an internal DTD
622 * subset which contains no parameter entity references, or a document
623 * with "standalone='yes'", ... ... The declaration of a parameter
624 * entity must precede any reference to it...
625 *
626 * [ VC: Entity Declared ]
627 * In a document with an external subset or external parameter entities
628 * with "standalone='no'", ... ... The declaration of a parameter entity
629 * must precede any reference to it...
630 *
631 * [ WFC: In DTD ]
632 * Parameter-entity references may only appear in the DTD.
633 * NOTE: misleading but this is handled.
634 *
635 * A PEReference may have been detected in the current input stream
636 * the handling is done accordingly to
637 * http://www.w3.org/TR/REC-xml#entproc
638 * i.e.
639 * - Included in literal in entity values
640 * - Included as Paraemeter Entity reference within DTDs
641 */
642void
643xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
644 xmlChar *name;
645 xmlEntityPtr entity = NULL;
646 xmlParserInputPtr input;
647
648 if (ctxt->token != 0) {
649 return;
650 }
651 if (RAW != '%') return;
652 switch(ctxt->instate) {
653 case XML_PARSER_CDATA_SECTION:
654 return;
655 case XML_PARSER_COMMENT:
656 return;
657 case XML_PARSER_START_TAG:
658 return;
659 case XML_PARSER_END_TAG:
660 return;
661 case XML_PARSER_EOF:
662 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
664 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
665 ctxt->wellFormed = 0;
666 ctxt->disableSAX = 1;
667 return;
668 case XML_PARSER_PROLOG:
669 case XML_PARSER_START:
670 case XML_PARSER_MISC:
671 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
673 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
674 ctxt->wellFormed = 0;
675 ctxt->disableSAX = 1;
676 return;
677 case XML_PARSER_ENTITY_DECL:
678 case XML_PARSER_CONTENT:
679 case XML_PARSER_ATTRIBUTE_VALUE:
680 case XML_PARSER_PI:
681 case XML_PARSER_SYSTEM_LITERAL:
682 /* we just ignore it there */
683 return;
684 case XML_PARSER_EPILOG:
685 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
688 ctxt->wellFormed = 0;
689 ctxt->disableSAX = 1;
690 return;
691 case XML_PARSER_ENTITY_VALUE:
692 /*
693 * NOTE: in the case of entity values, we don't do the
694 * substitution here since we need the literal
695 * entity value to be able to save the internal
696 * subset of the document.
697 * This will be handled by xmlStringDecodeEntities
698 */
699 return;
700 case XML_PARSER_DTD:
701 /*
702 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
703 * In the internal DTD subset, parameter-entity references
704 * can occur only where markup declarations can occur, not
705 * within markup declarations.
706 * In that case this is handled in xmlParseMarkupDecl
707 */
708 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
709 return;
710 break;
711 case XML_PARSER_IGNORE:
712 return;
713 }
714
715 NEXT;
716 name = xmlParseName(ctxt);
717 if (xmlParserDebugEntities)
718 xmlGenericError(xmlGenericErrorContext,
719 "PE Reference: %s\n", name);
720 if (name == NULL) {
721 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
724 ctxt->wellFormed = 0;
725 ctxt->disableSAX = 1;
726 } else {
727 if (RAW == ';') {
728 NEXT;
729 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
730 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
731 if (entity == NULL) {
732
733 /*
734 * [ WFC: Entity Declared ]
735 * In a document without any DTD, a document with only an
736 * internal DTD subset which contains no parameter entity
737 * references, or a document with "standalone='yes'", ...
738 * ... The declaration of a parameter entity must precede
739 * any reference to it...
740 */
741 if ((ctxt->standalone == 1) ||
742 ((ctxt->hasExternalSubset == 0) &&
743 (ctxt->hasPErefs == 0))) {
744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
745 ctxt->sax->error(ctxt->userData,
746 "PEReference: %%%s; not found\n", name);
747 ctxt->wellFormed = 0;
748 ctxt->disableSAX = 1;
749 } else {
750 /*
751 * [ VC: Entity Declared ]
752 * In a document with an external subset or external
753 * parameter entities with "standalone='no'", ...
754 * ... The declaration of a parameter entity must precede
755 * any reference to it...
756 */
757 if ((!ctxt->disableSAX) &&
758 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
759 ctxt->vctxt.error(ctxt->vctxt.userData,
760 "PEReference: %%%s; not found\n", name);
761 } else if ((!ctxt->disableSAX) &&
762 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
763 ctxt->sax->warning(ctxt->userData,
764 "PEReference: %%%s; not found\n", name);
765 ctxt->valid = 0;
766 }
767 } else {
768 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
769 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
770 /*
771 * handle the extra spaces added before and after
772 * c.f. http://www.w3.org/TR/REC-xml#as-PE
773 * this is done independantly.
774 */
775 input = xmlNewEntityInputStream(ctxt, entity);
776 xmlPushInput(ctxt, input);
777 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
778 (RAW == '<') && (NXT(1) == '?') &&
779 (NXT(2) == 'x') && (NXT(3) == 'm') &&
780 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
781 xmlParseTextDecl(ctxt);
782 }
783 if (ctxt->token == 0)
784 ctxt->token = ' ';
785 } else {
786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
787 ctxt->sax->error(ctxt->userData,
788 "xmlHandlePEReference: %s is not a parameter entity\n",
789 name);
790 ctxt->wellFormed = 0;
791 ctxt->disableSAX = 1;
792 }
793 }
794 } else {
795 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData,
798 "xmlHandlePEReference: expecting ';'\n");
799 ctxt->wellFormed = 0;
800 ctxt->disableSAX = 1;
801 }
802 xmlFree(name);
803 }
804}
805
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the size of the xmlChar buffer named by its argument; the
 * companion variable <buffer>_size must hold its current size and is
 * updated on success. On allocation failure the error is reported with
 * perror(), the original buffer is released (it is no longer lost as it
 * was when the xmlRealloc() result overwrote the pointer directly) and
 * the enclosing function returns NULL. The macro can therefore only be
 * used in functions returning a pointer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_tmp;                                              \
                                                                        \
    buffer##_tmp = (xmlChar *)                                          \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (buffer##_tmp == NULL) {                                         \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_tmp;                                              \
    buffer##_size *= 2;                                                 \
}
818
819/**
820 * xmlStringDecodeEntities:
821 * @ctxt: the parser context
822 * @str: the input string
823 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
824 * @end: an end marker xmlChar, 0 if none
825 * @end2: an end marker xmlChar, 0 if none
826 * @end3: an end marker xmlChar, 0 if none
827 *
828 * Takes a entity string content and process to do the adequate subtitutions.
829 *
830 * [67] Reference ::= EntityRef | CharRef
831 *
832 * [69] PEReference ::= '%' Name ';'
833 *
834 * Returns A newly allocated string with the substitution done. The caller
835 * must deallocate it !
836 */
837xmlChar *
838xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
839 xmlChar end, xmlChar end2, xmlChar end3) {
840 xmlChar *buffer = NULL;
841 int buffer_size = 0;
842
843 xmlChar *current = NULL;
844 xmlEntityPtr ent;
845 int c,l;
846 int nbchars = 0;
847
848 if (str == NULL)
849 return(NULL);
850
851 if (ctxt->depth > 40) {
852 ctxt->errNo = XML_ERR_ENTITY_LOOP;
853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
854 ctxt->sax->error(ctxt->userData,
855 "Detected entity reference loop\n");
856 ctxt->wellFormed = 0;
857 ctxt->disableSAX = 1;
858 return(NULL);
859 }
860
861 /*
862 * allocate a translation buffer.
863 */
864 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
865 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
866 if (buffer == NULL) {
867 perror("xmlDecodeEntities: malloc failed");
868 return(NULL);
869 }
870
871 /*
872 * Ok loop until we reach one of the ending char or a size limit.
873 * we are operating on already parsed values.
874 */
875 c = CUR_SCHAR(str, l);
876 while ((c != 0) && (c != end) && /* non input consuming loop */
877 (c != end2) && (c != end3)) {
878
879 if (c == 0) break;
880 if ((c == '&') && (str[1] == '#')) {
881 int val = xmlParseStringCharRef(ctxt, &str);
882 if (val != 0) {
883 COPY_BUF(0,buffer,nbchars,val);
884 }
885 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
886 if (xmlParserDebugEntities)
887 xmlGenericError(xmlGenericErrorContext,
888 "String decoding Entity Reference: %.30s\n",
889 str);
890 ent = xmlParseStringEntityRef(ctxt, &str);
891 if ((ent != NULL) &&
892 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
893 if (ent->content != NULL) {
894 COPY_BUF(0,buffer,nbchars,ent->content[0]);
895 } else {
896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
897 ctxt->sax->error(ctxt->userData,
898 "internal error entity has no content\n");
899 }
900 } else if ((ent != NULL) && (ent->content != NULL)) {
901 xmlChar *rep;
902
903 ctxt->depth++;
904 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
905 0, 0, 0);
906 ctxt->depth--;
907 if (rep != NULL) {
908 current = rep;
909 while (*current != 0) { /* non input consuming loop */
910 buffer[nbchars++] = *current++;
911 if (nbchars >
912 buffer_size - XML_PARSER_BUFFER_SIZE) {
913 growBuffer(buffer);
914 }
915 }
916 xmlFree(rep);
917 }
918 } else if (ent != NULL) {
919 int i = xmlStrlen(ent->name);
920 const xmlChar *cur = ent->name;
921
922 buffer[nbchars++] = '&';
923 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
924 growBuffer(buffer);
925 }
926 for (;i > 0;i--)
927 buffer[nbchars++] = *cur++;
928 buffer[nbchars++] = ';';
929 }
930 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
931 if (xmlParserDebugEntities)
932 xmlGenericError(xmlGenericErrorContext,
933 "String decoding PE Reference: %.30s\n", str);
934 ent = xmlParseStringPEReference(ctxt, &str);
935 if (ent != NULL) {
936 xmlChar *rep;
937
938 ctxt->depth++;
939 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
940 0, 0, 0);
941 ctxt->depth--;
942 if (rep != NULL) {
943 current = rep;
944 while (*current != 0) { /* non input consuming loop */
945 buffer[nbchars++] = *current++;
946 if (nbchars >
947 buffer_size - XML_PARSER_BUFFER_SIZE) {
948 growBuffer(buffer);
949 }
950 }
951 xmlFree(rep);
952 }
953 }
954 } else {
955 COPY_BUF(l,buffer,nbchars,c);
956 str += l;
957 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
958 growBuffer(buffer);
959 }
960 }
961 c = CUR_SCHAR(str, l);
962 }
963 buffer[nbchars++] = 0;
964 return(buffer);
965}
966
967
968/************************************************************************
969 * *
970 * Commodity functions to handle xmlChars *
971 * *
972 ************************************************************************/
973
974/**
975 * xmlStrndup:
976 * @cur: the input xmlChar *
977 * @len: the len of @cur
978 *
979 * a strndup for array of xmlChar's
980 *
981 * Returns a new xmlChar * or NULL
982 */
983xmlChar *
984xmlStrndup(const xmlChar *cur, int len) {
985 xmlChar *ret;
986
987 if ((cur == NULL) || (len < 0)) return(NULL);
988 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
989 if (ret == NULL) {
990 xmlGenericError(xmlGenericErrorContext,
991 "malloc of %ld byte failed\n",
992 (len + 1) * (long)sizeof(xmlChar));
993 return(NULL);
994 }
995 memcpy(ret, cur, len * sizeof(xmlChar));
996 ret[len] = 0;
997 return(ret);
998}
999
1000/**
1001 * xmlStrdup:
1002 * @cur: the input xmlChar *
1003 *
1004 * a strdup for array of xmlChar's. Since they are supposed to be
1005 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1006 * a termination mark of '0'.
1007 *
1008 * Returns a new xmlChar * or NULL
1009 */
1010xmlChar *
1011xmlStrdup(const xmlChar *cur) {
1012 const xmlChar *p = cur;
1013
1014 if (cur == NULL) return(NULL);
1015 while (*p != 0) p++; /* non input consuming */
1016 return(xmlStrndup(cur, p - cur));
1017}
1018
1019/**
1020 * xmlCharStrndup:
1021 * @cur: the input char *
1022 * @len: the len of @cur
1023 *
1024 * a strndup for char's to xmlChar's
1025 *
1026 * Returns a new xmlChar * or NULL
1027 */
1028
1029xmlChar *
1030xmlCharStrndup(const char *cur, int len) {
1031 int i;
1032 xmlChar *ret;
1033
1034 if ((cur == NULL) || (len < 0)) return(NULL);
1035 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1036 if (ret == NULL) {
1037 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1038 (len + 1) * (long)sizeof(xmlChar));
1039 return(NULL);
1040 }
1041 for (i = 0;i < len;i++)
1042 ret[i] = (xmlChar) cur[i];
1043 ret[len] = 0;
1044 return(ret);
1045}
1046
1047/**
1048 * xmlCharStrdup:
1049 * @cur: the input char *
1050 * @len: the len of @cur
1051 *
1052 * a strdup for char's to xmlChar's
1053 *
1054 * Returns a new xmlChar * or NULL
1055 */
1056
1057xmlChar *
1058xmlCharStrdup(const char *cur) {
1059 const char *p = cur;
1060
1061 if (cur == NULL) return(NULL);
1062 while (*p != '\0') p++; /* non input consuming */
1063 return(xmlCharStrndup(cur, p - cur));
1064}
1065
1066/**
1067 * xmlStrcmp:
1068 * @str1: the first xmlChar *
1069 * @str2: the second xmlChar *
1070 *
1071 * a strcmp for xmlChar's
1072 *
1073 * Returns the integer result of the comparison
1074 */
1075
1076int
1077xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1078 register int tmp;
1079
1080 if (str1 == str2) return(0);
1081 if (str1 == NULL) return(-1);
1082 if (str2 == NULL) return(1);
1083 do {
1084 tmp = *str1++ - *str2;
1085 if (tmp != 0) return(tmp);
1086 } while (*str2++ != 0);
1087 return 0;
1088}
1089
1090/**
1091 * xmlStrEqual:
1092 * @str1: the first xmlChar *
1093 * @str2: the second xmlChar *
1094 *
1095 * Check if both string are equal of have same content
1096 * Should be a bit more readable and faster than xmlStrEqual()
1097 *
1098 * Returns 1 if they are equal, 0 if they are different
1099 */
1100
1101int
1102xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1103 if (str1 == str2) return(1);
1104 if (str1 == NULL) return(0);
1105 if (str2 == NULL) return(0);
1106 do {
1107 if (*str1++ != *str2) return(0);
1108 } while (*str2++);
1109 return(1);
1110}
1111
1112/**
1113 * xmlStrncmp:
1114 * @str1: the first xmlChar *
1115 * @str2: the second xmlChar *
1116 * @len: the max comparison length
1117 *
1118 * a strncmp for xmlChar's
1119 *
1120 * Returns the integer result of the comparison
1121 */
1122
1123int
1124xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1125 register int tmp;
1126
1127 if (len <= 0) return(0);
1128 if (str1 == str2) return(0);
1129 if (str1 == NULL) return(-1);
1130 if (str2 == NULL) return(1);
1131 do {
1132 tmp = *str1++ - *str2;
1133 if (tmp != 0 || --len == 0) return(tmp);
1134 } while (*str2++ != 0);
1135 return 0;
1136}
1137
1138static xmlChar casemap[256] = {
1139 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1140 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1141 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1142 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1143 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1144 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1145 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1146 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1147 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1148 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1149 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1150 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1151 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1152 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1153 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1154 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1155 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1156 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1157 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1158 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1159 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1160 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1161 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1162 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1163 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1164 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1165 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1166 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1167 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1168 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1169 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1170 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1171};
1172
1173/**
1174 * xmlStrcasecmp:
1175 * @str1: the first xmlChar *
1176 * @str2: the second xmlChar *
1177 *
1178 * a strcasecmp for xmlChar's
1179 *
1180 * Returns the integer result of the comparison
1181 */
1182
1183int
1184xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1185 register int tmp;
1186
1187 if (str1 == str2) return(0);
1188 if (str1 == NULL) return(-1);
1189 if (str2 == NULL) return(1);
1190 do {
1191 tmp = casemap[*str1++] - casemap[*str2];
1192 if (tmp != 0) return(tmp);
1193 } while (*str2++ != 0);
1194 return 0;
1195}
1196
1197/**
1198 * xmlStrncasecmp:
1199 * @str1: the first xmlChar *
1200 * @str2: the second xmlChar *
1201 * @len: the max comparison length
1202 *
1203 * a strncasecmp for xmlChar's
1204 *
1205 * Returns the integer result of the comparison
1206 */
1207
1208int
1209xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1210 register int tmp;
1211
1212 if (len <= 0) return(0);
1213 if (str1 == str2) return(0);
1214 if (str1 == NULL) return(-1);
1215 if (str2 == NULL) return(1);
1216 do {
1217 tmp = casemap[*str1++] - casemap[*str2];
1218 if (tmp != 0 || --len == 0) return(tmp);
1219 } while (*str2++ != 0);
1220 return 0;
1221}
1222
1223/**
1224 * xmlStrchr:
1225 * @str: the xmlChar * array
1226 * @val: the xmlChar to search
1227 *
1228 * a strchr for xmlChar's
1229 *
1230 * Returns the xmlChar * for the first occurence or NULL.
1231 */
1232
1233const xmlChar *
1234xmlStrchr(const xmlChar *str, xmlChar val) {
1235 if (str == NULL) return(NULL);
1236 while (*str != 0) { /* non input consuming */
1237 if (*str == val) return((xmlChar *) str);
1238 str++;
1239 }
1240 return(NULL);
1241}
1242
1243/**
1244 * xmlStrstr:
1245 * @str: the xmlChar * array (haystack)
1246 * @val: the xmlChar to search (needle)
1247 *
1248 * a strstr for xmlChar's
1249 *
1250 * Returns the xmlChar * for the first occurence or NULL.
1251 */
1252
1253const xmlChar *
1254xmlStrstr(const xmlChar *str, xmlChar *val) {
1255 int n;
1256
1257 if (str == NULL) return(NULL);
1258 if (val == NULL) return(NULL);
1259 n = xmlStrlen(val);
1260
1261 if (n == 0) return(str);
1262 while (*str != 0) { /* non input consuming */
1263 if (*str == *val) {
1264 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1265 }
1266 str++;
1267 }
1268 return(NULL);
1269}
1270
1271/**
1272 * xmlStrcasestr:
1273 * @str: the xmlChar * array (haystack)
1274 * @val: the xmlChar to search (needle)
1275 *
1276 * a case-ignoring strstr for xmlChar's
1277 *
1278 * Returns the xmlChar * for the first occurence or NULL.
1279 */
1280
1281const xmlChar *
1282xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1283 int n;
1284
1285 if (str == NULL) return(NULL);
1286 if (val == NULL) return(NULL);
1287 n = xmlStrlen(val);
1288
1289 if (n == 0) return(str);
1290 while (*str != 0) { /* non input consuming */
1291 if (casemap[*str] == casemap[*val])
1292 if (!xmlStrncasecmp(str, val, n)) return(str);
1293 str++;
1294 }
1295 return(NULL);
1296}
1297
1298/**
1299 * xmlStrsub:
1300 * @str: the xmlChar * array (haystack)
1301 * @start: the index of the first char (zero based)
1302 * @len: the length of the substring
1303 *
1304 * Extract a substring of a given string
1305 *
1306 * Returns the xmlChar * for the first occurence or NULL.
1307 */
1308
1309xmlChar *
1310xmlStrsub(const xmlChar *str, int start, int len) {
1311 int i;
1312
1313 if (str == NULL) return(NULL);
1314 if (start < 0) return(NULL);
1315 if (len < 0) return(NULL);
1316
1317 for (i = 0;i < start;i++) {
1318 if (*str == 0) return(NULL);
1319 str++;
1320 }
1321 if (*str == 0) return(NULL);
1322 return(xmlStrndup(str, len));
1323}
1324
1325/**
1326 * xmlStrlen:
1327 * @str: the xmlChar * array
1328 *
1329 * length of a xmlChar's string
1330 *
1331 * Returns the number of xmlChar contained in the ARRAY.
1332 */
1333
1334int
1335xmlStrlen(const xmlChar *str) {
1336 int len = 0;
1337
1338 if (str == NULL) return(0);
1339 while (*str != 0) { /* non input consuming */
1340 str++;
1341 len++;
1342 }
1343 return(len);
1344}
1345
1346/**
1347 * xmlStrncat:
1348 * @cur: the original xmlChar * array
1349 * @add: the xmlChar * array added
1350 * @len: the length of @add
1351 *
1352 * a strncat for array of xmlChar's, it will extend cur with the len
1353 * first bytes of @add.
1354 *
1355 * Returns a new xmlChar *, the original @cur is reallocated if needed
1356 * and should not be freed
1357 */
1358
1359xmlChar *
1360xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1361 int size;
1362 xmlChar *ret;
1363
1364 if ((add == NULL) || (len == 0))
1365 return(cur);
1366 if (cur == NULL)
1367 return(xmlStrndup(add, len));
1368
1369 size = xmlStrlen(cur);
1370 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1371 if (ret == NULL) {
1372 xmlGenericError(xmlGenericErrorContext,
1373 "xmlStrncat: realloc of %ld byte failed\n",
1374 (size + len + 1) * (long)sizeof(xmlChar));
1375 return(cur);
1376 }
1377 memcpy(&ret[size], add, len * sizeof(xmlChar));
1378 ret[size + len] = 0;
1379 return(ret);
1380}
1381
1382/**
1383 * xmlStrcat:
1384 * @cur: the original xmlChar * array
1385 * @add: the xmlChar * array added
1386 *
1387 * a strcat for array of xmlChar's. Since they are supposed to be
1388 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1389 * a termination mark of '0'.
1390 *
1391 * Returns a new xmlChar * containing the concatenated string.
1392 */
1393xmlChar *
1394xmlStrcat(xmlChar *cur, const xmlChar *add) {
1395 const xmlChar *p = add;
1396
1397 if (add == NULL) return(cur);
1398 if (cur == NULL)
1399 return(xmlStrdup(add));
1400
1401 while (*p != 0) p++; /* non input consuming */
1402 return(xmlStrncat(cur, add, p - add));
1403}
1404
1405/************************************************************************
1406 * *
1407 * Commodity functions, cleanup needed ? *
1408 * *
1409 ************************************************************************/
1410
1411/**
1412 * areBlanks:
1413 * @ctxt: an XML parser context
1414 * @str: a xmlChar *
1415 * @len: the size of @str
1416 *
1417 * Is this a sequence of blank chars that one can ignore ?
1418 *
1419 * Returns 1 if ignorable 0 otherwise.
1420 */
1421
1422static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1423 int i, ret;
1424 xmlNodePtr lastChild;
1425
Daniel Veillard2f362242001-03-02 17:36:21 +00001426 if (ctxt->keepBlanks)
1427 return(0);
1428
Owen Taylor3473f882001-02-23 17:55:21 +00001429 /*
1430 * Check for xml:space value.
1431 */
1432 if (*(ctxt->space) == 1)
1433 return(0);
1434
1435 /*
1436 * Check that the string is made of blanks
1437 */
1438 for (i = 0;i < len;i++)
1439 if (!(IS_BLANK(str[i]))) return(0);
1440
1441 /*
1442 * Look if the element is mixed content in the Dtd if available
1443 */
1444 if (ctxt->myDoc != NULL) {
1445 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1446 if (ret == 0) return(1);
1447 if (ret == 1) return(0);
1448 }
1449
1450 /*
1451 * Otherwise, heuristic :-\
1452 */
Owen Taylor3473f882001-02-23 17:55:21 +00001453 if (RAW != '<') return(0);
1454 if (ctxt->node == NULL) return(0);
1455 if ((ctxt->node->children == NULL) &&
1456 (RAW == '<') && (NXT(1) == '/')) return(0);
1457
1458 lastChild = xmlGetLastChild(ctxt->node);
1459 if (lastChild == NULL) {
1460 if (ctxt->node->content != NULL) return(0);
1461 } else if (xmlNodeIsText(lastChild))
1462 return(0);
1463 else if ((ctxt->node->children != NULL) &&
1464 (xmlNodeIsText(ctxt->node->children)))
1465 return(0);
1466 return(1);
1467}
1468
/*
 * Forward declarations, needed because these routines are used
 * recursively by the parsing functions.
 */
void xmlParsePEReference(xmlParserCtxtPtr ctxt);
void xmlParseReference(xmlParserCtxtPtr ctxt);
1474
1475/************************************************************************
1476 * *
1477 * Extra stuff for namespace support *
1478 * Relates to http://www.w3.org/TR/WD-xml-names *
1479 * *
1480 ************************************************************************/
1481
1482/**
1483 * xmlSplitQName:
1484 * @ctxt: an XML parser context
1485 * @name: an XML parser context
1486 * @prefix: a xmlChar **
1487 *
1488 * parse an UTF8 encoded XML qualified name string
1489 *
1490 * [NS 5] QName ::= (Prefix ':')? LocalPart
1491 *
1492 * [NS 6] Prefix ::= NCName
1493 *
1494 * [NS 7] LocalPart ::= NCName
1495 *
1496 * Returns the local part, and prefix is updated
1497 * to get the Prefix if any.
1498 */
1499
1500xmlChar *
1501xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1502 xmlChar buf[XML_MAX_NAMELEN + 5];
1503 xmlChar *buffer = NULL;
1504 int len = 0;
1505 int max = XML_MAX_NAMELEN;
1506 xmlChar *ret = NULL;
1507 const xmlChar *cur = name;
1508 int c;
1509
1510 *prefix = NULL;
1511
1512#ifndef XML_XML_NAMESPACE
1513 /* xml: prefix is not really a namespace */
1514 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1515 (cur[2] == 'l') && (cur[3] == ':'))
1516 return(xmlStrdup(name));
1517#endif
1518
1519 /* nasty but valid */
1520 if (cur[0] == ':')
1521 return(xmlStrdup(name));
1522
1523 c = *cur++;
1524 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1525 buf[len++] = c;
1526 c = *cur++;
1527 }
1528 if (len >= max) {
1529 /*
1530 * Okay someone managed to make a huge name, so he's ready to pay
1531 * for the processing speed.
1532 */
1533 max = len * 2;
1534
1535 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1536 if (buffer == NULL) {
1537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1538 ctxt->sax->error(ctxt->userData,
1539 "xmlSplitQName: out of memory\n");
1540 return(NULL);
1541 }
1542 memcpy(buffer, buf, len);
1543 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1544 if (len + 10 > max) {
1545 max *= 2;
1546 buffer = (xmlChar *) xmlRealloc(buffer,
1547 max * sizeof(xmlChar));
1548 if (buffer == NULL) {
1549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1550 ctxt->sax->error(ctxt->userData,
1551 "xmlSplitQName: out of memory\n");
1552 return(NULL);
1553 }
1554 }
1555 buffer[len++] = c;
1556 c = *cur++;
1557 }
1558 buffer[len] = 0;
1559 }
1560
1561 if (buffer == NULL)
1562 ret = xmlStrndup(buf, len);
1563 else {
1564 ret = buffer;
1565 buffer = NULL;
1566 max = XML_MAX_NAMELEN;
1567 }
1568
1569
1570 if (c == ':') {
1571 c = *cur++;
1572 if (c == 0) return(ret);
1573 *prefix = ret;
1574 len = 0;
1575
1576 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1577 buf[len++] = c;
1578 c = *cur++;
1579 }
1580 if (len >= max) {
1581 /*
1582 * Okay someone managed to make a huge name, so he's ready to pay
1583 * for the processing speed.
1584 */
1585 max = len * 2;
1586
1587 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1588 if (buffer == NULL) {
1589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1590 ctxt->sax->error(ctxt->userData,
1591 "xmlSplitQName: out of memory\n");
1592 return(NULL);
1593 }
1594 memcpy(buffer, buf, len);
1595 while (c != 0) { /* tested bigname2.xml */
1596 if (len + 10 > max) {
1597 max *= 2;
1598 buffer = (xmlChar *) xmlRealloc(buffer,
1599 max * sizeof(xmlChar));
1600 if (buffer == NULL) {
1601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1602 ctxt->sax->error(ctxt->userData,
1603 "xmlSplitQName: out of memory\n");
1604 return(NULL);
1605 }
1606 }
1607 buffer[len++] = c;
1608 c = *cur++;
1609 }
1610 buffer[len] = 0;
1611 }
1612
1613 if (buffer == NULL)
1614 ret = xmlStrndup(buf, len);
1615 else {
1616 ret = buffer;
1617 }
1618 }
1619
1620 return(ret);
1621}
1622
1623/************************************************************************
1624 * *
1625 * The parser itself *
1626 * Relates to http://www.w3.org/TR/REC-xml *
1627 * *
1628 ************************************************************************/
1629
Daniel Veillard21a0f912001-02-25 19:54:14 +00001630xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001631/**
1632 * xmlParseName:
1633 * @ctxt: an XML parser context
1634 *
1635 * parse an XML name.
1636 *
1637 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1638 * CombiningChar | Extender
1639 *
1640 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1641 *
1642 * [6] Names ::= Name (S Name)*
1643 *
1644 * Returns the Name parsed or NULL
1645 */
1646
1647xmlChar *
1648xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001649 const xmlChar *in;
1650 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001651 int count = 0;
1652
1653 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001654
1655 /*
1656 * Accelerator for simple ASCII names
1657 */
1658 in = ctxt->input->cur;
1659 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1660 ((*in >= 0x41) && (*in <= 0x5A)) ||
1661 (*in == '_') || (*in == ':')) {
1662 in++;
1663 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1664 ((*in >= 0x41) && (*in <= 0x5A)) ||
1665 ((*in >= 0x30) && (*in <= 0x39)) ||
1666 (*in == '_') || (*in == ':'))
1667 in++;
1668 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1669 count = in - ctxt->input->cur;
1670 ret = xmlStrndup(ctxt->input->cur, count);
1671 ctxt->input->cur = in;
1672 return(ret);
1673 }
1674 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001675 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001676}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001677
Daniel Veillard21a0f912001-02-25 19:54:14 +00001678xmlChar *
1679xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1680 xmlChar buf[XML_MAX_NAMELEN + 5];
1681 int len = 0, l;
1682 int c;
1683 int count = 0;
1684
1685 /*
1686 * Handler for more complex cases
1687 */
1688 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 c = CUR_CHAR(l);
1690 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1691 (!IS_LETTER(c) && (c != '_') &&
1692 (c != ':'))) {
1693 return(NULL);
1694 }
1695
1696 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1697 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1698 (c == '.') || (c == '-') ||
1699 (c == '_') || (c == ':') ||
1700 (IS_COMBINING(c)) ||
1701 (IS_EXTENDER(c)))) {
1702 if (count++ > 100) {
1703 count = 0;
1704 GROW;
1705 }
1706 COPY_BUF(l,buf,len,c);
1707 NEXTL(l);
1708 c = CUR_CHAR(l);
1709 if (len >= XML_MAX_NAMELEN) {
1710 /*
1711 * Okay someone managed to make a huge name, so he's ready to pay
1712 * for the processing speed.
1713 */
1714 xmlChar *buffer;
1715 int max = len * 2;
1716
1717 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1718 if (buffer == NULL) {
1719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1720 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001721 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001722 return(NULL);
1723 }
1724 memcpy(buffer, buf, len);
1725 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1726 (c == '.') || (c == '-') ||
1727 (c == '_') || (c == ':') ||
1728 (IS_COMBINING(c)) ||
1729 (IS_EXTENDER(c))) {
1730 if (count++ > 100) {
1731 count = 0;
1732 GROW;
1733 }
1734 if (len + 10 > max) {
1735 max *= 2;
1736 buffer = (xmlChar *) xmlRealloc(buffer,
1737 max * sizeof(xmlChar));
1738 if (buffer == NULL) {
1739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1740 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001741 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001742 return(NULL);
1743 }
1744 }
1745 COPY_BUF(l,buffer,len,c);
1746 NEXTL(l);
1747 c = CUR_CHAR(l);
1748 }
1749 buffer[len] = 0;
1750 return(buffer);
1751 }
1752 }
1753 return(xmlStrndup(buf, len));
1754}
1755
1756/**
1757 * xmlParseStringName:
1758 * @ctxt: an XML parser context
1759 * @str: a pointer to the string pointer (IN/OUT)
1760 *
1761 * parse an XML name.
1762 *
1763 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1764 * CombiningChar | Extender
1765 *
1766 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1767 *
1768 * [6] Names ::= Name (S Name)*
1769 *
1770 * Returns the Name parsed or NULL. The str pointer
1771 * is updated to the current location in the string.
1772 */
1773
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001774static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001775xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1776 xmlChar buf[XML_MAX_NAMELEN + 5];
1777 const xmlChar *cur = *str;
1778 int len = 0, l;
1779 int c;
1780
1781 c = CUR_SCHAR(cur, l);
1782 if (!IS_LETTER(c) && (c != '_') &&
1783 (c != ':')) {
1784 return(NULL);
1785 }
1786
1787 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1788 (c == '.') || (c == '-') ||
1789 (c == '_') || (c == ':') ||
1790 (IS_COMBINING(c)) ||
1791 (IS_EXTENDER(c))) {
1792 COPY_BUF(l,buf,len,c);
1793 cur += l;
1794 c = CUR_SCHAR(cur, l);
1795 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1796 /*
1797 * Okay someone managed to make a huge name, so he's ready to pay
1798 * for the processing speed.
1799 */
1800 xmlChar *buffer;
1801 int max = len * 2;
1802
1803 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1804 if (buffer == NULL) {
1805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806 ctxt->sax->error(ctxt->userData,
1807 "xmlParseStringName: out of memory\n");
1808 return(NULL);
1809 }
1810 memcpy(buffer, buf, len);
1811 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1812 (c == '.') || (c == '-') ||
1813 (c == '_') || (c == ':') ||
1814 (IS_COMBINING(c)) ||
1815 (IS_EXTENDER(c))) {
1816 if (len + 10 > max) {
1817 max *= 2;
1818 buffer = (xmlChar *) xmlRealloc(buffer,
1819 max * sizeof(xmlChar));
1820 if (buffer == NULL) {
1821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1822 ctxt->sax->error(ctxt->userData,
1823 "xmlParseStringName: out of memory\n");
1824 return(NULL);
1825 }
1826 }
1827 COPY_BUF(l,buffer,len,c);
1828 cur += l;
1829 c = CUR_SCHAR(cur, l);
1830 }
1831 buffer[len] = 0;
1832 *str = cur;
1833 return(buffer);
1834 }
1835 }
1836 *str = cur;
1837 return(xmlStrndup(buf, len));
1838}
1839
1840/**
1841 * xmlParseNmtoken:
1842 * @ctxt: an XML parser context
1843 *
1844 * parse an XML Nmtoken.
1845 *
1846 * [7] Nmtoken ::= (NameChar)+
1847 *
1848 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1849 *
1850 * Returns the Nmtoken parsed or NULL
1851 */
1852
1853xmlChar *
1854xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1855 xmlChar buf[XML_MAX_NAMELEN + 5];
1856 int len = 0, l;
1857 int c;
1858 int count = 0;
1859
1860 GROW;
1861 c = CUR_CHAR(l);
1862
1863 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1864 (c == '.') || (c == '-') ||
1865 (c == '_') || (c == ':') ||
1866 (IS_COMBINING(c)) ||
1867 (IS_EXTENDER(c))) {
1868 if (count++ > 100) {
1869 count = 0;
1870 GROW;
1871 }
1872 COPY_BUF(l,buf,len,c);
1873 NEXTL(l);
1874 c = CUR_CHAR(l);
1875 if (len >= XML_MAX_NAMELEN) {
1876 /*
1877 * Okay someone managed to make a huge token, so he's ready to pay
1878 * for the processing speed.
1879 */
1880 xmlChar *buffer;
1881 int max = len * 2;
1882
1883 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1884 if (buffer == NULL) {
1885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1886 ctxt->sax->error(ctxt->userData,
1887 "xmlParseNmtoken: out of memory\n");
1888 return(NULL);
1889 }
1890 memcpy(buffer, buf, len);
1891 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1892 (c == '.') || (c == '-') ||
1893 (c == '_') || (c == ':') ||
1894 (IS_COMBINING(c)) ||
1895 (IS_EXTENDER(c))) {
1896 if (count++ > 100) {
1897 count = 0;
1898 GROW;
1899 }
1900 if (len + 10 > max) {
1901 max *= 2;
1902 buffer = (xmlChar *) xmlRealloc(buffer,
1903 max * sizeof(xmlChar));
1904 if (buffer == NULL) {
1905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1906 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001907 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001908 return(NULL);
1909 }
1910 }
1911 COPY_BUF(l,buffer,len,c);
1912 NEXTL(l);
1913 c = CUR_CHAR(l);
1914 }
1915 buffer[len] = 0;
1916 return(buffer);
1917 }
1918 }
1919 if (len == 0)
1920 return(NULL);
1921 return(xmlStrndup(buf, len));
1922}
1923
1924/**
1925 * xmlParseEntityValue:
1926 * @ctxt: an XML parser context
1927 * @orig: if non-NULL store a copy of the original entity value
1928 *
1929 * parse a value for ENTITY declarations
1930 *
1931 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1932 * "'" ([^%&'] | PEReference | Reference)* "'"
1933 *
1934 * Returns the EntityValue parsed with reference substitued or NULL
1935 */
1936
1937xmlChar *
1938xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1939 xmlChar *buf = NULL;
1940 int len = 0;
1941 int size = XML_PARSER_BUFFER_SIZE;
1942 int c, l;
1943 xmlChar stop;
1944 xmlChar *ret = NULL;
1945 const xmlChar *cur = NULL;
1946 xmlParserInputPtr input;
1947
1948 if (RAW == '"') stop = '"';
1949 else if (RAW == '\'') stop = '\'';
1950 else {
1951 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1953 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1954 ctxt->wellFormed = 0;
1955 ctxt->disableSAX = 1;
1956 return(NULL);
1957 }
1958 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1959 if (buf == NULL) {
1960 xmlGenericError(xmlGenericErrorContext,
1961 "malloc of %d byte failed\n", size);
1962 return(NULL);
1963 }
1964
1965 /*
1966 * The content of the entity definition is copied in a buffer.
1967 */
1968
1969 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1970 input = ctxt->input;
1971 GROW;
1972 NEXT;
1973 c = CUR_CHAR(l);
1974 /*
1975 * NOTE: 4.4.5 Included in Literal
1976 * When a parameter entity reference appears in a literal entity
1977 * value, ... a single or double quote character in the replacement
1978 * text is always treated as a normal data character and will not
1979 * terminate the literal.
1980 * In practice it means we stop the loop only when back at parsing
1981 * the initial entity and the quote is found
1982 */
1983 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1984 (ctxt->input != input))) {
1985 if (len + 5 >= size) {
1986 size *= 2;
1987 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1988 if (buf == NULL) {
1989 xmlGenericError(xmlGenericErrorContext,
1990 "realloc of %d byte failed\n", size);
1991 return(NULL);
1992 }
1993 }
1994 COPY_BUF(l,buf,len,c);
1995 NEXTL(l);
1996 /*
1997 * Pop-up of finished entities.
1998 */
1999 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2000 xmlPopInput(ctxt);
2001
2002 GROW;
2003 c = CUR_CHAR(l);
2004 if (c == 0) {
2005 GROW;
2006 c = CUR_CHAR(l);
2007 }
2008 }
2009 buf[len] = 0;
2010
2011 /*
2012 * Raise problem w.r.t. '&' and '%' being used in non-entities
2013 * reference constructs. Note Charref will be handled in
2014 * xmlStringDecodeEntities()
2015 */
2016 cur = buf;
2017 while (*cur != 0) { /* non input consuming */
2018 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2019 xmlChar *name;
2020 xmlChar tmp = *cur;
2021
2022 cur++;
2023 name = xmlParseStringName(ctxt, &cur);
2024 if ((name == NULL) || (*cur != ';')) {
2025 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2027 ctxt->sax->error(ctxt->userData,
2028 "EntityValue: '%c' forbidden except for entities references\n",
2029 tmp);
2030 ctxt->wellFormed = 0;
2031 ctxt->disableSAX = 1;
2032 }
2033 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2034 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2036 ctxt->sax->error(ctxt->userData,
2037 "EntityValue: PEReferences forbidden in internal subset\n",
2038 tmp);
2039 ctxt->wellFormed = 0;
2040 ctxt->disableSAX = 1;
2041 }
2042 if (name != NULL)
2043 xmlFree(name);
2044 }
2045 cur++;
2046 }
2047
2048 /*
2049 * Then PEReference entities are substituted.
2050 */
2051 if (c != stop) {
2052 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2054 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2055 ctxt->wellFormed = 0;
2056 ctxt->disableSAX = 1;
2057 xmlFree(buf);
2058 } else {
2059 NEXT;
2060 /*
2061 * NOTE: 4.4.7 Bypassed
2062 * When a general entity reference appears in the EntityValue in
2063 * an entity declaration, it is bypassed and left as is.
2064 * so XML_SUBSTITUTE_REF is not set here.
2065 */
2066 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2067 0, 0, 0);
2068 if (orig != NULL)
2069 *orig = buf;
2070 else
2071 xmlFree(buf);
2072 }
2073
2074 return(ret);
2075}
2076
2077/**
2078 * xmlParseAttValue:
2079 * @ctxt: an XML parser context
2080 *
2081 * parse a value for an attribute
2082 * Note: the parser won't do substitution of entities here, this
2083 * will be handled later in xmlStringGetNodeList
2084 *
2085 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2086 * "'" ([^<&'] | Reference)* "'"
2087 *
2088 * 3.3.3 Attribute-Value Normalization:
2089 * Before the value of an attribute is passed to the application or
2090 * checked for validity, the XML processor must normalize it as follows:
2091 * - a character reference is processed by appending the referenced
2092 * character to the attribute value
2093 * - an entity reference is processed by recursively processing the
2094 * replacement text of the entity
2095 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2096 * appending #x20 to the normalized value, except that only a single
2097 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2098 * parsed entity or the literal entity value of an internal parsed entity
2099 * - other characters are processed by appending them to the normalized value
2100 * If the declared value is not CDATA, then the XML processor must further
2101 * process the normalized attribute value by discarding any leading and
2102 * trailing space (#x20) characters, and by replacing sequences of space
2103 * (#x20) characters by a single space (#x20) character.
2104 * All attributes for which no declaration has been read should be treated
2105 * by a non-validating parser as if declared CDATA.
2106 *
2107 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2108 */
2109
2110xmlChar *
2111xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2112 xmlChar limit = 0;
2113 xmlChar *buf = NULL;
2114 int len = 0;
2115 int buf_size = 0;
2116 int c, l;
2117 xmlChar *current = NULL;
2118 xmlEntityPtr ent;
2119
2120
2121 SHRINK;
2122 if (NXT(0) == '"') {
2123 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2124 limit = '"';
2125 NEXT;
2126 } else if (NXT(0) == '\'') {
2127 limit = '\'';
2128 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2129 NEXT;
2130 } else {
2131 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2134 ctxt->wellFormed = 0;
2135 ctxt->disableSAX = 1;
2136 return(NULL);
2137 }
2138
2139 /*
2140 * allocate a translation buffer.
2141 */
2142 buf_size = XML_PARSER_BUFFER_SIZE;
2143 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2144 if (buf == NULL) {
2145 perror("xmlParseAttValue: malloc failed");
2146 return(NULL);
2147 }
2148
2149 /*
2150 * Ok loop until we reach one of the ending char or a size limit.
2151 */
2152 c = CUR_CHAR(l);
2153 while (((NXT(0) != limit) && /* checked */
2154 (c != '<')) || (ctxt->token != 0)) {
2155 if (c == 0) break;
2156 if (ctxt->token == '&') {
2157 /*
2158 * The reparsing will be done in xmlStringGetNodeList()
2159 * called by the attribute() function in SAX.c
2160 */
2161 static xmlChar buffer[6] = "&#38;";
2162
2163 if (len > buf_size - 10) {
2164 growBuffer(buf);
2165 }
2166 current = &buffer[0];
2167 while (*current != 0) { /* non input consuming */
2168 buf[len++] = *current++;
2169 }
2170 ctxt->token = 0;
2171 } else if (c == '&') {
2172 if (NXT(1) == '#') {
2173 int val = xmlParseCharRef(ctxt);
2174 if (val == '&') {
2175 /*
2176 * The reparsing will be done in xmlStringGetNodeList()
2177 * called by the attribute() function in SAX.c
2178 */
2179 static xmlChar buffer[6] = "&#38;";
2180
2181 if (len > buf_size - 10) {
2182 growBuffer(buf);
2183 }
2184 current = &buffer[0];
2185 while (*current != 0) { /* non input consuming */
2186 buf[len++] = *current++;
2187 }
2188 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002189 if (len > buf_size - 10) {
2190 growBuffer(buf);
2191 }
Owen Taylor3473f882001-02-23 17:55:21 +00002192 len += xmlCopyChar(0, &buf[len], val);
2193 }
2194 } else {
2195 ent = xmlParseEntityRef(ctxt);
2196 if ((ent != NULL) &&
2197 (ctxt->replaceEntities != 0)) {
2198 xmlChar *rep;
2199
2200 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2201 rep = xmlStringDecodeEntities(ctxt, ent->content,
2202 XML_SUBSTITUTE_REF, 0, 0, 0);
2203 if (rep != NULL) {
2204 current = rep;
2205 while (*current != 0) { /* non input consuming */
2206 buf[len++] = *current++;
2207 if (len > buf_size - 10) {
2208 growBuffer(buf);
2209 }
2210 }
2211 xmlFree(rep);
2212 }
2213 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002214 if (len > buf_size - 10) {
2215 growBuffer(buf);
2216 }
Owen Taylor3473f882001-02-23 17:55:21 +00002217 if (ent->content != NULL)
2218 buf[len++] = ent->content[0];
2219 }
2220 } else if (ent != NULL) {
2221 int i = xmlStrlen(ent->name);
2222 const xmlChar *cur = ent->name;
2223
2224 /*
2225 * This may look absurd but is needed to detect
2226 * entities problems
2227 */
2228 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2229 (ent->content != NULL)) {
2230 xmlChar *rep;
2231 rep = xmlStringDecodeEntities(ctxt, ent->content,
2232 XML_SUBSTITUTE_REF, 0, 0, 0);
2233 if (rep != NULL)
2234 xmlFree(rep);
2235 }
2236
2237 /*
2238 * Just output the reference
2239 */
2240 buf[len++] = '&';
2241 if (len > buf_size - i - 10) {
2242 growBuffer(buf);
2243 }
2244 for (;i > 0;i--)
2245 buf[len++] = *cur++;
2246 buf[len++] = ';';
2247 }
2248 }
2249 } else {
2250 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2251 COPY_BUF(l,buf,len,0x20);
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 } else {
2256 COPY_BUF(l,buf,len,c);
2257 if (len > buf_size - 10) {
2258 growBuffer(buf);
2259 }
2260 }
2261 NEXTL(l);
2262 }
2263 GROW;
2264 c = CUR_CHAR(l);
2265 }
2266 buf[len++] = 0;
2267 if (RAW == '<') {
2268 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2270 ctxt->sax->error(ctxt->userData,
2271 "Unescaped '<' not allowed in attributes values\n");
2272 ctxt->wellFormed = 0;
2273 ctxt->disableSAX = 1;
2274 } else if (RAW != limit) {
2275 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2278 ctxt->wellFormed = 0;
2279 ctxt->disableSAX = 1;
2280 } else
2281 NEXT;
2282 return(buf);
2283}
2284
2285/**
2286 * xmlParseSystemLiteral:
2287 * @ctxt: an XML parser context
2288 *
2289 * parse an XML Literal
2290 *
2291 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2292 *
2293 * Returns the SystemLiteral parsed or NULL
2294 */
2295
2296xmlChar *
2297xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2298 xmlChar *buf = NULL;
2299 int len = 0;
2300 int size = XML_PARSER_BUFFER_SIZE;
2301 int cur, l;
2302 xmlChar stop;
2303 int state = ctxt->instate;
2304 int count = 0;
2305
2306 SHRINK;
2307 if (RAW == '"') {
2308 NEXT;
2309 stop = '"';
2310 } else if (RAW == '\'') {
2311 NEXT;
2312 stop = '\'';
2313 } else {
2314 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2316 ctxt->sax->error(ctxt->userData,
2317 "SystemLiteral \" or ' expected\n");
2318 ctxt->wellFormed = 0;
2319 ctxt->disableSAX = 1;
2320 return(NULL);
2321 }
2322
2323 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2324 if (buf == NULL) {
2325 xmlGenericError(xmlGenericErrorContext,
2326 "malloc of %d byte failed\n", size);
2327 return(NULL);
2328 }
2329 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2330 cur = CUR_CHAR(l);
2331 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2332 if (len + 5 >= size) {
2333 size *= 2;
2334 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2335 if (buf == NULL) {
2336 xmlGenericError(xmlGenericErrorContext,
2337 "realloc of %d byte failed\n", size);
2338 ctxt->instate = (xmlParserInputState) state;
2339 return(NULL);
2340 }
2341 }
2342 count++;
2343 if (count > 50) {
2344 GROW;
2345 count = 0;
2346 }
2347 COPY_BUF(l,buf,len,cur);
2348 NEXTL(l);
2349 cur = CUR_CHAR(l);
2350 if (cur == 0) {
2351 GROW;
2352 SHRINK;
2353 cur = CUR_CHAR(l);
2354 }
2355 }
2356 buf[len] = 0;
2357 ctxt->instate = (xmlParserInputState) state;
2358 if (!IS_CHAR(cur)) {
2359 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2361 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2362 ctxt->wellFormed = 0;
2363 ctxt->disableSAX = 1;
2364 } else {
2365 NEXT;
2366 }
2367 return(buf);
2368}
2369
2370/**
2371 * xmlParsePubidLiteral:
2372 * @ctxt: an XML parser context
2373 *
2374 * parse an XML public literal
2375 *
2376 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2377 *
2378 * Returns the PubidLiteral parsed or NULL.
2379 */
2380
2381xmlChar *
2382xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2383 xmlChar *buf = NULL;
2384 int len = 0;
2385 int size = XML_PARSER_BUFFER_SIZE;
2386 xmlChar cur;
2387 xmlChar stop;
2388 int count = 0;
2389
2390 SHRINK;
2391 if (RAW == '"') {
2392 NEXT;
2393 stop = '"';
2394 } else if (RAW == '\'') {
2395 NEXT;
2396 stop = '\'';
2397 } else {
2398 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2399 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2400 ctxt->sax->error(ctxt->userData,
2401 "SystemLiteral \" or ' expected\n");
2402 ctxt->wellFormed = 0;
2403 ctxt->disableSAX = 1;
2404 return(NULL);
2405 }
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 cur = CUR;
2413 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2414 if (len + 1 >= size) {
2415 size *= 2;
2416 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2417 if (buf == NULL) {
2418 xmlGenericError(xmlGenericErrorContext,
2419 "realloc of %d byte failed\n", size);
2420 return(NULL);
2421 }
2422 }
2423 buf[len++] = cur;
2424 count++;
2425 if (count > 50) {
2426 GROW;
2427 count = 0;
2428 }
2429 NEXT;
2430 cur = CUR;
2431 if (cur == 0) {
2432 GROW;
2433 SHRINK;
2434 cur = CUR;
2435 }
2436 }
2437 buf[len] = 0;
2438 if (cur != stop) {
2439 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2441 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2442 ctxt->wellFormed = 0;
2443 ctxt->disableSAX = 1;
2444 } else {
2445 NEXT;
2446 }
2447 return(buf);
2448}
2449
Daniel Veillard48b2f892001-02-25 16:11:03 +00002450void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002451/**
2452 * xmlParseCharData:
2453 * @ctxt: an XML parser context
2454 * @cdata: int indicating whether we are within a CDATA section
2455 *
2456 * parse a CharData section.
2457 * if we are within a CDATA section ']]>' marks an end of section.
2458 *
2459 * The right angle bracket (>) may be represented using the string "&gt;",
2460 * and must, for compatibility, be escaped using "&gt;" or a character
2461 * reference when it appears in the string "]]>" in content, when that
2462 * string is not marking the end of a CDATA section.
2463 *
2464 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2465 */
2466
2467void
2468xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002469 const xmlChar *in;
2470 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002471 int line = ctxt->input->line;
2472 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002473
2474 SHRINK;
2475 GROW;
2476 /*
2477 * Accelerated common case where input don't need to be
2478 * modified before passing it to the handler.
2479 */
2480 if ((ctxt->token == 0) && (!cdata)) {
2481 in = ctxt->input->cur;
2482 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002483get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002484 while (((*in >= 0x20) && (*in != '<') &&
2485 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2486 in++;
2487 if (*in == 0xA) {
2488 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002489 in++;
2490 while (*in == 0xA) {
2491 ctxt->input->line++;
2492 in++;
2493 }
2494 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002495 }
2496 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002497 if (nbchar > 0) {
2498 if (IS_BLANK(*ctxt->input->cur) &&
2499 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2500 if (ctxt->sax->ignorableWhitespace != NULL)
2501 ctxt->sax->ignorableWhitespace(ctxt->userData,
2502 ctxt->input->cur, nbchar);
2503 } else {
2504 if (ctxt->sax->characters != NULL)
2505 ctxt->sax->characters(ctxt->userData,
2506 ctxt->input->cur, nbchar);
2507 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002508 }
2509 ctxt->input->cur = in;
2510 if (*in == 0xD) {
2511 in++;
2512 if (*in == 0xA) {
2513 ctxt->input->cur = in;
2514 in++;
2515 ctxt->input->line++;
2516 continue; /* while */
2517 }
2518 in--;
2519 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002520 if (*in == '<') {
2521 return;
2522 }
2523 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002524 return;
2525 }
2526 SHRINK;
2527 GROW;
2528 in = ctxt->input->cur;
2529 } while ((*in >= 0x20) && (*in <= 0x7F));
2530 nbchar = 0;
2531 }
Daniel Veillard50582112001-03-26 22:52:16 +00002532 ctxt->input->line = line;
2533 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002534 xmlParseCharDataComplex(ctxt, cdata);
2535}
2536
2537void
2538xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002539 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2540 int nbchar = 0;
2541 int cur, l;
2542 int count = 0;
2543
2544 SHRINK;
2545 GROW;
2546 cur = CUR_CHAR(l);
2547 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2548 ((cur != '&') || (ctxt->token == '&')) &&
2549 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2550 if ((cur == ']') && (NXT(1) == ']') &&
2551 (NXT(2) == '>')) {
2552 if (cdata) break;
2553 else {
2554 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2556 ctxt->sax->error(ctxt->userData,
2557 "Sequence ']]>' not allowed in content\n");
2558 /* Should this be relaxed ??? I see a "must here */
2559 ctxt->wellFormed = 0;
2560 ctxt->disableSAX = 1;
2561 }
2562 }
2563 COPY_BUF(l,buf,nbchar,cur);
2564 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2565 /*
2566 * Ok the segment is to be consumed as chars.
2567 */
2568 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2569 if (areBlanks(ctxt, buf, nbchar)) {
2570 if (ctxt->sax->ignorableWhitespace != NULL)
2571 ctxt->sax->ignorableWhitespace(ctxt->userData,
2572 buf, nbchar);
2573 } else {
2574 if (ctxt->sax->characters != NULL)
2575 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2576 }
2577 }
2578 nbchar = 0;
2579 }
2580 count++;
2581 if (count > 50) {
2582 GROW;
2583 count = 0;
2584 }
2585 NEXTL(l);
2586 cur = CUR_CHAR(l);
2587 }
2588 if (nbchar != 0) {
2589 /*
2590 * Ok the segment is to be consumed as chars.
2591 */
2592 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2593 if (areBlanks(ctxt, buf, nbchar)) {
2594 if (ctxt->sax->ignorableWhitespace != NULL)
2595 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2596 } else {
2597 if (ctxt->sax->characters != NULL)
2598 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2599 }
2600 }
2601 }
2602}
2603
2604/**
2605 * xmlParseExternalID:
2606 * @ctxt: an XML parser context
2607 * @publicID: a xmlChar** receiving PubidLiteral
2608 * @strict: indicate whether we should restrict parsing to only
2609 * production [75], see NOTE below
2610 *
2611 * Parse an External ID or a Public ID
2612 *
2613 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2614 * 'PUBLIC' S PubidLiteral S SystemLiteral
2615 *
2616 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2617 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2618 *
2619 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2620 *
2621 * Returns the function returns SystemLiteral and in the second
2622 * case publicID receives PubidLiteral, is strict is off
2623 * it is possible to return NULL and have publicID set.
2624 */
2625
2626xmlChar *
2627xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2628 xmlChar *URI = NULL;
2629
2630 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002631
2632 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002633 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2634 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2635 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2636 SKIP(6);
2637 if (!IS_BLANK(CUR)) {
2638 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2640 ctxt->sax->error(ctxt->userData,
2641 "Space required after 'SYSTEM'\n");
2642 ctxt->wellFormed = 0;
2643 ctxt->disableSAX = 1;
2644 }
2645 SKIP_BLANKS;
2646 URI = xmlParseSystemLiteral(ctxt);
2647 if (URI == NULL) {
2648 ctxt->errNo = XML_ERR_URI_REQUIRED;
2649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2650 ctxt->sax->error(ctxt->userData,
2651 "xmlParseExternalID: SYSTEM, no URI\n");
2652 ctxt->wellFormed = 0;
2653 ctxt->disableSAX = 1;
2654 }
2655 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2656 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2657 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2658 SKIP(6);
2659 if (!IS_BLANK(CUR)) {
2660 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2662 ctxt->sax->error(ctxt->userData,
2663 "Space required after 'PUBLIC'\n");
2664 ctxt->wellFormed = 0;
2665 ctxt->disableSAX = 1;
2666 }
2667 SKIP_BLANKS;
2668 *publicID = xmlParsePubidLiteral(ctxt);
2669 if (*publicID == NULL) {
2670 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2672 ctxt->sax->error(ctxt->userData,
2673 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2674 ctxt->wellFormed = 0;
2675 ctxt->disableSAX = 1;
2676 }
2677 if (strict) {
2678 /*
2679 * We don't handle [83] so "S SystemLiteral" is required.
2680 */
2681 if (!IS_BLANK(CUR)) {
2682 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2684 ctxt->sax->error(ctxt->userData,
2685 "Space required after the Public Identifier\n");
2686 ctxt->wellFormed = 0;
2687 ctxt->disableSAX = 1;
2688 }
2689 } else {
2690 /*
2691 * We handle [83] so we return immediately, if
2692 * "S SystemLiteral" is not detected. From a purely parsing
2693 * point of view that's a nice mess.
2694 */
2695 const xmlChar *ptr;
2696 GROW;
2697
2698 ptr = CUR_PTR;
2699 if (!IS_BLANK(*ptr)) return(NULL);
2700
2701 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2702 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2703 }
2704 SKIP_BLANKS;
2705 URI = xmlParseSystemLiteral(ctxt);
2706 if (URI == NULL) {
2707 ctxt->errNo = XML_ERR_URI_REQUIRED;
2708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2709 ctxt->sax->error(ctxt->userData,
2710 "xmlParseExternalID: PUBLIC, no URI\n");
2711 ctxt->wellFormed = 0;
2712 ctxt->disableSAX = 1;
2713 }
2714 }
2715 return(URI);
2716}
2717
2718/**
2719 * xmlParseComment:
2720 * @ctxt: an XML parser context
2721 *
2722 * Skip an XML (SGML) comment <!-- .... -->
2723 * The spec says that "For compatibility, the string "--" (double-hyphen)
2724 * must not occur within comments. "
2725 *
2726 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2727 */
2728void
2729xmlParseComment(xmlParserCtxtPtr ctxt) {
2730 xmlChar *buf = NULL;
2731 int len;
2732 int size = XML_PARSER_BUFFER_SIZE;
2733 int q, ql;
2734 int r, rl;
2735 int cur, l;
2736 xmlParserInputState state;
2737 xmlParserInputPtr input = ctxt->input;
2738 int count = 0;
2739
2740 /*
2741 * Check that there is a comment right here.
2742 */
2743 if ((RAW != '<') || (NXT(1) != '!') ||
2744 (NXT(2) != '-') || (NXT(3) != '-')) return;
2745
2746 state = ctxt->instate;
2747 ctxt->instate = XML_PARSER_COMMENT;
2748 SHRINK;
2749 SKIP(4);
2750 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2751 if (buf == NULL) {
2752 xmlGenericError(xmlGenericErrorContext,
2753 "malloc of %d byte failed\n", size);
2754 ctxt->instate = state;
2755 return;
2756 }
2757 q = CUR_CHAR(ql);
2758 NEXTL(ql);
2759 r = CUR_CHAR(rl);
2760 NEXTL(rl);
2761 cur = CUR_CHAR(l);
2762 len = 0;
2763 while (IS_CHAR(cur) && /* checked */
2764 ((cur != '>') ||
2765 (r != '-') || (q != '-'))) {
2766 if ((r == '-') && (q == '-') && (len > 1)) {
2767 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2769 ctxt->sax->error(ctxt->userData,
2770 "Comment must not contain '--' (double-hyphen)`\n");
2771 ctxt->wellFormed = 0;
2772 ctxt->disableSAX = 1;
2773 }
2774 if (len + 5 >= size) {
2775 size *= 2;
2776 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2777 if (buf == NULL) {
2778 xmlGenericError(xmlGenericErrorContext,
2779 "realloc of %d byte failed\n", size);
2780 ctxt->instate = state;
2781 return;
2782 }
2783 }
2784 COPY_BUF(ql,buf,len,q);
2785 q = r;
2786 ql = rl;
2787 r = cur;
2788 rl = l;
2789
2790 count++;
2791 if (count > 50) {
2792 GROW;
2793 count = 0;
2794 }
2795 NEXTL(l);
2796 cur = CUR_CHAR(l);
2797 if (cur == 0) {
2798 SHRINK;
2799 GROW;
2800 cur = CUR_CHAR(l);
2801 }
2802 }
2803 buf[len] = 0;
2804 if (!IS_CHAR(cur)) {
2805 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2807 ctxt->sax->error(ctxt->userData,
2808 "Comment not terminated \n<!--%.50s\n", buf);
2809 ctxt->wellFormed = 0;
2810 ctxt->disableSAX = 1;
2811 xmlFree(buf);
2812 } else {
2813 if (input != ctxt->input) {
2814 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2816 ctxt->sax->error(ctxt->userData,
2817"Comment doesn't start and stop in the same entity\n");
2818 ctxt->wellFormed = 0;
2819 ctxt->disableSAX = 1;
2820 }
2821 NEXT;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2823 (!ctxt->disableSAX))
2824 ctxt->sax->comment(ctxt->userData, buf);
2825 xmlFree(buf);
2826 }
2827 ctxt->instate = state;
2828}
2829
2830/**
2831 * xmlParsePITarget:
2832 * @ctxt: an XML parser context
2833 *
2834 * parse the name of a PI
2835 *
2836 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2837 *
2838 * Returns the PITarget name or NULL
2839 */
2840
2841xmlChar *
2842xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2843 xmlChar *name;
2844
2845 name = xmlParseName(ctxt);
2846 if ((name != NULL) &&
2847 ((name[0] == 'x') || (name[0] == 'X')) &&
2848 ((name[1] == 'm') || (name[1] == 'M')) &&
2849 ((name[2] == 'l') || (name[2] == 'L'))) {
2850 int i;
2851 if ((name[0] == 'x') && (name[1] == 'm') &&
2852 (name[2] == 'l') && (name[3] == 0)) {
2853 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2855 ctxt->sax->error(ctxt->userData,
2856 "XML declaration allowed only at the start of the document\n");
2857 ctxt->wellFormed = 0;
2858 ctxt->disableSAX = 1;
2859 return(name);
2860 } else if (name[3] == 0) {
2861 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2863 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2864 ctxt->wellFormed = 0;
2865 ctxt->disableSAX = 1;
2866 return(name);
2867 }
2868 for (i = 0;;i++) {
2869 if (xmlW3CPIs[i] == NULL) break;
2870 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2871 return(name);
2872 }
2873 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2874 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2875 ctxt->sax->warning(ctxt->userData,
2876 "xmlParsePItarget: invalid name prefix 'xml'\n");
2877 }
2878 }
2879 return(name);
2880}
2881
2882/**
2883 * xmlParsePI:
2884 * @ctxt: an XML parser context
2885 *
2886 * parse an XML Processing Instruction.
2887 *
2888 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2889 *
2890 * The processing is transfered to SAX once parsed.
2891 */
2892
2893void
2894xmlParsePI(xmlParserCtxtPtr ctxt) {
2895 xmlChar *buf = NULL;
2896 int len = 0;
2897 int size = XML_PARSER_BUFFER_SIZE;
2898 int cur, l;
2899 xmlChar *target;
2900 xmlParserInputState state;
2901 int count = 0;
2902
2903 if ((RAW == '<') && (NXT(1) == '?')) {
2904 xmlParserInputPtr input = ctxt->input;
2905 state = ctxt->instate;
2906 ctxt->instate = XML_PARSER_PI;
2907 /*
2908 * this is a Processing Instruction.
2909 */
2910 SKIP(2);
2911 SHRINK;
2912
2913 /*
2914 * Parse the target name and check for special support like
2915 * namespace.
2916 */
2917 target = xmlParsePITarget(ctxt);
2918 if (target != NULL) {
2919 if ((RAW == '?') && (NXT(1) == '>')) {
2920 if (input != ctxt->input) {
2921 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2923 ctxt->sax->error(ctxt->userData,
2924 "PI declaration doesn't start and stop in the same entity\n");
2925 ctxt->wellFormed = 0;
2926 ctxt->disableSAX = 1;
2927 }
2928 SKIP(2);
2929
2930 /*
2931 * SAX: PI detected.
2932 */
2933 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2934 (ctxt->sax->processingInstruction != NULL))
2935 ctxt->sax->processingInstruction(ctxt->userData,
2936 target, NULL);
2937 ctxt->instate = state;
2938 xmlFree(target);
2939 return;
2940 }
2941 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2942 if (buf == NULL) {
2943 xmlGenericError(xmlGenericErrorContext,
2944 "malloc of %d byte failed\n", size);
2945 ctxt->instate = state;
2946 return;
2947 }
2948 cur = CUR;
2949 if (!IS_BLANK(cur)) {
2950 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2952 ctxt->sax->error(ctxt->userData,
2953 "xmlParsePI: PI %s space expected\n", target);
2954 ctxt->wellFormed = 0;
2955 ctxt->disableSAX = 1;
2956 }
2957 SKIP_BLANKS;
2958 cur = CUR_CHAR(l);
2959 while (IS_CHAR(cur) && /* checked */
2960 ((cur != '?') || (NXT(1) != '>'))) {
2961 if (len + 5 >= size) {
2962 size *= 2;
2963 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2964 if (buf == NULL) {
2965 xmlGenericError(xmlGenericErrorContext,
2966 "realloc of %d byte failed\n", size);
2967 ctxt->instate = state;
2968 return;
2969 }
2970 }
2971 count++;
2972 if (count > 50) {
2973 GROW;
2974 count = 0;
2975 }
2976 COPY_BUF(l,buf,len,cur);
2977 NEXTL(l);
2978 cur = CUR_CHAR(l);
2979 if (cur == 0) {
2980 SHRINK;
2981 GROW;
2982 cur = CUR_CHAR(l);
2983 }
2984 }
2985 buf[len] = 0;
2986 if (cur != '?') {
2987 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2989 ctxt->sax->error(ctxt->userData,
2990 "xmlParsePI: PI %s never end ...\n", target);
2991 ctxt->wellFormed = 0;
2992 ctxt->disableSAX = 1;
2993 } else {
2994 if (input != ctxt->input) {
2995 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "PI declaration doesn't start and stop in the same entity\n");
2999 ctxt->wellFormed = 0;
3000 ctxt->disableSAX = 1;
3001 }
3002 SKIP(2);
3003
3004 /*
3005 * SAX: PI detected.
3006 */
3007 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3008 (ctxt->sax->processingInstruction != NULL))
3009 ctxt->sax->processingInstruction(ctxt->userData,
3010 target, buf);
3011 }
3012 xmlFree(buf);
3013 xmlFree(target);
3014 } else {
3015 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParsePI : no target name\n");
3019 ctxt->wellFormed = 0;
3020 ctxt->disableSAX = 1;
3021 }
3022 ctxt->instate = state;
3023 }
3024}
3025
3026/**
3027 * xmlParseNotationDecl:
3028 * @ctxt: an XML parser context
3029 *
3030 * parse a notation declaration
3031 *
3032 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3033 *
3034 * Hence there is actually 3 choices:
3035 * 'PUBLIC' S PubidLiteral
3036 * 'PUBLIC' S PubidLiteral S SystemLiteral
3037 * and 'SYSTEM' S SystemLiteral
3038 *
3039 * See the NOTE on xmlParseExternalID().
3040 */
3041
3042void
3043xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3044 xmlChar *name;
3045 xmlChar *Pubid;
3046 xmlChar *Systemid;
3047
3048 if ((RAW == '<') && (NXT(1) == '!') &&
3049 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3050 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3051 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3052 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3053 xmlParserInputPtr input = ctxt->input;
3054 SHRINK;
3055 SKIP(10);
3056 if (!IS_BLANK(CUR)) {
3057 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3059 ctxt->sax->error(ctxt->userData,
3060 "Space required after '<!NOTATION'\n");
3061 ctxt->wellFormed = 0;
3062 ctxt->disableSAX = 1;
3063 return;
3064 }
3065 SKIP_BLANKS;
3066
Daniel Veillard29631a82001-03-05 09:49:20 +00003067 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003068 if (name == NULL) {
3069 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3071 ctxt->sax->error(ctxt->userData,
3072 "NOTATION: Name expected here\n");
3073 ctxt->wellFormed = 0;
3074 ctxt->disableSAX = 1;
3075 return;
3076 }
3077 if (!IS_BLANK(CUR)) {
3078 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080 ctxt->sax->error(ctxt->userData,
3081 "Space required after the NOTATION name'\n");
3082 ctxt->wellFormed = 0;
3083 ctxt->disableSAX = 1;
3084 return;
3085 }
3086 SKIP_BLANKS;
3087
3088 /*
3089 * Parse the IDs.
3090 */
3091 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3092 SKIP_BLANKS;
3093
3094 if (RAW == '>') {
3095 if (input != ctxt->input) {
3096 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3098 ctxt->sax->error(ctxt->userData,
3099"Notation declaration doesn't start and stop in the same entity\n");
3100 ctxt->wellFormed = 0;
3101 ctxt->disableSAX = 1;
3102 }
3103 NEXT;
3104 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3105 (ctxt->sax->notationDecl != NULL))
3106 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3107 } else {
3108 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3110 ctxt->sax->error(ctxt->userData,
3111 "'>' required to close NOTATION declaration\n");
3112 ctxt->wellFormed = 0;
3113 ctxt->disableSAX = 1;
3114 }
3115 xmlFree(name);
3116 if (Systemid != NULL) xmlFree(Systemid);
3117 if (Pubid != NULL) xmlFree(Pubid);
3118 }
3119}
3120
3121/**
3122 * xmlParseEntityDecl:
3123 * @ctxt: an XML parser context
3124 *
3125 * parse <!ENTITY declarations
3126 *
3127 * [70] EntityDecl ::= GEDecl | PEDecl
3128 *
3129 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3130 *
3131 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3132 *
3133 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3134 *
3135 * [74] PEDef ::= EntityValue | ExternalID
3136 *
3137 * [76] NDataDecl ::= S 'NDATA' S Name
3138 *
3139 * [ VC: Notation Declared ]
3140 * The Name must match the declared name of a notation.
3141 */
3142
3143void
3144xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3145 xmlChar *name = NULL;
3146 xmlChar *value = NULL;
3147 xmlChar *URI = NULL, *literal = NULL;
3148 xmlChar *ndata = NULL;
3149 int isParameter = 0;
3150 xmlChar *orig = NULL;
3151
3152 GROW;
3153 if ((RAW == '<') && (NXT(1) == '!') &&
3154 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3155 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3156 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3157 xmlParserInputPtr input = ctxt->input;
3158 ctxt->instate = XML_PARSER_ENTITY_DECL;
3159 SHRINK;
3160 SKIP(8);
3161 if (!IS_BLANK(CUR)) {
3162 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3164 ctxt->sax->error(ctxt->userData,
3165 "Space required after '<!ENTITY'\n");
3166 ctxt->wellFormed = 0;
3167 ctxt->disableSAX = 1;
3168 }
3169 SKIP_BLANKS;
3170
3171 if (RAW == '%') {
3172 NEXT;
3173 if (!IS_BLANK(CUR)) {
3174 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3176 ctxt->sax->error(ctxt->userData,
3177 "Space required after '%'\n");
3178 ctxt->wellFormed = 0;
3179 ctxt->disableSAX = 1;
3180 }
3181 SKIP_BLANKS;
3182 isParameter = 1;
3183 }
3184
Daniel Veillard29631a82001-03-05 09:49:20 +00003185 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003186 if (name == NULL) {
3187 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3189 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3190 ctxt->wellFormed = 0;
3191 ctxt->disableSAX = 1;
3192 return;
3193 }
3194 if (!IS_BLANK(CUR)) {
3195 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData,
3198 "Space required after the entity name\n");
3199 ctxt->wellFormed = 0;
3200 ctxt->disableSAX = 1;
3201 }
3202 SKIP_BLANKS;
3203
3204 /*
3205 * handle the various case of definitions...
3206 */
3207 if (isParameter) {
3208 if ((RAW == '"') || (RAW == '\'')) {
3209 value = xmlParseEntityValue(ctxt, &orig);
3210 if (value) {
3211 if ((ctxt->sax != NULL) &&
3212 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3213 ctxt->sax->entityDecl(ctxt->userData, name,
3214 XML_INTERNAL_PARAMETER_ENTITY,
3215 NULL, NULL, value);
3216 }
3217 } else {
3218 URI = xmlParseExternalID(ctxt, &literal, 1);
3219 if ((URI == NULL) && (literal == NULL)) {
3220 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3222 ctxt->sax->error(ctxt->userData,
3223 "Entity value required\n");
3224 ctxt->wellFormed = 0;
3225 ctxt->disableSAX = 1;
3226 }
3227 if (URI) {
3228 xmlURIPtr uri;
3229
3230 uri = xmlParseURI((const char *) URI);
3231 if (uri == NULL) {
3232 ctxt->errNo = XML_ERR_INVALID_URI;
3233 if ((ctxt->sax != NULL) &&
3234 (!ctxt->disableSAX) &&
3235 (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "Invalid URI: %s\n", URI);
3238 ctxt->wellFormed = 0;
3239 } else {
3240 if (uri->fragment != NULL) {
3241 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3242 if ((ctxt->sax != NULL) &&
3243 (!ctxt->disableSAX) &&
3244 (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Fragment not allowed: %s\n", URI);
3247 ctxt->wellFormed = 0;
3248 } else {
3249 if ((ctxt->sax != NULL) &&
3250 (!ctxt->disableSAX) &&
3251 (ctxt->sax->entityDecl != NULL))
3252 ctxt->sax->entityDecl(ctxt->userData, name,
3253 XML_EXTERNAL_PARAMETER_ENTITY,
3254 literal, URI, NULL);
3255 }
3256 xmlFreeURI(uri);
3257 }
3258 }
3259 }
3260 } else {
3261 if ((RAW == '"') || (RAW == '\'')) {
3262 value = xmlParseEntityValue(ctxt, &orig);
3263 if ((ctxt->sax != NULL) &&
3264 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3265 ctxt->sax->entityDecl(ctxt->userData, name,
3266 XML_INTERNAL_GENERAL_ENTITY,
3267 NULL, NULL, value);
3268 } else {
3269 URI = xmlParseExternalID(ctxt, &literal, 1);
3270 if ((URI == NULL) && (literal == NULL)) {
3271 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3273 ctxt->sax->error(ctxt->userData,
3274 "Entity value required\n");
3275 ctxt->wellFormed = 0;
3276 ctxt->disableSAX = 1;
3277 }
3278 if (URI) {
3279 xmlURIPtr uri;
3280
3281 uri = xmlParseURI((const char *)URI);
3282 if (uri == NULL) {
3283 ctxt->errNo = XML_ERR_INVALID_URI;
3284 if ((ctxt->sax != NULL) &&
3285 (!ctxt->disableSAX) &&
3286 (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288 "Invalid URI: %s\n", URI);
3289 ctxt->wellFormed = 0;
3290 } else {
3291 if (uri->fragment != NULL) {
3292 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3293 if ((ctxt->sax != NULL) &&
3294 (!ctxt->disableSAX) &&
3295 (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "Fragment not allowed: %s\n", URI);
3298 ctxt->wellFormed = 0;
3299 }
3300 xmlFreeURI(uri);
3301 }
3302 }
3303 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3304 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3306 ctxt->sax->error(ctxt->userData,
3307 "Space required before 'NDATA'\n");
3308 ctxt->wellFormed = 0;
3309 ctxt->disableSAX = 1;
3310 }
3311 SKIP_BLANKS;
3312 if ((RAW == 'N') && (NXT(1) == 'D') &&
3313 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3314 (NXT(4) == 'A')) {
3315 SKIP(5);
3316 if (!IS_BLANK(CUR)) {
3317 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3319 ctxt->sax->error(ctxt->userData,
3320 "Space required after 'NDATA'\n");
3321 ctxt->wellFormed = 0;
3322 ctxt->disableSAX = 1;
3323 }
3324 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003325 ndata = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003326 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3327 (ctxt->sax->unparsedEntityDecl != NULL))
3328 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3329 literal, URI, ndata);
3330 } else {
3331 if ((ctxt->sax != NULL) &&
3332 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3333 ctxt->sax->entityDecl(ctxt->userData, name,
3334 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3335 literal, URI, NULL);
3336 }
3337 }
3338 }
3339 SKIP_BLANKS;
3340 if (RAW != '>') {
3341 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData,
3344 "xmlParseEntityDecl: entity %s not terminated\n", name);
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 } else {
3348 if (input != ctxt->input) {
3349 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3351 ctxt->sax->error(ctxt->userData,
3352"Entity declaration doesn't start and stop in the same entity\n");
3353 ctxt->wellFormed = 0;
3354 ctxt->disableSAX = 1;
3355 }
3356 NEXT;
3357 }
3358 if (orig != NULL) {
3359 /*
3360 * Ugly mechanism to save the raw entity value.
3361 */
3362 xmlEntityPtr cur = NULL;
3363
3364 if (isParameter) {
3365 if ((ctxt->sax != NULL) &&
3366 (ctxt->sax->getParameterEntity != NULL))
3367 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3368 } else {
3369 if ((ctxt->sax != NULL) &&
3370 (ctxt->sax->getEntity != NULL))
3371 cur = ctxt->sax->getEntity(ctxt->userData, name);
3372 }
3373 if (cur != NULL) {
3374 if (cur->orig != NULL)
3375 xmlFree(orig);
3376 else
3377 cur->orig = orig;
3378 } else
3379 xmlFree(orig);
3380 }
3381 if (name != NULL) xmlFree(name);
3382 if (value != NULL) xmlFree(value);
3383 if (URI != NULL) xmlFree(URI);
3384 if (literal != NULL) xmlFree(literal);
3385 if (ndata != NULL) xmlFree(ndata);
3386 }
3387}
3388
3389/**
3390 * xmlParseDefaultDecl:
3391 * @ctxt: an XML parser context
3392 * @value: Receive a possible fixed default value for the attribute
3393 *
3394 * Parse an attribute default declaration
3395 *
3396 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3397 *
3398 * [ VC: Required Attribute ]
3399 * if the default declaration is the keyword #REQUIRED, then the
3400 * attribute must be specified for all elements of the type in the
3401 * attribute-list declaration.
3402 *
3403 * [ VC: Attribute Default Legal ]
3404 * The declared default value must meet the lexical constraints of
3405 * the declared attribute type c.f. xmlValidateAttributeDecl()
3406 *
3407 * [ VC: Fixed Attribute Default ]
3408 * if an attribute has a default value declared with the #FIXED
3409 * keyword, instances of that attribute must match the default value.
3410 *
3411 * [ WFC: No < in Attribute Values ]
3412 * handled in xmlParseAttValue()
3413 *
3414 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3415 * or XML_ATTRIBUTE_FIXED.
3416 */
3417
3418int
3419xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3420 int val;
3421 xmlChar *ret;
3422
3423 *value = NULL;
3424 if ((RAW == '#') && (NXT(1) == 'R') &&
3425 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3426 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3427 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3428 (NXT(8) == 'D')) {
3429 SKIP(9);
3430 return(XML_ATTRIBUTE_REQUIRED);
3431 }
3432 if ((RAW == '#') && (NXT(1) == 'I') &&
3433 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3434 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3435 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3436 SKIP(8);
3437 return(XML_ATTRIBUTE_IMPLIED);
3438 }
3439 val = XML_ATTRIBUTE_NONE;
3440 if ((RAW == '#') && (NXT(1) == 'F') &&
3441 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3442 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3443 SKIP(6);
3444 val = XML_ATTRIBUTE_FIXED;
3445 if (!IS_BLANK(CUR)) {
3446 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3448 ctxt->sax->error(ctxt->userData,
3449 "Space required after '#FIXED'\n");
3450 ctxt->wellFormed = 0;
3451 ctxt->disableSAX = 1;
3452 }
3453 SKIP_BLANKS;
3454 }
3455 ret = xmlParseAttValue(ctxt);
3456 ctxt->instate = XML_PARSER_DTD;
3457 if (ret == NULL) {
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Attribute default value declaration error\n");
3461 ctxt->wellFormed = 0;
3462 ctxt->disableSAX = 1;
3463 } else
3464 *value = ret;
3465 return(val);
3466}
3467
3468/**
3469 * xmlParseNotationType:
3470 * @ctxt: an XML parser context
3471 *
3472 * parse an Notation attribute type.
3473 *
3474 * Note: the leading 'NOTATION' S part has already being parsed...
3475 *
3476 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3477 *
3478 * [ VC: Notation Attributes ]
3479 * Values of this type must match one of the notation names included
3480 * in the declaration; all notation names in the declaration must be declared.
3481 *
3482 * Returns: the notation attribute tree built while parsing
3483 */
3484
3485xmlEnumerationPtr
3486xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3487 xmlChar *name;
3488 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3489
3490 if (RAW != '(') {
3491 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3493 ctxt->sax->error(ctxt->userData,
3494 "'(' required to start 'NOTATION'\n");
3495 ctxt->wellFormed = 0;
3496 ctxt->disableSAX = 1;
3497 return(NULL);
3498 }
3499 SHRINK;
3500 do {
3501 NEXT;
3502 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003503 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 if (name == NULL) {
3505 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508 "Name expected in NOTATION declaration\n");
3509 ctxt->wellFormed = 0;
3510 ctxt->disableSAX = 1;
3511 return(ret);
3512 }
3513 cur = xmlCreateEnumeration(name);
3514 xmlFree(name);
3515 if (cur == NULL) return(ret);
3516 if (last == NULL) ret = last = cur;
3517 else {
3518 last->next = cur;
3519 last = cur;
3520 }
3521 SKIP_BLANKS;
3522 } while (RAW == '|');
3523 if (RAW != ')') {
3524 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "')' required to finish NOTATION declaration\n");
3528 ctxt->wellFormed = 0;
3529 ctxt->disableSAX = 1;
3530 if ((last != NULL) && (last != ret))
3531 xmlFreeEnumeration(last);
3532 return(ret);
3533 }
3534 NEXT;
3535 return(ret);
3536}
3537
3538/**
3539 * xmlParseEnumerationType:
3540 * @ctxt: an XML parser context
3541 *
3542 * parse an Enumeration attribute type.
3543 *
3544 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3545 *
3546 * [ VC: Enumeration ]
3547 * Values of this type must match one of the Nmtoken tokens in
3548 * the declaration
3549 *
3550 * Returns: the enumeration attribute tree built while parsing
3551 */
3552
3553xmlEnumerationPtr
3554xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3555 xmlChar *name;
3556 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3557
3558 if (RAW != '(') {
3559 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3561 ctxt->sax->error(ctxt->userData,
3562 "'(' required to start ATTLIST enumeration\n");
3563 ctxt->wellFormed = 0;
3564 ctxt->disableSAX = 1;
3565 return(NULL);
3566 }
3567 SHRINK;
3568 do {
3569 NEXT;
3570 SKIP_BLANKS;
3571 name = xmlParseNmtoken(ctxt);
3572 if (name == NULL) {
3573 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3575 ctxt->sax->error(ctxt->userData,
3576 "NmToken expected in ATTLIST enumeration\n");
3577 ctxt->wellFormed = 0;
3578 ctxt->disableSAX = 1;
3579 return(ret);
3580 }
3581 cur = xmlCreateEnumeration(name);
3582 xmlFree(name);
3583 if (cur == NULL) return(ret);
3584 if (last == NULL) ret = last = cur;
3585 else {
3586 last->next = cur;
3587 last = cur;
3588 }
3589 SKIP_BLANKS;
3590 } while (RAW == '|');
3591 if (RAW != ')') {
3592 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3594 ctxt->sax->error(ctxt->userData,
3595 "')' required to finish ATTLIST enumeration\n");
3596 ctxt->wellFormed = 0;
3597 ctxt->disableSAX = 1;
3598 return(ret);
3599 }
3600 NEXT;
3601 return(ret);
3602}
3603
3604/**
3605 * xmlParseEnumeratedType:
3606 * @ctxt: an XML parser context
3607 * @tree: the enumeration tree built while parsing
3608 *
3609 * parse an Enumerated attribute type.
3610 *
3611 * [57] EnumeratedType ::= NotationType | Enumeration
3612 *
3613 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3614 *
3615 *
3616 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3617 */
3618
3619int
3620xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3621 if ((RAW == 'N') && (NXT(1) == 'O') &&
3622 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3623 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3624 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3625 SKIP(8);
3626 if (!IS_BLANK(CUR)) {
3627 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3629 ctxt->sax->error(ctxt->userData,
3630 "Space required after 'NOTATION'\n");
3631 ctxt->wellFormed = 0;
3632 ctxt->disableSAX = 1;
3633 return(0);
3634 }
3635 SKIP_BLANKS;
3636 *tree = xmlParseNotationType(ctxt);
3637 if (*tree == NULL) return(0);
3638 return(XML_ATTRIBUTE_NOTATION);
3639 }
3640 *tree = xmlParseEnumerationType(ctxt);
3641 if (*tree == NULL) return(0);
3642 return(XML_ATTRIBUTE_ENUMERATION);
3643}
3644
3645/**
3646 * xmlParseAttributeType:
3647 * @ctxt: an XML parser context
3648 * @tree: the enumeration tree built while parsing
3649 *
3650 * parse the Attribute list def for an element
3651 *
3652 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3653 *
3654 * [55] StringType ::= 'CDATA'
3655 *
3656 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3657 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3658 *
3659 * Validity constraints for attribute values syntax are checked in
3660 * xmlValidateAttributeValue()
3661 *
3662 * [ VC: ID ]
3663 * Values of type ID must match the Name production. A name must not
3664 * appear more than once in an XML document as a value of this type;
3665 * i.e., ID values must uniquely identify the elements which bear them.
3666 *
3667 * [ VC: One ID per Element Type ]
3668 * No element type may have more than one ID attribute specified.
3669 *
3670 * [ VC: ID Attribute Default ]
3671 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3672 *
3673 * [ VC: IDREF ]
3674 * Values of type IDREF must match the Name production, and values
3675 * of type IDREFS must match Names; each IDREF Name must match the value
3676 * of an ID attribute on some element in the XML document; i.e. IDREF
3677 * values must match the value of some ID attribute.
3678 *
3679 * [ VC: Entity Name ]
3680 * Values of type ENTITY must match the Name production, values
3681 * of type ENTITIES must match Names; each Entity Name must match the
3682 * name of an unparsed entity declared in the DTD.
3683 *
3684 * [ VC: Name Token ]
3685 * Values of type NMTOKEN must match the Nmtoken production; values
3686 * of type NMTOKENS must match Nmtokens.
3687 *
3688 * Returns the attribute type
3689 */
3690int
3691xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3692 SHRINK;
3693 if ((RAW == 'C') && (NXT(1) == 'D') &&
3694 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3695 (NXT(4) == 'A')) {
3696 SKIP(5);
3697 return(XML_ATTRIBUTE_CDATA);
3698 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3699 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3700 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3701 SKIP(6);
3702 return(XML_ATTRIBUTE_IDREFS);
3703 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3704 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3705 (NXT(4) == 'F')) {
3706 SKIP(5);
3707 return(XML_ATTRIBUTE_IDREF);
3708 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3709 SKIP(2);
3710 return(XML_ATTRIBUTE_ID);
3711 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3712 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3713 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3714 SKIP(6);
3715 return(XML_ATTRIBUTE_ENTITY);
3716 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3717 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3718 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3719 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3720 SKIP(8);
3721 return(XML_ATTRIBUTE_ENTITIES);
3722 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3723 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3724 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3725 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3726 SKIP(8);
3727 return(XML_ATTRIBUTE_NMTOKENS);
3728 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3729 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3730 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3731 (NXT(6) == 'N')) {
3732 SKIP(7);
3733 return(XML_ATTRIBUTE_NMTOKEN);
3734 }
3735 return(xmlParseEnumeratedType(ctxt, tree));
3736}
3737
3738/**
3739 * xmlParseAttributeListDecl:
3740 * @ctxt: an XML parser context
3741 *
3742 * : parse the Attribute list def for an element
3743 *
3744 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3745 *
3746 * [53] AttDef ::= S Name S AttType S DefaultDecl
3747 *
3748 */
3749void
3750xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3751 xmlChar *elemName;
3752 xmlChar *attrName;
3753 xmlEnumerationPtr tree;
3754
3755 if ((RAW == '<') && (NXT(1) == '!') &&
3756 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3757 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3758 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3759 (NXT(8) == 'T')) {
3760 xmlParserInputPtr input = ctxt->input;
3761
3762 SKIP(9);
3763 if (!IS_BLANK(CUR)) {
3764 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766 ctxt->sax->error(ctxt->userData,
3767 "Space required after '<!ATTLIST'\n");
3768 ctxt->wellFormed = 0;
3769 ctxt->disableSAX = 1;
3770 }
3771 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003772 elemName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003773 if (elemName == NULL) {
3774 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3776 ctxt->sax->error(ctxt->userData,
3777 "ATTLIST: no name for Element\n");
3778 ctxt->wellFormed = 0;
3779 ctxt->disableSAX = 1;
3780 return;
3781 }
3782 SKIP_BLANKS;
3783 GROW;
3784 while (RAW != '>') {
3785 const xmlChar *check = CUR_PTR;
3786 int type;
3787 int def;
3788 xmlChar *defaultValue = NULL;
3789
3790 GROW;
3791 tree = NULL;
Daniel Veillard29631a82001-03-05 09:49:20 +00003792 attrName = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003793 if (attrName == NULL) {
3794 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "ATTLIST: no name for Attribute\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 break;
3801 }
3802 GROW;
3803 if (!IS_BLANK(CUR)) {
3804 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3806 ctxt->sax->error(ctxt->userData,
3807 "Space required after the attribute name\n");
3808 ctxt->wellFormed = 0;
3809 ctxt->disableSAX = 1;
3810 if (attrName != NULL)
3811 xmlFree(attrName);
3812 if (defaultValue != NULL)
3813 xmlFree(defaultValue);
3814 break;
3815 }
3816 SKIP_BLANKS;
3817
3818 type = xmlParseAttributeType(ctxt, &tree);
3819 if (type <= 0) {
3820 if (attrName != NULL)
3821 xmlFree(attrName);
3822 if (defaultValue != NULL)
3823 xmlFree(defaultValue);
3824 break;
3825 }
3826
3827 GROW;
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after the attribute type\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 if (attrName != NULL)
3836 xmlFree(attrName);
3837 if (defaultValue != NULL)
3838 xmlFree(defaultValue);
3839 if (tree != NULL)
3840 xmlFreeEnumeration(tree);
3841 break;
3842 }
3843 SKIP_BLANKS;
3844
3845 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3846 if (def <= 0) {
3847 if (attrName != NULL)
3848 xmlFree(attrName);
3849 if (defaultValue != NULL)
3850 xmlFree(defaultValue);
3851 if (tree != NULL)
3852 xmlFreeEnumeration(tree);
3853 break;
3854 }
3855
3856 GROW;
3857 if (RAW != '>') {
3858 if (!IS_BLANK(CUR)) {
3859 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3861 ctxt->sax->error(ctxt->userData,
3862 "Space required after the attribute default value\n");
3863 ctxt->wellFormed = 0;
3864 ctxt->disableSAX = 1;
3865 if (attrName != NULL)
3866 xmlFree(attrName);
3867 if (defaultValue != NULL)
3868 xmlFree(defaultValue);
3869 if (tree != NULL)
3870 xmlFreeEnumeration(tree);
3871 break;
3872 }
3873 SKIP_BLANKS;
3874 }
3875 if (check == CUR_PTR) {
3876 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3878 ctxt->sax->error(ctxt->userData,
3879 "xmlParseAttributeListDecl: detected internal error\n");
3880 if (attrName != NULL)
3881 xmlFree(attrName);
3882 if (defaultValue != NULL)
3883 xmlFree(defaultValue);
3884 if (tree != NULL)
3885 xmlFreeEnumeration(tree);
3886 break;
3887 }
3888 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3889 (ctxt->sax->attributeDecl != NULL))
3890 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3891 type, def, defaultValue, tree);
3892 if (attrName != NULL)
3893 xmlFree(attrName);
3894 if (defaultValue != NULL)
3895 xmlFree(defaultValue);
3896 GROW;
3897 }
3898 if (RAW == '>') {
3899 if (input != ctxt->input) {
3900 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3902 ctxt->sax->error(ctxt->userData,
3903"Attribute list declaration doesn't start and stop in the same entity\n");
3904 ctxt->wellFormed = 0;
3905 ctxt->disableSAX = 1;
3906 }
3907 NEXT;
3908 }
3909
3910 xmlFree(elemName);
3911 }
3912}
3913
3914/**
3915 * xmlParseElementMixedContentDecl:
3916 * @ctxt: an XML parser context
3917 *
3918 * parse the declaration for a Mixed Element content
3919 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3920 *
3921 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3922 * '(' S? '#PCDATA' S? ')'
3923 *
3924 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3925 *
3926 * [ VC: No Duplicate Types ]
3927 * The same name must not appear more than once in a single
3928 * mixed-content declaration.
3929 *
3930 * returns: the list of the xmlElementContentPtr describing the element choices
3931 */
3932xmlElementContentPtr
3933xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3934 xmlElementContentPtr ret = NULL, cur = NULL, n;
3935 xmlChar *elem = NULL;
3936
3937 GROW;
3938 if ((RAW == '#') && (NXT(1) == 'P') &&
3939 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3940 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3941 (NXT(6) == 'A')) {
3942 SKIP(7);
3943 SKIP_BLANKS;
3944 SHRINK;
3945 if (RAW == ')') {
3946 ctxt->entity = ctxt->input;
3947 NEXT;
3948 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3949 if (RAW == '*') {
3950 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3951 NEXT;
3952 }
3953 return(ret);
3954 }
3955 if ((RAW == '(') || (RAW == '|')) {
3956 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3957 if (ret == NULL) return(NULL);
3958 }
3959 while (RAW == '|') {
3960 NEXT;
3961 if (elem == NULL) {
3962 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3963 if (ret == NULL) return(NULL);
3964 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003965 if (cur != NULL)
3966 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003967 cur = ret;
3968 } else {
3969 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3970 if (n == NULL) return(NULL);
3971 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003972 if (n->c1 != NULL)
3973 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00003974 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003975 if (n != NULL)
3976 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003977 cur = n;
3978 xmlFree(elem);
3979 }
3980 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00003981 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003982 if (elem == NULL) {
3983 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3985 ctxt->sax->error(ctxt->userData,
3986 "xmlParseElementMixedContentDecl : Name expected\n");
3987 ctxt->wellFormed = 0;
3988 ctxt->disableSAX = 1;
3989 xmlFreeElementContent(cur);
3990 return(NULL);
3991 }
3992 SKIP_BLANKS;
3993 GROW;
3994 }
3995 if ((RAW == ')') && (NXT(1) == '*')) {
3996 if (elem != NULL) {
3997 cur->c2 = xmlNewElementContent(elem,
3998 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003999 if (cur->c2 != NULL)
4000 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004001 xmlFree(elem);
4002 }
4003 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4004 ctxt->entity = ctxt->input;
4005 SKIP(2);
4006 } else {
4007 if (elem != NULL) xmlFree(elem);
4008 xmlFreeElementContent(ret);
4009 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4011 ctxt->sax->error(ctxt->userData,
4012 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4013 ctxt->wellFormed = 0;
4014 ctxt->disableSAX = 1;
4015 return(NULL);
4016 }
4017
4018 } else {
4019 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4021 ctxt->sax->error(ctxt->userData,
4022 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4023 ctxt->wellFormed = 0;
4024 ctxt->disableSAX = 1;
4025 }
4026 return(ret);
4027}
4028
4029/**
4030 * xmlParseElementChildrenContentDecl:
4031 * @ctxt: an XML parser context
4032 *
4033 * parse the declaration for a Mixed Element content
4034 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4035 *
4036 *
4037 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4038 *
4039 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4040 *
4041 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4042 *
4043 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4044 *
4045 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4046 * TODO Parameter-entity replacement text must be properly nested
4047 * with parenthetized groups. That is to say, if either of the
4048 * opening or closing parentheses in a choice, seq, or Mixed
4049 * construct is contained in the replacement text for a parameter
4050 * entity, both must be contained in the same replacement text. For
4051 * interoperability, if a parameter-entity reference appears in a
4052 * choice, seq, or Mixed construct, its replacement text should not
4053 * be empty, and neither the first nor last non-blank character of
4054 * the replacement text should be a connector (| or ,).
4055 *
4056 * returns: the tree of xmlElementContentPtr describing the element
4057 * hierarchy.
4058 */
4059xmlElementContentPtr
4060#ifdef VMS
4061xmlParseElementChildrenContentD
4062#else
4063xmlParseElementChildrenContentDecl
4064#endif
4065(xmlParserCtxtPtr ctxt) {
4066 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4067 xmlChar *elem;
4068 xmlChar type = 0;
4069
4070 SKIP_BLANKS;
4071 GROW;
4072 if (RAW == '(') {
4073 /* Recurse on first child */
4074 NEXT;
4075 SKIP_BLANKS;
4076 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4077 SKIP_BLANKS;
4078 GROW;
4079 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004080 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004081 if (elem == NULL) {
4082 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4084 ctxt->sax->error(ctxt->userData,
4085 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4086 ctxt->wellFormed = 0;
4087 ctxt->disableSAX = 1;
4088 return(NULL);
4089 }
4090 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4091 GROW;
4092 if (RAW == '?') {
4093 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4094 NEXT;
4095 } else if (RAW == '*') {
4096 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4097 NEXT;
4098 } else if (RAW == '+') {
4099 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4100 NEXT;
4101 } else {
4102 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4103 }
4104 xmlFree(elem);
4105 GROW;
4106 }
4107 SKIP_BLANKS;
4108 SHRINK;
4109 while (RAW != ')') {
4110 /*
4111 * Each loop we parse one separator and one element.
4112 */
4113 if (RAW == ',') {
4114 if (type == 0) type = CUR;
4115
4116 /*
4117 * Detect "Name | Name , Name" error
4118 */
4119 else if (type != CUR) {
4120 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4122 ctxt->sax->error(ctxt->userData,
4123 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4124 type);
4125 ctxt->wellFormed = 0;
4126 ctxt->disableSAX = 1;
4127 if ((op != NULL) && (op != ret))
4128 xmlFreeElementContent(op);
4129 if ((last != NULL) && (last != ret) &&
4130 (last != ret->c1) && (last != ret->c2))
4131 xmlFreeElementContent(last);
4132 if (ret != NULL)
4133 xmlFreeElementContent(ret);
4134 return(NULL);
4135 }
4136 NEXT;
4137
4138 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4139 if (op == NULL) {
4140 xmlFreeElementContent(ret);
4141 return(NULL);
4142 }
4143 if (last == NULL) {
4144 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004145 if (ret != NULL)
4146 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004147 ret = cur = op;
4148 } else {
4149 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004150 if (op != NULL)
4151 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004152 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004153 if (last != NULL)
4154 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004155 cur =op;
4156 last = NULL;
4157 }
4158 } else if (RAW == '|') {
4159 if (type == 0) type = CUR;
4160
4161 /*
4162 * Detect "Name , Name | Name" error
4163 */
4164 else if (type != CUR) {
4165 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4167 ctxt->sax->error(ctxt->userData,
4168 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4169 type);
4170 ctxt->wellFormed = 0;
4171 ctxt->disableSAX = 1;
4172 if ((op != NULL) && (op != ret) && (op != last))
4173 xmlFreeElementContent(op);
4174 if ((last != NULL) && (last != ret) &&
4175 (last != ret->c1) && (last != ret->c2))
4176 xmlFreeElementContent(last);
4177 if (ret != NULL)
4178 xmlFreeElementContent(ret);
4179 return(NULL);
4180 }
4181 NEXT;
4182
4183 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4184 if (op == NULL) {
4185 if ((op != NULL) && (op != ret))
4186 xmlFreeElementContent(op);
4187 if ((last != NULL) && (last != ret) &&
4188 (last != ret->c1) && (last != ret->c2))
4189 xmlFreeElementContent(last);
4190 if (ret != NULL)
4191 xmlFreeElementContent(ret);
4192 return(NULL);
4193 }
4194 if (last == NULL) {
4195 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004196 if (ret != NULL)
4197 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004198 ret = cur = op;
4199 } else {
4200 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004201 if (op != NULL)
4202 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004203 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004204 if (last != NULL)
4205 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004206 cur =op;
4207 last = NULL;
4208 }
4209 } else {
4210 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4212 ctxt->sax->error(ctxt->userData,
4213 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4214 ctxt->wellFormed = 0;
4215 ctxt->disableSAX = 1;
4216 if ((op != NULL) && (op != ret))
4217 xmlFreeElementContent(op);
4218 if ((last != NULL) && (last != ret) &&
4219 (last != ret->c1) && (last != ret->c2))
4220 xmlFreeElementContent(last);
4221 if (ret != NULL)
4222 xmlFreeElementContent(ret);
4223 return(NULL);
4224 }
4225 GROW;
4226 SKIP_BLANKS;
4227 GROW;
4228 if (RAW == '(') {
4229 /* Recurse on second child */
4230 NEXT;
4231 SKIP_BLANKS;
4232 last = xmlParseElementChildrenContentDecl(ctxt);
4233 SKIP_BLANKS;
4234 } else {
Daniel Veillard29631a82001-03-05 09:49:20 +00004235 elem = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 if (elem == NULL) {
4237 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4239 ctxt->sax->error(ctxt->userData,
4240 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4241 ctxt->wellFormed = 0;
4242 ctxt->disableSAX = 1;
4243 if ((op != NULL) && (op != ret))
4244 xmlFreeElementContent(op);
4245 if ((last != NULL) && (last != ret) &&
4246 (last != ret->c1) && (last != ret->c2))
4247 xmlFreeElementContent(last);
4248 if (ret != NULL)
4249 xmlFreeElementContent(ret);
4250 return(NULL);
4251 }
4252 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4253 xmlFree(elem);
4254 if (RAW == '?') {
4255 last->ocur = XML_ELEMENT_CONTENT_OPT;
4256 NEXT;
4257 } else if (RAW == '*') {
4258 last->ocur = XML_ELEMENT_CONTENT_MULT;
4259 NEXT;
4260 } else if (RAW == '+') {
4261 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4262 NEXT;
4263 } else {
4264 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4265 }
4266 }
4267 SKIP_BLANKS;
4268 GROW;
4269 }
4270 if ((cur != NULL) && (last != NULL)) {
4271 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004272 if (last != NULL)
4273 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 ctxt->entity = ctxt->input;
4276 NEXT;
4277 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004278 if (ret != NULL)
4279 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004280 NEXT;
4281 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004282 if (ret != NULL)
4283 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004284 NEXT;
4285 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004286 if (ret != NULL)
4287 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 NEXT;
4289 }
4290 return(ret);
4291}
4292
4293/**
4294 * xmlParseElementContentDecl:
4295 * @ctxt: an XML parser context
4296 * @name: the name of the element being defined.
4297 * @result: the Element Content pointer will be stored here if any
4298 *
4299 * parse the declaration for an Element content either Mixed or Children,
4300 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4301 *
4302 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4303 *
4304 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4305 */
4306
4307int
4308xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4309 xmlElementContentPtr *result) {
4310
4311 xmlElementContentPtr tree = NULL;
4312 xmlParserInputPtr input = ctxt->input;
4313 int res;
4314
4315 *result = NULL;
4316
4317 if (RAW != '(') {
4318 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4320 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004321 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004322 ctxt->wellFormed = 0;
4323 ctxt->disableSAX = 1;
4324 return(-1);
4325 }
4326 NEXT;
4327 GROW;
4328 SKIP_BLANKS;
4329 if ((RAW == '#') && (NXT(1) == 'P') &&
4330 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4331 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4332 (NXT(6) == 'A')) {
4333 tree = xmlParseElementMixedContentDecl(ctxt);
4334 res = XML_ELEMENT_TYPE_MIXED;
4335 } else {
4336 tree = xmlParseElementChildrenContentDecl(ctxt);
4337 res = XML_ELEMENT_TYPE_ELEMENT;
4338 }
4339 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4340 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4342 ctxt->sax->error(ctxt->userData,
4343"Element content declaration doesn't start and stop in the same entity\n");
4344 ctxt->wellFormed = 0;
4345 ctxt->disableSAX = 1;
4346 }
4347 SKIP_BLANKS;
4348 *result = tree;
4349 return(res);
4350}
4351
4352/**
4353 * xmlParseElementDecl:
4354 * @ctxt: an XML parser context
4355 *
4356 * parse an Element declaration.
4357 *
4358 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4359 *
4360 * [ VC: Unique Element Type Declaration ]
4361 * No element type may be declared more than once
4362 *
4363 * Returns the type of the element, or -1 in case of error
4364 */
4365int
4366xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4367 xmlChar *name;
4368 int ret = -1;
4369 xmlElementContentPtr content = NULL;
4370
4371 GROW;
4372 if ((RAW == '<') && (NXT(1) == '!') &&
4373 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4374 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4375 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4376 (NXT(8) == 'T')) {
4377 xmlParserInputPtr input = ctxt->input;
4378
4379 SKIP(9);
4380 if (!IS_BLANK(CUR)) {
4381 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4383 ctxt->sax->error(ctxt->userData,
4384 "Space required after 'ELEMENT'\n");
4385 ctxt->wellFormed = 0;
4386 ctxt->disableSAX = 1;
4387 }
4388 SKIP_BLANKS;
Daniel Veillard29631a82001-03-05 09:49:20 +00004389 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (name == NULL) {
4391 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4393 ctxt->sax->error(ctxt->userData,
4394 "xmlParseElementDecl: no name for Element\n");
4395 ctxt->wellFormed = 0;
4396 ctxt->disableSAX = 1;
4397 return(-1);
4398 }
4399 while ((RAW == 0) && (ctxt->inputNr > 1))
4400 xmlPopInput(ctxt);
4401 if (!IS_BLANK(CUR)) {
4402 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4404 ctxt->sax->error(ctxt->userData,
4405 "Space required after the element name\n");
4406 ctxt->wellFormed = 0;
4407 ctxt->disableSAX = 1;
4408 }
4409 SKIP_BLANKS;
4410 if ((RAW == 'E') && (NXT(1) == 'M') &&
4411 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4412 (NXT(4) == 'Y')) {
4413 SKIP(5);
4414 /*
4415 * Element must always be empty.
4416 */
4417 ret = XML_ELEMENT_TYPE_EMPTY;
4418 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4419 (NXT(2) == 'Y')) {
4420 SKIP(3);
4421 /*
4422 * Element is a generic container.
4423 */
4424 ret = XML_ELEMENT_TYPE_ANY;
4425 } else if (RAW == '(') {
4426 ret = xmlParseElementContentDecl(ctxt, name, &content);
4427 } else {
4428 /*
4429 * [ WFC: PEs in Internal Subset ] error handling.
4430 */
4431 if ((RAW == '%') && (ctxt->external == 0) &&
4432 (ctxt->inputNr == 1)) {
4433 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4435 ctxt->sax->error(ctxt->userData,
4436 "PEReference: forbidden within markup decl in internal subset\n");
4437 } else {
4438 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4440 ctxt->sax->error(ctxt->userData,
4441 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4442 }
4443 ctxt->wellFormed = 0;
4444 ctxt->disableSAX = 1;
4445 if (name != NULL) xmlFree(name);
4446 return(-1);
4447 }
4448
4449 SKIP_BLANKS;
4450 /*
4451 * Pop-up of finished entities.
4452 */
4453 while ((RAW == 0) && (ctxt->inputNr > 1))
4454 xmlPopInput(ctxt);
4455 SKIP_BLANKS;
4456
4457 if (RAW != '>') {
4458 ctxt->errNo = XML_ERR_GT_REQUIRED;
4459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4460 ctxt->sax->error(ctxt->userData,
4461 "xmlParseElementDecl: expected '>' at the end\n");
4462 ctxt->wellFormed = 0;
4463 ctxt->disableSAX = 1;
4464 } else {
4465 if (input != ctxt->input) {
4466 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4468 ctxt->sax->error(ctxt->userData,
4469"Element declaration doesn't start and stop in the same entity\n");
4470 ctxt->wellFormed = 0;
4471 ctxt->disableSAX = 1;
4472 }
4473
4474 NEXT;
4475 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4476 (ctxt->sax->elementDecl != NULL))
4477 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4478 content);
4479 }
4480 if (content != NULL) {
4481 xmlFreeElementContent(content);
4482 }
4483 if (name != NULL) {
4484 xmlFree(name);
4485 }
4486 }
4487 return(ret);
4488}
4489
4490/**
4491 * xmlParseMarkupDecl:
4492 * @ctxt: an XML parser context
4493 *
4494 * parse Markup declarations
4495 *
4496 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4497 * NotationDecl | PI | Comment
4498 *
4499 * [ VC: Proper Declaration/PE Nesting ]
4500 * Parameter-entity replacement text must be properly nested with
4501 * markup declarations. That is to say, if either the first character
4502 * or the last character of a markup declaration (markupdecl above) is
4503 * contained in the replacement text for a parameter-entity reference,
4504 * both must be contained in the same replacement text.
4505 *
4506 * [ WFC: PEs in Internal Subset ]
4507 * In the internal DTD subset, parameter-entity references can occur
4508 * only where markup declarations can occur, not within markup declarations.
4509 * (This does not apply to references that occur in external parameter
4510 * entities or to the external subset.)
4511 */
4512void
4513xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4514 GROW;
4515 xmlParseElementDecl(ctxt);
4516 xmlParseAttributeListDecl(ctxt);
4517 xmlParseEntityDecl(ctxt);
4518 xmlParseNotationDecl(ctxt);
4519 xmlParsePI(ctxt);
4520 xmlParseComment(ctxt);
4521 /*
4522 * This is only for internal subset. On external entities,
4523 * the replacement is done before parsing stage
4524 */
4525 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4526 xmlParsePEReference(ctxt);
4527 ctxt->instate = XML_PARSER_DTD;
4528}
4529
4530/**
4531 * xmlParseTextDecl:
4532 * @ctxt: an XML parser context
4533 *
4534 * parse an XML declaration header for external entities
4535 *
4536 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4537 *
4538 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4539 */
4540
4541void
4542xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4543 xmlChar *version;
4544
4545 /*
4546 * We know that '<?xml' is here.
4547 */
4548 if ((RAW == '<') && (NXT(1) == '?') &&
4549 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4550 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4551 SKIP(5);
4552 } else {
4553 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4555 ctxt->sax->error(ctxt->userData,
4556 "Text declaration '<?xml' required\n");
4557 ctxt->wellFormed = 0;
4558 ctxt->disableSAX = 1;
4559
4560 return;
4561 }
4562
4563 if (!IS_BLANK(CUR)) {
4564 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4566 ctxt->sax->error(ctxt->userData,
4567 "Space needed after '<?xml'\n");
4568 ctxt->wellFormed = 0;
4569 ctxt->disableSAX = 1;
4570 }
4571 SKIP_BLANKS;
4572
4573 /*
4574 * We may have the VersionInfo here.
4575 */
4576 version = xmlParseVersionInfo(ctxt);
4577 if (version == NULL)
4578 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4579 ctxt->input->version = version;
4580
4581 /*
4582 * We must have the encoding declaration
4583 */
4584 if (!IS_BLANK(CUR)) {
4585 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4588 ctxt->wellFormed = 0;
4589 ctxt->disableSAX = 1;
4590 }
4591 xmlParseEncodingDecl(ctxt);
4592 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4593 /*
4594 * The XML REC instructs us to stop parsing right here
4595 */
4596 return;
4597 }
4598
4599 SKIP_BLANKS;
4600 if ((RAW == '?') && (NXT(1) == '>')) {
4601 SKIP(2);
4602 } else if (RAW == '>') {
4603 /* Deprecated old WD ... */
4604 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4606 ctxt->sax->error(ctxt->userData,
4607 "XML declaration must end-up with '?>'\n");
4608 ctxt->wellFormed = 0;
4609 ctxt->disableSAX = 1;
4610 NEXT;
4611 } else {
4612 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4614 ctxt->sax->error(ctxt->userData,
4615 "parsing XML declaration: '?>' expected\n");
4616 ctxt->wellFormed = 0;
4617 ctxt->disableSAX = 1;
4618 MOVETO_ENDTAG(CUR_PTR);
4619 NEXT;
4620 }
4621}
4622
4623/*
4624 * xmlParseConditionalSections
4625 * @ctxt: an XML parser context
4626 *
4627 * [61] conditionalSect ::= includeSect | ignoreSect
4628 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4629 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4630 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4631 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4632 */
4633
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004634static void
Owen Taylor3473f882001-02-23 17:55:21 +00004635xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4636 SKIP(3);
4637 SKIP_BLANKS;
4638 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4639 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4640 (NXT(6) == 'E')) {
4641 SKIP(7);
4642 SKIP_BLANKS;
4643 if (RAW != '[') {
4644 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "XML conditional section '[' expected\n");
4648 ctxt->wellFormed = 0;
4649 ctxt->disableSAX = 1;
4650 } else {
4651 NEXT;
4652 }
4653 if (xmlParserDebugEntities) {
4654 if ((ctxt->input != NULL) && (ctxt->input->filename))
4655 xmlGenericError(xmlGenericErrorContext,
4656 "%s(%d): ", ctxt->input->filename,
4657 ctxt->input->line);
4658 xmlGenericError(xmlGenericErrorContext,
4659 "Entering INCLUDE Conditional Section\n");
4660 }
4661
4662 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4663 (NXT(2) != '>'))) {
4664 const xmlChar *check = CUR_PTR;
4665 int cons = ctxt->input->consumed;
4666 int tok = ctxt->token;
4667
4668 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4669 xmlParseConditionalSections(ctxt);
4670 } else if (IS_BLANK(CUR)) {
4671 NEXT;
4672 } else if (RAW == '%') {
4673 xmlParsePEReference(ctxt);
4674 } else
4675 xmlParseMarkupDecl(ctxt);
4676
4677 /*
4678 * Pop-up of finished entities.
4679 */
4680 while ((RAW == 0) && (ctxt->inputNr > 1))
4681 xmlPopInput(ctxt);
4682
4683 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4684 (tok == ctxt->token)) {
4685 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4687 ctxt->sax->error(ctxt->userData,
4688 "Content error in the external subset\n");
4689 ctxt->wellFormed = 0;
4690 ctxt->disableSAX = 1;
4691 break;
4692 }
4693 }
4694 if (xmlParserDebugEntities) {
4695 if ((ctxt->input != NULL) && (ctxt->input->filename))
4696 xmlGenericError(xmlGenericErrorContext,
4697 "%s(%d): ", ctxt->input->filename,
4698 ctxt->input->line);
4699 xmlGenericError(xmlGenericErrorContext,
4700 "Leaving INCLUDE Conditional Section\n");
4701 }
4702
4703 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4704 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4705 int state;
4706 int instate;
4707 int depth = 0;
4708
4709 SKIP(6);
4710 SKIP_BLANKS;
4711 if (RAW != '[') {
4712 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4714 ctxt->sax->error(ctxt->userData,
4715 "XML conditional section '[' expected\n");
4716 ctxt->wellFormed = 0;
4717 ctxt->disableSAX = 1;
4718 } else {
4719 NEXT;
4720 }
4721 if (xmlParserDebugEntities) {
4722 if ((ctxt->input != NULL) && (ctxt->input->filename))
4723 xmlGenericError(xmlGenericErrorContext,
4724 "%s(%d): ", ctxt->input->filename,
4725 ctxt->input->line);
4726 xmlGenericError(xmlGenericErrorContext,
4727 "Entering IGNORE Conditional Section\n");
4728 }
4729
4730 /*
4731 * Parse up to the end of the conditionnal section
4732 * But disable SAX event generating DTD building in the meantime
4733 */
4734 state = ctxt->disableSAX;
4735 instate = ctxt->instate;
4736 ctxt->disableSAX = 1;
4737 ctxt->instate = XML_PARSER_IGNORE;
4738
4739 while (depth >= 0) {
4740 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4741 depth++;
4742 SKIP(3);
4743 continue;
4744 }
4745 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4746 if (--depth >= 0) SKIP(3);
4747 continue;
4748 }
4749 NEXT;
4750 continue;
4751 }
4752
4753 ctxt->disableSAX = state;
4754 ctxt->instate = instate;
4755
4756 if (xmlParserDebugEntities) {
4757 if ((ctxt->input != NULL) && (ctxt->input->filename))
4758 xmlGenericError(xmlGenericErrorContext,
4759 "%s(%d): ", ctxt->input->filename,
4760 ctxt->input->line);
4761 xmlGenericError(xmlGenericErrorContext,
4762 "Leaving IGNORE Conditional Section\n");
4763 }
4764
4765 } else {
4766 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4768 ctxt->sax->error(ctxt->userData,
4769 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4770 ctxt->wellFormed = 0;
4771 ctxt->disableSAX = 1;
4772 }
4773
4774 if (RAW == 0)
4775 SHRINK;
4776
4777 if (RAW == 0) {
4778 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4780 ctxt->sax->error(ctxt->userData,
4781 "XML conditional section not closed\n");
4782 ctxt->wellFormed = 0;
4783 ctxt->disableSAX = 1;
4784 } else {
4785 SKIP(3);
4786 }
4787}
4788
4789/**
4790 * xmlParseExternalSubset:
4791 * @ctxt: an XML parser context
4792 * @ExternalID: the external identifier
4793 * @SystemID: the system identifier (or URL)
4794 *
4795 * parse Markup declarations from an external subset
4796 *
4797 * [30] extSubset ::= textDecl? extSubsetDecl
4798 *
4799 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4800 */
4801void
4802xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4803 const xmlChar *SystemID) {
4804 GROW;
4805 if ((RAW == '<') && (NXT(1) == '?') &&
4806 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4807 (NXT(4) == 'l')) {
4808 xmlParseTextDecl(ctxt);
4809 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4810 /*
4811 * The XML REC instructs us to stop parsing right here
4812 */
4813 ctxt->instate = XML_PARSER_EOF;
4814 return;
4815 }
4816 }
4817 if (ctxt->myDoc == NULL) {
4818 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4819 }
4820 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4821 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4822
4823 ctxt->instate = XML_PARSER_DTD;
4824 ctxt->external = 1;
4825 while (((RAW == '<') && (NXT(1) == '?')) ||
4826 ((RAW == '<') && (NXT(1) == '!')) ||
4827 IS_BLANK(CUR)) {
4828 const xmlChar *check = CUR_PTR;
4829 int cons = ctxt->input->consumed;
4830 int tok = ctxt->token;
4831
4832 GROW;
4833 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4834 xmlParseConditionalSections(ctxt);
4835 } else if (IS_BLANK(CUR)) {
4836 NEXT;
4837 } else if (RAW == '%') {
4838 xmlParsePEReference(ctxt);
4839 } else
4840 xmlParseMarkupDecl(ctxt);
4841
4842 /*
4843 * Pop-up of finished entities.
4844 */
4845 while ((RAW == 0) && (ctxt->inputNr > 1))
4846 xmlPopInput(ctxt);
4847
4848 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4849 (tok == ctxt->token)) {
4850 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4852 ctxt->sax->error(ctxt->userData,
4853 "Content error in the external subset\n");
4854 ctxt->wellFormed = 0;
4855 ctxt->disableSAX = 1;
4856 break;
4857 }
4858 }
4859
4860 if (RAW != 0) {
4861 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4863 ctxt->sax->error(ctxt->userData,
4864 "Extra content at the end of the document\n");
4865 ctxt->wellFormed = 0;
4866 ctxt->disableSAX = 1;
4867 }
4868
4869}
4870
4871/**
4872 * xmlParseReference:
4873 * @ctxt: an XML parser context
4874 *
4875 * parse and handle entity references in content, depending on the SAX
4876 * interface, this may end-up in a call to character() if this is a
4877 * CharRef, a predefined entity, if there is no reference() callback.
4878 * or if the parser was asked to switch to that mode.
4879 *
4880 * [67] Reference ::= EntityRef | CharRef
4881 */
4882void
4883xmlParseReference(xmlParserCtxtPtr ctxt) {
4884 xmlEntityPtr ent;
4885 xmlChar *val;
4886 if (RAW != '&') return;
4887
4888 if (NXT(1) == '#') {
4889 int i = 0;
4890 xmlChar out[10];
4891 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004892 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004893
4894 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4895 /*
4896 * So we are using non-UTF-8 buffers
4897 * Check that the char fit on 8bits, if not
4898 * generate a CharRef.
4899 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004900 if (value <= 0xFF) {
4901 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004902 out[1] = 0;
4903 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4904 (!ctxt->disableSAX))
4905 ctxt->sax->characters(ctxt->userData, out, 1);
4906 } else {
4907 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004908 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004909 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004910 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004911 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4912 (!ctxt->disableSAX))
4913 ctxt->sax->reference(ctxt->userData, out);
4914 }
4915 } else {
4916 /*
4917 * Just encode the value in UTF-8
4918 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004919 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004920 out[i] = 0;
4921 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4922 (!ctxt->disableSAX))
4923 ctxt->sax->characters(ctxt->userData, out, i);
4924 }
4925 } else {
4926 ent = xmlParseEntityRef(ctxt);
4927 if (ent == NULL) return;
4928 if ((ent->name != NULL) &&
4929 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4930 xmlNodePtr list = NULL;
4931 int ret;
4932
4933
4934 /*
4935 * The first reference to the entity trigger a parsing phase
4936 * where the ent->children is filled with the result from
4937 * the parsing.
4938 */
4939 if (ent->children == NULL) {
4940 xmlChar *value;
4941 value = ent->content;
4942
4943 /*
4944 * Check that this entity is well formed
4945 */
4946 if ((value != NULL) &&
4947 (value[1] == 0) && (value[0] == '<') &&
4948 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4949 /*
4950 * DONE: get definite answer on this !!!
4951 * Lots of entity decls are used to declare a single
4952 * char
4953 * <!ENTITY lt "<">
4954 * Which seems to be valid since
4955 * 2.4: The ampersand character (&) and the left angle
4956 * bracket (<) may appear in their literal form only
4957 * when used ... They are also legal within the literal
4958 * entity value of an internal entity declaration;i
4959 * see "4.3.2 Well-Formed Parsed Entities".
4960 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4961 * Looking at the OASIS test suite and James Clark
4962 * tests, this is broken. However the XML REC uses
4963 * it. Is the XML REC not well-formed ????
4964 * This is a hack to avoid this problem
4965 *
4966 * ANSWER: since lt gt amp .. are already defined,
4967 * this is a redefinition and hence the fact that the
4968 * contentis not well balanced is not a Wf error, this
4969 * is lousy but acceptable.
4970 */
4971 list = xmlNewDocText(ctxt->myDoc, value);
4972 if (list != NULL) {
4973 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4974 (ent->children == NULL)) {
4975 ent->children = list;
4976 ent->last = list;
4977 list->parent = (xmlNodePtr) ent;
4978 } else {
4979 xmlFreeNodeList(list);
4980 }
4981 } else if (list != NULL) {
4982 xmlFreeNodeList(list);
4983 }
4984 } else {
4985 /*
4986 * 4.3.2: An internal general parsed entity is well-formed
4987 * if its replacement text matches the production labeled
4988 * content.
4989 */
4990 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4991 ctxt->depth++;
4992 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4993 ctxt->sax, NULL, ctxt->depth,
4994 value, &list);
4995 ctxt->depth--;
4996 } else if (ent->etype ==
4997 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4998 ctxt->depth++;
Daniel Veillard257d9102001-05-08 10:41:44 +00004999 ret = xmlParseExternalEntityPrivate(ctxt->myDoc,
Owen Taylor3473f882001-02-23 17:55:21 +00005000 ctxt->sax, NULL, ctxt->depth,
Daniel Veillard257d9102001-05-08 10:41:44 +00005001 ent->URI, ent->ExternalID, &list,
5002 ctxt->_private);
Owen Taylor3473f882001-02-23 17:55:21 +00005003 ctxt->depth--;
5004 } else {
5005 ret = -1;
5006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5007 ctxt->sax->error(ctxt->userData,
5008 "Internal: invalid entity type\n");
5009 }
5010 if (ret == XML_ERR_ENTITY_LOOP) {
5011 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5013 ctxt->sax->error(ctxt->userData,
5014 "Detected entity reference loop\n");
5015 ctxt->wellFormed = 0;
5016 ctxt->disableSAX = 1;
5017 } else if ((ret == 0) && (list != NULL)) {
5018 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5019 (ent->children == NULL)) {
5020 ent->children = list;
5021 while (list != NULL) {
5022 list->parent = (xmlNodePtr) ent;
5023 if (list->next == NULL)
5024 ent->last = list;
5025 list = list->next;
5026 }
5027 } else {
5028 xmlFreeNodeList(list);
5029 }
5030 } else if (ret > 0) {
5031 ctxt->errNo = ret;
5032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5033 ctxt->sax->error(ctxt->userData,
5034 "Entity value required\n");
5035 ctxt->wellFormed = 0;
5036 ctxt->disableSAX = 1;
5037 } else if (list != NULL) {
5038 xmlFreeNodeList(list);
5039 }
5040 }
5041 }
5042 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5043 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5044 /*
5045 * Create a node.
5046 */
5047 ctxt->sax->reference(ctxt->userData, ent->name);
5048 return;
5049 } else if (ctxt->replaceEntities) {
5050 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5051 /*
5052 * Seems we are generating the DOM content, do
5053 * a simple tree copy
5054 */
5055 xmlNodePtr new;
5056 new = xmlCopyNodeList(ent->children);
5057
5058 xmlAddChildList(ctxt->node, new);
5059 /*
5060 * This is to avoid a nasty side effect, see
5061 * characters() in SAX.c
5062 */
5063 ctxt->nodemem = 0;
5064 ctxt->nodelen = 0;
5065 return;
5066 } else {
5067 /*
5068 * Probably running in SAX mode
5069 */
5070 xmlParserInputPtr input;
5071
5072 input = xmlNewEntityInputStream(ctxt, ent);
5073 xmlPushInput(ctxt, input);
5074 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5075 (RAW == '<') && (NXT(1) == '?') &&
5076 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5077 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5078 xmlParseTextDecl(ctxt);
5079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5080 /*
5081 * The XML REC instructs us to stop parsing right here
5082 */
5083 ctxt->instate = XML_PARSER_EOF;
5084 return;
5085 }
5086 if (input->standalone == 1) {
5087 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5089 ctxt->sax->error(ctxt->userData,
5090 "external parsed entities cannot be standalone\n");
5091 ctxt->wellFormed = 0;
5092 ctxt->disableSAX = 1;
5093 }
5094 }
5095 return;
5096 }
5097 }
5098 } else {
5099 val = ent->content;
5100 if (val == NULL) return;
5101 /*
5102 * inline the entity.
5103 */
5104 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5105 (!ctxt->disableSAX))
5106 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5107 }
5108 }
5109}
5110
5111/**
5112 * xmlParseEntityRef:
5113 * @ctxt: an XML parser context
5114 *
5115 * parse ENTITY references declarations
5116 *
5117 * [68] EntityRef ::= '&' Name ';'
5118 *
5119 * [ WFC: Entity Declared ]
5120 * In a document without any DTD, a document with only an internal DTD
5121 * subset which contains no parameter entity references, or a document
5122 * with "standalone='yes'", the Name given in the entity reference
5123 * must match that in an entity declaration, except that well-formed
5124 * documents need not declare any of the following entities: amp, lt,
5125 * gt, apos, quot. The declaration of a parameter entity must precede
5126 * any reference to it. Similarly, the declaration of a general entity
5127 * must precede any reference to it which appears in a default value in an
5128 * attribute-list declaration. Note that if entities are declared in the
5129 * external subset or in external parameter entities, a non-validating
5130 * processor is not obligated to read and process their declarations;
5131 * for such documents, the rule that an entity must be declared is a
5132 * well-formedness constraint only if standalone='yes'.
5133 *
5134 * [ WFC: Parsed Entity ]
5135 * An entity reference must not contain the name of an unparsed entity
5136 *
5137 * Returns the xmlEntityPtr if found, or NULL otherwise.
5138 */
5139xmlEntityPtr
5140xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5141 xmlChar *name;
5142 xmlEntityPtr ent = NULL;
5143
5144 GROW;
5145
5146 if (RAW == '&') {
5147 NEXT;
5148 name = xmlParseName(ctxt);
5149 if (name == NULL) {
5150 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5152 ctxt->sax->error(ctxt->userData,
5153 "xmlParseEntityRef: no name\n");
5154 ctxt->wellFormed = 0;
5155 ctxt->disableSAX = 1;
5156 } else {
5157 if (RAW == ';') {
5158 NEXT;
5159 /*
5160 * Ask first SAX for entity resolution, otherwise try the
5161 * predefined set.
5162 */
5163 if (ctxt->sax != NULL) {
5164 if (ctxt->sax->getEntity != NULL)
5165 ent = ctxt->sax->getEntity(ctxt->userData, name);
5166 if (ent == NULL)
5167 ent = xmlGetPredefinedEntity(name);
5168 }
5169 /*
5170 * [ WFC: Entity Declared ]
5171 * In a document without any DTD, a document with only an
5172 * internal DTD subset which contains no parameter entity
5173 * references, or a document with "standalone='yes'", the
5174 * Name given in the entity reference must match that in an
5175 * entity declaration, except that well-formed documents
5176 * need not declare any of the following entities: amp, lt,
5177 * gt, apos, quot.
5178 * The declaration of a parameter entity must precede any
5179 * reference to it.
5180 * Similarly, the declaration of a general entity must
5181 * precede any reference to it which appears in a default
5182 * value in an attribute-list declaration. Note that if
5183 * entities are declared in the external subset or in
5184 * external parameter entities, a non-validating processor
5185 * is not obligated to read and process their declarations;
5186 * for such documents, the rule that an entity must be
5187 * declared is a well-formedness constraint only if
5188 * standalone='yes'.
5189 */
5190 if (ent == NULL) {
5191 if ((ctxt->standalone == 1) ||
5192 ((ctxt->hasExternalSubset == 0) &&
5193 (ctxt->hasPErefs == 0))) {
5194 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5196 ctxt->sax->error(ctxt->userData,
5197 "Entity '%s' not defined\n", name);
5198 ctxt->wellFormed = 0;
5199 ctxt->disableSAX = 1;
5200 } else {
5201 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5202 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5203 ctxt->sax->warning(ctxt->userData,
5204 "Entity '%s' not defined\n", name);
5205 }
5206 }
5207
5208 /*
5209 * [ WFC: Parsed Entity ]
5210 * An entity reference must not contain the name of an
5211 * unparsed entity
5212 */
5213 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5214 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5216 ctxt->sax->error(ctxt->userData,
5217 "Entity reference to unparsed entity %s\n", name);
5218 ctxt->wellFormed = 0;
5219 ctxt->disableSAX = 1;
5220 }
5221
5222 /*
5223 * [ WFC: No External Entity References ]
5224 * Attribute values cannot contain direct or indirect
5225 * entity references to external entities.
5226 */
5227 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5228 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5229 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5231 ctxt->sax->error(ctxt->userData,
5232 "Attribute references external entity '%s'\n", name);
5233 ctxt->wellFormed = 0;
5234 ctxt->disableSAX = 1;
5235 }
5236 /*
5237 * [ WFC: No < in Attribute Values ]
5238 * The replacement text of any entity referred to directly or
5239 * indirectly in an attribute value (other than "&lt;") must
5240 * not contain a <.
5241 */
5242 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5243 (ent != NULL) &&
5244 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5245 (ent->content != NULL) &&
5246 (xmlStrchr(ent->content, '<'))) {
5247 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5249 ctxt->sax->error(ctxt->userData,
5250 "'<' in entity '%s' is not allowed in attributes values\n", name);
5251 ctxt->wellFormed = 0;
5252 ctxt->disableSAX = 1;
5253 }
5254
5255 /*
5256 * Internal check, no parameter entities here ...
5257 */
5258 else {
5259 switch (ent->etype) {
5260 case XML_INTERNAL_PARAMETER_ENTITY:
5261 case XML_EXTERNAL_PARAMETER_ENTITY:
5262 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5264 ctxt->sax->error(ctxt->userData,
5265 "Attempt to reference the parameter entity '%s'\n", name);
5266 ctxt->wellFormed = 0;
5267 ctxt->disableSAX = 1;
5268 break;
5269 default:
5270 break;
5271 }
5272 }
5273
5274 /*
5275 * [ WFC: No Recursion ]
5276 * A parsed entity must not contain a recursive reference
5277 * to itself, either directly or indirectly.
5278 * Done somewhere else
5279 */
5280
5281 } else {
5282 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5284 ctxt->sax->error(ctxt->userData,
5285 "xmlParseEntityRef: expecting ';'\n");
5286 ctxt->wellFormed = 0;
5287 ctxt->disableSAX = 1;
5288 }
5289 xmlFree(name);
5290 }
5291 }
5292 return(ent);
5293}
5294
5295/**
5296 * xmlParseStringEntityRef:
5297 * @ctxt: an XML parser context
5298 * @str: a pointer to an index in the string
5299 *
5300 * parse ENTITY references declarations, but this version parses it from
5301 * a string value.
5302 *
5303 * [68] EntityRef ::= '&' Name ';'
5304 *
5305 * [ WFC: Entity Declared ]
5306 * In a document without any DTD, a document with only an internal DTD
5307 * subset which contains no parameter entity references, or a document
5308 * with "standalone='yes'", the Name given in the entity reference
5309 * must match that in an entity declaration, except that well-formed
5310 * documents need not declare any of the following entities: amp, lt,
5311 * gt, apos, quot. The declaration of a parameter entity must precede
5312 * any reference to it. Similarly, the declaration of a general entity
5313 * must precede any reference to it which appears in a default value in an
5314 * attribute-list declaration. Note that if entities are declared in the
5315 * external subset or in external parameter entities, a non-validating
5316 * processor is not obligated to read and process their declarations;
5317 * for such documents, the rule that an entity must be declared is a
5318 * well-formedness constraint only if standalone='yes'.
5319 *
5320 * [ WFC: Parsed Entity ]
5321 * An entity reference must not contain the name of an unparsed entity
5322 *
5323 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5324 * is updated to the current location in the string.
5325 */
5326xmlEntityPtr
5327xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5328 xmlChar *name;
5329 const xmlChar *ptr;
5330 xmlChar cur;
5331 xmlEntityPtr ent = NULL;
5332
5333 if ((str == NULL) || (*str == NULL))
5334 return(NULL);
5335 ptr = *str;
5336 cur = *ptr;
5337 if (cur == '&') {
5338 ptr++;
5339 cur = *ptr;
5340 name = xmlParseStringName(ctxt, &ptr);
5341 if (name == NULL) {
5342 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5344 ctxt->sax->error(ctxt->userData,
5345 "xmlParseEntityRef: no name\n");
5346 ctxt->wellFormed = 0;
5347 ctxt->disableSAX = 1;
5348 } else {
5349 if (*ptr == ';') {
5350 ptr++;
5351 /*
5352 * Ask first SAX for entity resolution, otherwise try the
5353 * predefined set.
5354 */
5355 if (ctxt->sax != NULL) {
5356 if (ctxt->sax->getEntity != NULL)
5357 ent = ctxt->sax->getEntity(ctxt->userData, name);
5358 if (ent == NULL)
5359 ent = xmlGetPredefinedEntity(name);
5360 }
5361 /*
5362 * [ WFC: Entity Declared ]
5363 * In a document without any DTD, a document with only an
5364 * internal DTD subset which contains no parameter entity
5365 * references, or a document with "standalone='yes'", the
5366 * Name given in the entity reference must match that in an
5367 * entity declaration, except that well-formed documents
5368 * need not declare any of the following entities: amp, lt,
5369 * gt, apos, quot.
5370 * The declaration of a parameter entity must precede any
5371 * reference to it.
5372 * Similarly, the declaration of a general entity must
5373 * precede any reference to it which appears in a default
5374 * value in an attribute-list declaration. Note that if
5375 * entities are declared in the external subset or in
5376 * external parameter entities, a non-validating processor
5377 * is not obligated to read and process their declarations;
5378 * for such documents, the rule that an entity must be
5379 * declared is a well-formedness constraint only if
5380 * standalone='yes'.
5381 */
5382 if (ent == NULL) {
5383 if ((ctxt->standalone == 1) ||
5384 ((ctxt->hasExternalSubset == 0) &&
5385 (ctxt->hasPErefs == 0))) {
5386 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5388 ctxt->sax->error(ctxt->userData,
5389 "Entity '%s' not defined\n", name);
5390 ctxt->wellFormed = 0;
5391 ctxt->disableSAX = 1;
5392 } else {
5393 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5394 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5395 ctxt->sax->warning(ctxt->userData,
5396 "Entity '%s' not defined\n", name);
5397 }
5398 }
5399
5400 /*
5401 * [ WFC: Parsed Entity ]
5402 * An entity reference must not contain the name of an
5403 * unparsed entity
5404 */
5405 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5406 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5408 ctxt->sax->error(ctxt->userData,
5409 "Entity reference to unparsed entity %s\n", name);
5410 ctxt->wellFormed = 0;
5411 ctxt->disableSAX = 1;
5412 }
5413
5414 /*
5415 * [ WFC: No External Entity References ]
5416 * Attribute values cannot contain direct or indirect
5417 * entity references to external entities.
5418 */
5419 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5420 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5421 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5423 ctxt->sax->error(ctxt->userData,
5424 "Attribute references external entity '%s'\n", name);
5425 ctxt->wellFormed = 0;
5426 ctxt->disableSAX = 1;
5427 }
5428 /*
5429 * [ WFC: No < in Attribute Values ]
5430 * The replacement text of any entity referred to directly or
5431 * indirectly in an attribute value (other than "&lt;") must
5432 * not contain a <.
5433 */
5434 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5435 (ent != NULL) &&
5436 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5437 (ent->content != NULL) &&
5438 (xmlStrchr(ent->content, '<'))) {
5439 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5441 ctxt->sax->error(ctxt->userData,
5442 "'<' in entity '%s' is not allowed in attributes values\n", name);
5443 ctxt->wellFormed = 0;
5444 ctxt->disableSAX = 1;
5445 }
5446
5447 /*
5448 * Internal check, no parameter entities here ...
5449 */
5450 else {
5451 switch (ent->etype) {
5452 case XML_INTERNAL_PARAMETER_ENTITY:
5453 case XML_EXTERNAL_PARAMETER_ENTITY:
5454 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5456 ctxt->sax->error(ctxt->userData,
5457 "Attempt to reference the parameter entity '%s'\n", name);
5458 ctxt->wellFormed = 0;
5459 ctxt->disableSAX = 1;
5460 break;
5461 default:
5462 break;
5463 }
5464 }
5465
5466 /*
5467 * [ WFC: No Recursion ]
5468 * A parsed entity must not contain a recursive reference
5469 * to itself, either directly or indirectly.
5470 * Done somewhwere else
5471 */
5472
5473 } else {
5474 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5476 ctxt->sax->error(ctxt->userData,
5477 "xmlParseEntityRef: expecting ';'\n");
5478 ctxt->wellFormed = 0;
5479 ctxt->disableSAX = 1;
5480 }
5481 xmlFree(name);
5482 }
5483 }
5484 *str = ptr;
5485 return(ent);
5486}
5487
5488/**
5489 * xmlParsePEReference:
5490 * @ctxt: an XML parser context
5491 *
5492 * parse PEReference declarations
5493 * The entity content is handled directly by pushing it's content as
5494 * a new input stream.
5495 *
5496 * [69] PEReference ::= '%' Name ';'
5497 *
5498 * [ WFC: No Recursion ]
5499 * A parsed entity must not contain a recursive
5500 * reference to itself, either directly or indirectly.
5501 *
5502 * [ WFC: Entity Declared ]
5503 * In a document without any DTD, a document with only an internal DTD
5504 * subset which contains no parameter entity references, or a document
5505 * with "standalone='yes'", ... ... The declaration of a parameter
5506 * entity must precede any reference to it...
5507 *
5508 * [ VC: Entity Declared ]
5509 * In a document with an external subset or external parameter entities
5510 * with "standalone='no'", ... ... The declaration of a parameter entity
5511 * must precede any reference to it...
5512 *
5513 * [ WFC: In DTD ]
5514 * Parameter-entity references may only appear in the DTD.
5515 * NOTE: misleading but this is handled.
5516 */
5517void
5518xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5519 xmlChar *name;
5520 xmlEntityPtr entity = NULL;
5521 xmlParserInputPtr input;
5522
5523 if (RAW == '%') {
5524 NEXT;
Daniel Veillard29631a82001-03-05 09:49:20 +00005525 name = xmlParseNameComplex(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005526 if (name == NULL) {
5527 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5529 ctxt->sax->error(ctxt->userData,
5530 "xmlParsePEReference: no name\n");
5531 ctxt->wellFormed = 0;
5532 ctxt->disableSAX = 1;
5533 } else {
5534 if (RAW == ';') {
5535 NEXT;
5536 if ((ctxt->sax != NULL) &&
5537 (ctxt->sax->getParameterEntity != NULL))
5538 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5539 name);
5540 if (entity == NULL) {
5541 /*
5542 * [ WFC: Entity Declared ]
5543 * In a document without any DTD, a document with only an
5544 * internal DTD subset which contains no parameter entity
5545 * references, or a document with "standalone='yes'", ...
5546 * ... The declaration of a parameter entity must precede
5547 * any reference to it...
5548 */
5549 if ((ctxt->standalone == 1) ||
5550 ((ctxt->hasExternalSubset == 0) &&
5551 (ctxt->hasPErefs == 0))) {
5552 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5553 if ((!ctxt->disableSAX) &&
5554 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5555 ctxt->sax->error(ctxt->userData,
5556 "PEReference: %%%s; not found\n", name);
5557 ctxt->wellFormed = 0;
5558 ctxt->disableSAX = 1;
5559 } else {
5560 /*
5561 * [ VC: Entity Declared ]
5562 * In a document with an external subset or external
5563 * parameter entities with "standalone='no'", ...
5564 * ... The declaration of a parameter entity must precede
5565 * any reference to it...
5566 */
5567 if ((!ctxt->disableSAX) &&
5568 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5569 ctxt->sax->warning(ctxt->userData,
5570 "PEReference: %%%s; not found\n", name);
5571 ctxt->valid = 0;
5572 }
5573 } else {
5574 /*
5575 * Internal checking in case the entity quest barfed
5576 */
5577 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5578 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5579 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5580 ctxt->sax->warning(ctxt->userData,
5581 "Internal: %%%s; is not a parameter entity\n", name);
5582 } else {
5583 /*
5584 * TODO !!!
5585 * handle the extra spaces added before and after
5586 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5587 */
5588 input = xmlNewEntityInputStream(ctxt, entity);
5589 xmlPushInput(ctxt, input);
5590 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5591 (RAW == '<') && (NXT(1) == '?') &&
5592 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5593 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5594 xmlParseTextDecl(ctxt);
5595 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5596 /*
5597 * The XML REC instructs us to stop parsing
5598 * right here
5599 */
5600 ctxt->instate = XML_PARSER_EOF;
5601 xmlFree(name);
5602 return;
5603 }
5604 }
5605 if (ctxt->token == 0)
5606 ctxt->token = ' ';
5607 }
5608 }
5609 ctxt->hasPErefs = 1;
5610 } else {
5611 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5613 ctxt->sax->error(ctxt->userData,
5614 "xmlParsePEReference: expecting ';'\n");
5615 ctxt->wellFormed = 0;
5616 ctxt->disableSAX = 1;
5617 }
5618 xmlFree(name);
5619 }
5620 }
5621}
5622
5623/**
5624 * xmlParseStringPEReference:
5625 * @ctxt: an XML parser context
5626 * @str: a pointer to an index in the string
5627 *
5628 * parse PEReference declarations
5629 *
5630 * [69] PEReference ::= '%' Name ';'
5631 *
5632 * [ WFC: No Recursion ]
5633 * A parsed entity must not contain a recursive
5634 * reference to itself, either directly or indirectly.
5635 *
5636 * [ WFC: Entity Declared ]
5637 * In a document without any DTD, a document with only an internal DTD
5638 * subset which contains no parameter entity references, or a document
5639 * with "standalone='yes'", ... ... The declaration of a parameter
5640 * entity must precede any reference to it...
5641 *
5642 * [ VC: Entity Declared ]
5643 * In a document with an external subset or external parameter entities
5644 * with "standalone='no'", ... ... The declaration of a parameter entity
5645 * must precede any reference to it...
5646 *
5647 * [ WFC: In DTD ]
5648 * Parameter-entity references may only appear in the DTD.
5649 * NOTE: misleading but this is handled.
5650 *
5651 * Returns the string of the entity content.
5652 * str is updated to the current value of the index
5653 */
5654xmlEntityPtr
5655xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5656 const xmlChar *ptr;
5657 xmlChar cur;
5658 xmlChar *name;
5659 xmlEntityPtr entity = NULL;
5660
5661 if ((str == NULL) || (*str == NULL)) return(NULL);
5662 ptr = *str;
5663 cur = *ptr;
5664 if (cur == '%') {
5665 ptr++;
5666 cur = *ptr;
5667 name = xmlParseStringName(ctxt, &ptr);
5668 if (name == NULL) {
5669 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5671 ctxt->sax->error(ctxt->userData,
5672 "xmlParseStringPEReference: no name\n");
5673 ctxt->wellFormed = 0;
5674 ctxt->disableSAX = 1;
5675 } else {
5676 cur = *ptr;
5677 if (cur == ';') {
5678 ptr++;
5679 cur = *ptr;
5680 if ((ctxt->sax != NULL) &&
5681 (ctxt->sax->getParameterEntity != NULL))
5682 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5683 name);
5684 if (entity == NULL) {
5685 /*
5686 * [ WFC: Entity Declared ]
5687 * In a document without any DTD, a document with only an
5688 * internal DTD subset which contains no parameter entity
5689 * references, or a document with "standalone='yes'", ...
5690 * ... The declaration of a parameter entity must precede
5691 * any reference to it...
5692 */
5693 if ((ctxt->standalone == 1) ||
5694 ((ctxt->hasExternalSubset == 0) &&
5695 (ctxt->hasPErefs == 0))) {
5696 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5698 ctxt->sax->error(ctxt->userData,
5699 "PEReference: %%%s; not found\n", name);
5700 ctxt->wellFormed = 0;
5701 ctxt->disableSAX = 1;
5702 } else {
5703 /*
5704 * [ VC: Entity Declared ]
5705 * In a document with an external subset or external
5706 * parameter entities with "standalone='no'", ...
5707 * ... The declaration of a parameter entity must
5708 * precede any reference to it...
5709 */
5710 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5711 ctxt->sax->warning(ctxt->userData,
5712 "PEReference: %%%s; not found\n", name);
5713 ctxt->valid = 0;
5714 }
5715 } else {
5716 /*
5717 * Internal checking in case the entity quest barfed
5718 */
5719 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5720 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5721 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5722 ctxt->sax->warning(ctxt->userData,
5723 "Internal: %%%s; is not a parameter entity\n", name);
5724 }
5725 }
5726 ctxt->hasPErefs = 1;
5727 } else {
5728 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5730 ctxt->sax->error(ctxt->userData,
5731 "xmlParseStringPEReference: expecting ';'\n");
5732 ctxt->wellFormed = 0;
5733 ctxt->disableSAX = 1;
5734 }
5735 xmlFree(name);
5736 }
5737 }
5738 *str = ptr;
5739 return(entity);
5740}
5741
5742/**
5743 * xmlParseDocTypeDecl:
5744 * @ctxt: an XML parser context
5745 *
5746 * parse a DOCTYPE declaration
5747 *
5748 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5749 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5750 *
5751 * [ VC: Root Element Type ]
5752 * The Name in the document type declaration must match the element
5753 * type of the root element.
5754 */
5755
5756void
5757xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5758 xmlChar *name = NULL;
5759 xmlChar *ExternalID = NULL;
5760 xmlChar *URI = NULL;
5761
5762 /*
5763 * We know that '<!DOCTYPE' has been detected.
5764 */
5765 SKIP(9);
5766
5767 SKIP_BLANKS;
5768
5769 /*
5770 * Parse the DOCTYPE name.
5771 */
5772 name = xmlParseName(ctxt);
5773 if (name == NULL) {
5774 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5776 ctxt->sax->error(ctxt->userData,
5777 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5778 ctxt->wellFormed = 0;
5779 ctxt->disableSAX = 1;
5780 }
5781 ctxt->intSubName = name;
5782
5783 SKIP_BLANKS;
5784
5785 /*
5786 * Check for SystemID and ExternalID
5787 */
5788 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5789
5790 if ((URI != NULL) || (ExternalID != NULL)) {
5791 ctxt->hasExternalSubset = 1;
5792 }
5793 ctxt->extSubURI = URI;
5794 ctxt->extSubSystem = ExternalID;
5795
5796 SKIP_BLANKS;
5797
5798 /*
5799 * Create and update the internal subset.
5800 */
5801 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5802 (!ctxt->disableSAX))
5803 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5804
5805 /*
5806 * Is there any internal subset declarations ?
5807 * they are handled separately in xmlParseInternalSubset()
5808 */
5809 if (RAW == '[')
5810 return;
5811
5812 /*
5813 * We should be at the end of the DOCTYPE declaration.
5814 */
5815 if (RAW != '>') {
5816 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5818 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5819 ctxt->wellFormed = 0;
5820 ctxt->disableSAX = 1;
5821 }
5822 NEXT;
5823}
5824
5825/**
5826 * xmlParseInternalsubset:
5827 * @ctxt: an XML parser context
5828 *
5829 * parse the internal subset declaration
5830 *
5831 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5832 */
5833
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005834static void
Owen Taylor3473f882001-02-23 17:55:21 +00005835xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5836 /*
5837 * Is there any DTD definition ?
5838 */
5839 if (RAW == '[') {
5840 ctxt->instate = XML_PARSER_DTD;
5841 NEXT;
5842 /*
5843 * Parse the succession of Markup declarations and
5844 * PEReferences.
5845 * Subsequence (markupdecl | PEReference | S)*
5846 */
5847 while (RAW != ']') {
5848 const xmlChar *check = CUR_PTR;
5849 int cons = ctxt->input->consumed;
5850
5851 SKIP_BLANKS;
5852 xmlParseMarkupDecl(ctxt);
5853 xmlParsePEReference(ctxt);
5854
5855 /*
5856 * Pop-up of finished entities.
5857 */
5858 while ((RAW == 0) && (ctxt->inputNr > 1))
5859 xmlPopInput(ctxt);
5860
5861 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5862 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5864 ctxt->sax->error(ctxt->userData,
5865 "xmlParseInternalSubset: error detected in Markup declaration\n");
5866 ctxt->wellFormed = 0;
5867 ctxt->disableSAX = 1;
5868 break;
5869 }
5870 }
5871 if (RAW == ']') {
5872 NEXT;
5873 SKIP_BLANKS;
5874 }
5875 }
5876
5877 /*
5878 * We should be at the end of the DOCTYPE declaration.
5879 */
5880 if (RAW != '>') {
5881 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5883 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5884 ctxt->wellFormed = 0;
5885 ctxt->disableSAX = 1;
5886 }
5887 NEXT;
5888}
5889
5890/**
5891 * xmlParseAttribute:
5892 * @ctxt: an XML parser context
5893 * @value: a xmlChar ** used to store the value of the attribute
5894 *
5895 * parse an attribute
5896 *
5897 * [41] Attribute ::= Name Eq AttValue
5898 *
5899 * [ WFC: No External Entity References ]
5900 * Attribute values cannot contain direct or indirect entity references
5901 * to external entities.
5902 *
5903 * [ WFC: No < in Attribute Values ]
5904 * The replacement text of any entity referred to directly or indirectly in
5905 * an attribute value (other than "&lt;") must not contain a <.
5906 *
5907 * [ VC: Attribute Value Type ]
5908 * The attribute must have been declared; the value must be of the type
5909 * declared for it.
5910 *
5911 * [25] Eq ::= S? '=' S?
5912 *
5913 * With namespace:
5914 *
5915 * [NS 11] Attribute ::= QName Eq AttValue
5916 *
5917 * Also the case QName == xmlns:??? is handled independently as a namespace
5918 * definition.
5919 *
5920 * Returns the attribute name, and the value in *value.
5921 */
5922
5923xmlChar *
5924xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5925 xmlChar *name, *val;
5926
5927 *value = NULL;
5928 name = xmlParseName(ctxt);
5929 if (name == NULL) {
5930 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5932 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5933 ctxt->wellFormed = 0;
5934 ctxt->disableSAX = 1;
5935 return(NULL);
5936 }
5937
5938 /*
5939 * read the value
5940 */
5941 SKIP_BLANKS;
5942 if (RAW == '=') {
5943 NEXT;
5944 SKIP_BLANKS;
5945 val = xmlParseAttValue(ctxt);
5946 ctxt->instate = XML_PARSER_CONTENT;
5947 } else {
5948 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5950 ctxt->sax->error(ctxt->userData,
5951 "Specification mandate value for attribute %s\n", name);
5952 ctxt->wellFormed = 0;
5953 ctxt->disableSAX = 1;
5954 xmlFree(name);
5955 return(NULL);
5956 }
5957
5958 /*
5959 * Check that xml:lang conforms to the specification
5960 * No more registered as an error, just generate a warning now
5961 * since this was deprecated in XML second edition
5962 */
5963 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5964 if (!xmlCheckLanguageID(val)) {
5965 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5966 ctxt->sax->warning(ctxt->userData,
5967 "Malformed value for xml:lang : %s\n", val);
5968 }
5969 }
5970
5971 /*
5972 * Check that xml:space conforms to the specification
5973 */
5974 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5975 if (xmlStrEqual(val, BAD_CAST "default"))
5976 *(ctxt->space) = 0;
5977 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5978 *(ctxt->space) = 1;
5979 else {
5980 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5982 ctxt->sax->error(ctxt->userData,
5983"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5984 val);
5985 ctxt->wellFormed = 0;
5986 ctxt->disableSAX = 1;
5987 }
5988 }
5989
5990 *value = val;
5991 return(name);
5992}
5993
5994/**
5995 * xmlParseStartTag:
5996 * @ctxt: an XML parser context
5997 *
5998 * parse a start of tag either for rule element or
5999 * EmptyElement. In both case we don't parse the tag closing chars.
6000 *
6001 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6002 *
6003 * [ WFC: Unique Att Spec ]
6004 * No attribute name may appear more than once in the same start-tag or
6005 * empty-element tag.
6006 *
6007 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6008 *
6009 * [ WFC: Unique Att Spec ]
6010 * No attribute name may appear more than once in the same start-tag or
6011 * empty-element tag.
6012 *
6013 * With namespace:
6014 *
6015 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6016 *
6017 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6018 *
6019 * Returns the element name parsed
6020 */
6021
6022xmlChar *
6023xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6024 xmlChar *name;
6025 xmlChar *attname;
6026 xmlChar *attvalue;
6027 const xmlChar **atts = NULL;
6028 int nbatts = 0;
6029 int maxatts = 0;
6030 int i;
6031
6032 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006033 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006034
6035 name = xmlParseName(ctxt);
6036 if (name == NULL) {
6037 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6039 ctxt->sax->error(ctxt->userData,
6040 "xmlParseStartTag: invalid element name\n");
6041 ctxt->wellFormed = 0;
6042 ctxt->disableSAX = 1;
6043 return(NULL);
6044 }
6045
6046 /*
6047 * Now parse the attributes, it ends up with the ending
6048 *
6049 * (S Attribute)* S?
6050 */
6051 SKIP_BLANKS;
6052 GROW;
6053
Daniel Veillard21a0f912001-02-25 19:54:14 +00006054 while ((RAW != '>') &&
6055 ((RAW != '/') || (NXT(1) != '>')) &&
6056 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006057 const xmlChar *q = CUR_PTR;
6058 int cons = ctxt->input->consumed;
6059
6060 attname = xmlParseAttribute(ctxt, &attvalue);
6061 if ((attname != NULL) && (attvalue != NULL)) {
6062 /*
6063 * [ WFC: Unique Att Spec ]
6064 * No attribute name may appear more than once in the same
6065 * start-tag or empty-element tag.
6066 */
6067 for (i = 0; i < nbatts;i += 2) {
6068 if (xmlStrEqual(atts[i], attname)) {
6069 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6071 ctxt->sax->error(ctxt->userData,
6072 "Attribute %s redefined\n",
6073 attname);
6074 ctxt->wellFormed = 0;
6075 ctxt->disableSAX = 1;
6076 xmlFree(attname);
6077 xmlFree(attvalue);
6078 goto failed;
6079 }
6080 }
6081
6082 /*
6083 * Add the pair to atts
6084 */
6085 if (atts == NULL) {
6086 maxatts = 10;
6087 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6088 if (atts == NULL) {
6089 xmlGenericError(xmlGenericErrorContext,
6090 "malloc of %ld byte failed\n",
6091 maxatts * (long)sizeof(xmlChar *));
6092 return(NULL);
6093 }
6094 } else if (nbatts + 4 > maxatts) {
6095 maxatts *= 2;
6096 atts = (const xmlChar **) xmlRealloc((void *) atts,
6097 maxatts * sizeof(xmlChar *));
6098 if (atts == NULL) {
6099 xmlGenericError(xmlGenericErrorContext,
6100 "realloc of %ld byte failed\n",
6101 maxatts * (long)sizeof(xmlChar *));
6102 return(NULL);
6103 }
6104 }
6105 atts[nbatts++] = attname;
6106 atts[nbatts++] = attvalue;
6107 atts[nbatts] = NULL;
6108 atts[nbatts + 1] = NULL;
6109 } else {
6110 if (attname != NULL)
6111 xmlFree(attname);
6112 if (attvalue != NULL)
6113 xmlFree(attvalue);
6114 }
6115
6116failed:
6117
6118 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6119 break;
6120 if (!IS_BLANK(RAW)) {
6121 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6123 ctxt->sax->error(ctxt->userData,
6124 "attributes construct error\n");
6125 ctxt->wellFormed = 0;
6126 ctxt->disableSAX = 1;
6127 }
6128 SKIP_BLANKS;
6129 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6130 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6132 ctxt->sax->error(ctxt->userData,
6133 "xmlParseStartTag: problem parsing attributes\n");
6134 ctxt->wellFormed = 0;
6135 ctxt->disableSAX = 1;
6136 break;
6137 }
6138 GROW;
6139 }
6140
6141 /*
6142 * SAX: Start of Element !
6143 */
6144 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6145 (!ctxt->disableSAX))
6146 ctxt->sax->startElement(ctxt->userData, name, atts);
6147
6148 if (atts != NULL) {
6149 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6150 xmlFree((void *) atts);
6151 }
6152 return(name);
6153}
6154
6155/**
6156 * xmlParseEndTag:
6157 * @ctxt: an XML parser context
6158 *
6159 * parse an end of tag
6160 *
6161 * [42] ETag ::= '</' Name S? '>'
6162 *
6163 * With namespace
6164 *
6165 * [NS 9] ETag ::= '</' QName S? '>'
6166 */
6167
6168void
6169xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6170 xmlChar *name;
6171 xmlChar *oldname;
6172
6173 GROW;
6174 if ((RAW != '<') || (NXT(1) != '/')) {
6175 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6177 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6178 ctxt->wellFormed = 0;
6179 ctxt->disableSAX = 1;
6180 return;
6181 }
6182 SKIP(2);
6183
6184 name = xmlParseName(ctxt);
6185
6186 /*
6187 * We should definitely be at the ending "S? '>'" part
6188 */
6189 GROW;
6190 SKIP_BLANKS;
6191 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6192 ctxt->errNo = XML_ERR_GT_REQUIRED;
6193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6194 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6195 ctxt->wellFormed = 0;
6196 ctxt->disableSAX = 1;
6197 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006198 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006199
6200 /*
6201 * [ WFC: Element Type Match ]
6202 * The Name in an element's end-tag must match the element type in the
6203 * start-tag.
6204 *
6205 */
6206 if ((name == NULL) || (ctxt->name == NULL) ||
6207 (!xmlStrEqual(name, ctxt->name))) {
6208 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6210 if ((name != NULL) && (ctxt->name != NULL)) {
6211 ctxt->sax->error(ctxt->userData,
6212 "Opening and ending tag mismatch: %s and %s\n",
6213 ctxt->name, name);
6214 } else if (ctxt->name != NULL) {
6215 ctxt->sax->error(ctxt->userData,
6216 "Ending tag eror for: %s\n", ctxt->name);
6217 } else {
6218 ctxt->sax->error(ctxt->userData,
6219 "Ending tag error: internal error ???\n");
6220 }
6221
6222 }
6223 ctxt->wellFormed = 0;
6224 ctxt->disableSAX = 1;
6225 }
6226
6227 /*
6228 * SAX: End of Tag
6229 */
6230 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6231 (!ctxt->disableSAX))
6232 ctxt->sax->endElement(ctxt->userData, name);
6233
6234 if (name != NULL)
6235 xmlFree(name);
6236 oldname = namePop(ctxt);
6237 spacePop(ctxt);
6238 if (oldname != NULL) {
6239#ifdef DEBUG_STACK
6240 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6241#endif
6242 xmlFree(oldname);
6243 }
6244 return;
6245}
6246
6247/**
6248 * xmlParseCDSect:
6249 * @ctxt: an XML parser context
6250 *
6251 * Parse escaped pure raw content.
6252 *
6253 * [18] CDSect ::= CDStart CData CDEnd
6254 *
6255 * [19] CDStart ::= '<![CDATA['
6256 *
6257 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6258 *
6259 * [21] CDEnd ::= ']]>'
6260 */
6261void
6262xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6263 xmlChar *buf = NULL;
6264 int len = 0;
6265 int size = XML_PARSER_BUFFER_SIZE;
6266 int r, rl;
6267 int s, sl;
6268 int cur, l;
6269 int count = 0;
6270
6271 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6272 (NXT(2) == '[') && (NXT(3) == 'C') &&
6273 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6274 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6275 (NXT(8) == '[')) {
6276 SKIP(9);
6277 } else
6278 return;
6279
6280 ctxt->instate = XML_PARSER_CDATA_SECTION;
6281 r = CUR_CHAR(rl);
6282 if (!IS_CHAR(r)) {
6283 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6285 ctxt->sax->error(ctxt->userData,
6286 "CData section not finished\n");
6287 ctxt->wellFormed = 0;
6288 ctxt->disableSAX = 1;
6289 ctxt->instate = XML_PARSER_CONTENT;
6290 return;
6291 }
6292 NEXTL(rl);
6293 s = CUR_CHAR(sl);
6294 if (!IS_CHAR(s)) {
6295 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6297 ctxt->sax->error(ctxt->userData,
6298 "CData section not finished\n");
6299 ctxt->wellFormed = 0;
6300 ctxt->disableSAX = 1;
6301 ctxt->instate = XML_PARSER_CONTENT;
6302 return;
6303 }
6304 NEXTL(sl);
6305 cur = CUR_CHAR(l);
6306 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6307 if (buf == NULL) {
6308 xmlGenericError(xmlGenericErrorContext,
6309 "malloc of %d byte failed\n", size);
6310 return;
6311 }
6312 while (IS_CHAR(cur) &&
6313 ((r != ']') || (s != ']') || (cur != '>'))) {
6314 if (len + 5 >= size) {
6315 size *= 2;
6316 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6317 if (buf == NULL) {
6318 xmlGenericError(xmlGenericErrorContext,
6319 "realloc of %d byte failed\n", size);
6320 return;
6321 }
6322 }
6323 COPY_BUF(rl,buf,len,r);
6324 r = s;
6325 rl = sl;
6326 s = cur;
6327 sl = l;
6328 count++;
6329 if (count > 50) {
6330 GROW;
6331 count = 0;
6332 }
6333 NEXTL(l);
6334 cur = CUR_CHAR(l);
6335 }
6336 buf[len] = 0;
6337 ctxt->instate = XML_PARSER_CONTENT;
6338 if (cur != '>') {
6339 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6341 ctxt->sax->error(ctxt->userData,
6342 "CData section not finished\n%.50s\n", buf);
6343 ctxt->wellFormed = 0;
6344 ctxt->disableSAX = 1;
6345 xmlFree(buf);
6346 return;
6347 }
6348 NEXTL(l);
6349
6350 /*
6351 * Ok the buffer is to be consumed as cdata.
6352 */
6353 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6354 if (ctxt->sax->cdataBlock != NULL)
6355 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6356 }
6357 xmlFree(buf);
6358}
6359
6360/**
6361 * xmlParseContent:
6362 * @ctxt: an XML parser context
6363 *
6364 * Parse a content:
6365 *
6366 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6367 */
6368
6369void
6370xmlParseContent(xmlParserCtxtPtr ctxt) {
6371 GROW;
6372 while (((RAW != 0) || (ctxt->token != 0)) &&
6373 ((RAW != '<') || (NXT(1) != '/'))) {
6374 const xmlChar *test = CUR_PTR;
6375 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006376 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006377 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006378
6379 /*
6380 * Handle possible processed charrefs.
6381 */
6382 if (ctxt->token != 0) {
6383 xmlParseCharData(ctxt, 0);
6384 }
6385 /*
6386 * First case : a Processing Instruction.
6387 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006388 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006389 xmlParsePI(ctxt);
6390 }
6391
6392 /*
6393 * Second case : a CDSection
6394 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006395 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006396 (NXT(2) == '[') && (NXT(3) == 'C') &&
6397 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6398 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6399 (NXT(8) == '[')) {
6400 xmlParseCDSect(ctxt);
6401 }
6402
6403 /*
6404 * Third case : a comment
6405 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006406 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006407 (NXT(2) == '-') && (NXT(3) == '-')) {
6408 xmlParseComment(ctxt);
6409 ctxt->instate = XML_PARSER_CONTENT;
6410 }
6411
6412 /*
6413 * Fourth case : a sub-element.
6414 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006415 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006416 xmlParseElement(ctxt);
6417 }
6418
6419 /*
6420 * Fifth case : a reference. If if has not been resolved,
6421 * parsing returns it's Name, create the node
6422 */
6423
Daniel Veillard21a0f912001-02-25 19:54:14 +00006424 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006425 xmlParseReference(ctxt);
6426 }
6427
6428 /*
6429 * Last case, text. Note that References are handled directly.
6430 */
6431 else {
6432 xmlParseCharData(ctxt, 0);
6433 }
6434
6435 GROW;
6436 /*
6437 * Pop-up of finished entities.
6438 */
6439 while ((RAW == 0) && (ctxt->inputNr > 1))
6440 xmlPopInput(ctxt);
6441 SHRINK;
6442
6443 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6444 (tok == ctxt->token)) {
6445 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6447 ctxt->sax->error(ctxt->userData,
6448 "detected an error in element content\n");
6449 ctxt->wellFormed = 0;
6450 ctxt->disableSAX = 1;
6451 ctxt->instate = XML_PARSER_EOF;
6452 break;
6453 }
6454 }
6455}
6456
6457/**
6458 * xmlParseElement:
6459 * @ctxt: an XML parser context
6460 *
6461 * parse an XML element, this is highly recursive
6462 *
6463 * [39] element ::= EmptyElemTag | STag content ETag
6464 *
6465 * [ WFC: Element Type Match ]
6466 * The Name in an element's end-tag must match the element type in the
6467 * start-tag.
6468 *
6469 * [ VC: Element Valid ]
6470 * An element is valid if there is a declaration matching elementdecl
6471 * where the Name matches the element type and one of the following holds:
6472 * - The declaration matches EMPTY and the element has no content.
6473 * - The declaration matches children and the sequence of child elements
6474 * belongs to the language generated by the regular expression in the
6475 * content model, with optional white space (characters matching the
6476 * nonterminal S) between each pair of child elements.
6477 * - The declaration matches Mixed and the content consists of character
6478 * data and child elements whose types match names in the content model.
6479 * - The declaration matches ANY, and the types of any child elements have
6480 * been declared.
6481 */
6482
6483void
6484xmlParseElement(xmlParserCtxtPtr ctxt) {
6485 const xmlChar *openTag = CUR_PTR;
6486 xmlChar *name;
6487 xmlChar *oldname;
6488 xmlParserNodeInfo node_info;
6489 xmlNodePtr ret;
6490
6491 /* Capture start position */
6492 if (ctxt->record_info) {
6493 node_info.begin_pos = ctxt->input->consumed +
6494 (CUR_PTR - ctxt->input->base);
6495 node_info.begin_line = ctxt->input->line;
6496 }
6497
6498 if (ctxt->spaceNr == 0)
6499 spacePush(ctxt, -1);
6500 else
6501 spacePush(ctxt, *ctxt->space);
6502
6503 name = xmlParseStartTag(ctxt);
6504 if (name == NULL) {
6505 spacePop(ctxt);
6506 return;
6507 }
6508 namePush(ctxt, name);
6509 ret = ctxt->node;
6510
6511 /*
6512 * [ VC: Root Element Type ]
6513 * The Name in the document type declaration must match the element
6514 * type of the root element.
6515 */
6516 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6517 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6518 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6519
6520 /*
6521 * Check for an Empty Element.
6522 */
6523 if ((RAW == '/') && (NXT(1) == '>')) {
6524 SKIP(2);
6525 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6526 (!ctxt->disableSAX))
6527 ctxt->sax->endElement(ctxt->userData, name);
6528 oldname = namePop(ctxt);
6529 spacePop(ctxt);
6530 if (oldname != NULL) {
6531#ifdef DEBUG_STACK
6532 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6533#endif
6534 xmlFree(oldname);
6535 }
6536 if ( ret != NULL && ctxt->record_info ) {
6537 node_info.end_pos = ctxt->input->consumed +
6538 (CUR_PTR - ctxt->input->base);
6539 node_info.end_line = ctxt->input->line;
6540 node_info.node = ret;
6541 xmlParserAddNodeInfo(ctxt, &node_info);
6542 }
6543 return;
6544 }
6545 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006546 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006547 } else {
6548 ctxt->errNo = XML_ERR_GT_REQUIRED;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6550 ctxt->sax->error(ctxt->userData,
6551 "Couldn't find end of Start Tag\n%.30s\n",
6552 openTag);
6553 ctxt->wellFormed = 0;
6554 ctxt->disableSAX = 1;
6555
6556 /*
6557 * end of parsing of this node.
6558 */
6559 nodePop(ctxt);
6560 oldname = namePop(ctxt);
6561 spacePop(ctxt);
6562 if (oldname != NULL) {
6563#ifdef DEBUG_STACK
6564 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6565#endif
6566 xmlFree(oldname);
6567 }
6568
6569 /*
6570 * Capture end position and add node
6571 */
6572 if ( ret != NULL && ctxt->record_info ) {
6573 node_info.end_pos = ctxt->input->consumed +
6574 (CUR_PTR - ctxt->input->base);
6575 node_info.end_line = ctxt->input->line;
6576 node_info.node = ret;
6577 xmlParserAddNodeInfo(ctxt, &node_info);
6578 }
6579 return;
6580 }
6581
6582 /*
6583 * Parse the content of the element:
6584 */
6585 xmlParseContent(ctxt);
6586 if (!IS_CHAR(RAW)) {
6587 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6589 ctxt->sax->error(ctxt->userData,
6590 "Premature end of data in tag %.30s\n", openTag);
6591 ctxt->wellFormed = 0;
6592 ctxt->disableSAX = 1;
6593
6594 /*
6595 * end of parsing of this node.
6596 */
6597 nodePop(ctxt);
6598 oldname = namePop(ctxt);
6599 spacePop(ctxt);
6600 if (oldname != NULL) {
6601#ifdef DEBUG_STACK
6602 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6603#endif
6604 xmlFree(oldname);
6605 }
6606 return;
6607 }
6608
6609 /*
6610 * parse the end of tag: '</' should be here.
6611 */
6612 xmlParseEndTag(ctxt);
6613
6614 /*
6615 * Capture end position and add node
6616 */
6617 if ( ret != NULL && ctxt->record_info ) {
6618 node_info.end_pos = ctxt->input->consumed +
6619 (CUR_PTR - ctxt->input->base);
6620 node_info.end_line = ctxt->input->line;
6621 node_info.node = ret;
6622 xmlParserAddNodeInfo(ctxt, &node_info);
6623 }
6624}
6625
6626/**
6627 * xmlParseVersionNum:
6628 * @ctxt: an XML parser context
6629 *
6630 * parse the XML version value.
6631 *
6632 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6633 *
6634 * Returns the string giving the XML version number, or NULL
6635 */
6636xmlChar *
6637xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6638 xmlChar *buf = NULL;
6639 int len = 0;
6640 int size = 10;
6641 xmlChar cur;
6642
6643 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6644 if (buf == NULL) {
6645 xmlGenericError(xmlGenericErrorContext,
6646 "malloc of %d byte failed\n", size);
6647 return(NULL);
6648 }
6649 cur = CUR;
6650 while (((cur >= 'a') && (cur <= 'z')) ||
6651 ((cur >= 'A') && (cur <= 'Z')) ||
6652 ((cur >= '0') && (cur <= '9')) ||
6653 (cur == '_') || (cur == '.') ||
6654 (cur == ':') || (cur == '-')) {
6655 if (len + 1 >= size) {
6656 size *= 2;
6657 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6658 if (buf == NULL) {
6659 xmlGenericError(xmlGenericErrorContext,
6660 "realloc of %d byte failed\n", size);
6661 return(NULL);
6662 }
6663 }
6664 buf[len++] = cur;
6665 NEXT;
6666 cur=CUR;
6667 }
6668 buf[len] = 0;
6669 return(buf);
6670}
6671
6672/**
6673 * xmlParseVersionInfo:
6674 * @ctxt: an XML parser context
6675 *
6676 * parse the XML version.
6677 *
6678 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6679 *
6680 * [25] Eq ::= S? '=' S?
6681 *
6682 * Returns the version string, e.g. "1.0"
6683 */
6684
6685xmlChar *
6686xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6687 xmlChar *version = NULL;
6688 const xmlChar *q;
6689
6690 if ((RAW == 'v') && (NXT(1) == 'e') &&
6691 (NXT(2) == 'r') && (NXT(3) == 's') &&
6692 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6693 (NXT(6) == 'n')) {
6694 SKIP(7);
6695 SKIP_BLANKS;
6696 if (RAW != '=') {
6697 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6699 ctxt->sax->error(ctxt->userData,
6700 "xmlParseVersionInfo : expected '='\n");
6701 ctxt->wellFormed = 0;
6702 ctxt->disableSAX = 1;
6703 return(NULL);
6704 }
6705 NEXT;
6706 SKIP_BLANKS;
6707 if (RAW == '"') {
6708 NEXT;
6709 q = CUR_PTR;
6710 version = xmlParseVersionNum(ctxt);
6711 if (RAW != '"') {
6712 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6714 ctxt->sax->error(ctxt->userData,
6715 "String not closed\n%.50s\n", q);
6716 ctxt->wellFormed = 0;
6717 ctxt->disableSAX = 1;
6718 } else
6719 NEXT;
6720 } else if (RAW == '\''){
6721 NEXT;
6722 q = CUR_PTR;
6723 version = xmlParseVersionNum(ctxt);
6724 if (RAW != '\'') {
6725 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6727 ctxt->sax->error(ctxt->userData,
6728 "String not closed\n%.50s\n", q);
6729 ctxt->wellFormed = 0;
6730 ctxt->disableSAX = 1;
6731 } else
6732 NEXT;
6733 } else {
6734 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6736 ctxt->sax->error(ctxt->userData,
6737 "xmlParseVersionInfo : expected ' or \"\n");
6738 ctxt->wellFormed = 0;
6739 ctxt->disableSAX = 1;
6740 }
6741 }
6742 return(version);
6743}
6744
6745/**
6746 * xmlParseEncName:
6747 * @ctxt: an XML parser context
6748 *
6749 * parse the XML encoding name
6750 *
6751 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6752 *
6753 * Returns the encoding name value or NULL
6754 */
6755xmlChar *
6756xmlParseEncName(xmlParserCtxtPtr ctxt) {
6757 xmlChar *buf = NULL;
6758 int len = 0;
6759 int size = 10;
6760 xmlChar cur;
6761
6762 cur = CUR;
6763 if (((cur >= 'a') && (cur <= 'z')) ||
6764 ((cur >= 'A') && (cur <= 'Z'))) {
6765 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6766 if (buf == NULL) {
6767 xmlGenericError(xmlGenericErrorContext,
6768 "malloc of %d byte failed\n", size);
6769 return(NULL);
6770 }
6771
6772 buf[len++] = cur;
6773 NEXT;
6774 cur = CUR;
6775 while (((cur >= 'a') && (cur <= 'z')) ||
6776 ((cur >= 'A') && (cur <= 'Z')) ||
6777 ((cur >= '0') && (cur <= '9')) ||
6778 (cur == '.') || (cur == '_') ||
6779 (cur == '-')) {
6780 if (len + 1 >= size) {
6781 size *= 2;
6782 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6783 if (buf == NULL) {
6784 xmlGenericError(xmlGenericErrorContext,
6785 "realloc of %d byte failed\n", size);
6786 return(NULL);
6787 }
6788 }
6789 buf[len++] = cur;
6790 NEXT;
6791 cur = CUR;
6792 if (cur == 0) {
6793 SHRINK;
6794 GROW;
6795 cur = CUR;
6796 }
6797 }
6798 buf[len] = 0;
6799 } else {
6800 ctxt->errNo = XML_ERR_ENCODING_NAME;
6801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6802 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6803 ctxt->wellFormed = 0;
6804 ctxt->disableSAX = 1;
6805 }
6806 return(buf);
6807}
6808
6809/**
6810 * xmlParseEncodingDecl:
6811 * @ctxt: an XML parser context
6812 *
6813 * parse the XML encoding declaration
6814 *
6815 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6816 *
6817 * this setups the conversion filters.
6818 *
6819 * Returns the encoding value or NULL
6820 */
6821
6822xmlChar *
6823xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6824 xmlChar *encoding = NULL;
6825 const xmlChar *q;
6826
6827 SKIP_BLANKS;
6828 if ((RAW == 'e') && (NXT(1) == 'n') &&
6829 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6830 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6831 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6832 SKIP(8);
6833 SKIP_BLANKS;
6834 if (RAW != '=') {
6835 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6837 ctxt->sax->error(ctxt->userData,
6838 "xmlParseEncodingDecl : expected '='\n");
6839 ctxt->wellFormed = 0;
6840 ctxt->disableSAX = 1;
6841 return(NULL);
6842 }
6843 NEXT;
6844 SKIP_BLANKS;
6845 if (RAW == '"') {
6846 NEXT;
6847 q = CUR_PTR;
6848 encoding = xmlParseEncName(ctxt);
6849 if (RAW != '"') {
6850 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6852 ctxt->sax->error(ctxt->userData,
6853 "String not closed\n%.50s\n", q);
6854 ctxt->wellFormed = 0;
6855 ctxt->disableSAX = 1;
6856 } else
6857 NEXT;
6858 } else if (RAW == '\''){
6859 NEXT;
6860 q = CUR_PTR;
6861 encoding = xmlParseEncName(ctxt);
6862 if (RAW != '\'') {
6863 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6865 ctxt->sax->error(ctxt->userData,
6866 "String not closed\n%.50s\n", q);
6867 ctxt->wellFormed = 0;
6868 ctxt->disableSAX = 1;
6869 } else
6870 NEXT;
6871 } else if (RAW == '"'){
6872 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6874 ctxt->sax->error(ctxt->userData,
6875 "xmlParseEncodingDecl : expected ' or \"\n");
6876 ctxt->wellFormed = 0;
6877 ctxt->disableSAX = 1;
6878 }
6879 if (encoding != NULL) {
6880 xmlCharEncoding enc;
6881 xmlCharEncodingHandlerPtr handler;
6882
6883 if (ctxt->input->encoding != NULL)
6884 xmlFree((xmlChar *) ctxt->input->encoding);
6885 ctxt->input->encoding = encoding;
6886
6887 enc = xmlParseCharEncoding((const char *) encoding);
6888 /*
6889 * registered set of known encodings
6890 */
6891 if (enc != XML_CHAR_ENCODING_ERROR) {
6892 xmlSwitchEncoding(ctxt, enc);
6893 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6894 xmlFree(encoding);
6895 return(NULL);
6896 }
6897 } else {
6898 /*
6899 * fallback for unknown encodings
6900 */
6901 handler = xmlFindCharEncodingHandler((const char *) encoding);
6902 if (handler != NULL) {
6903 xmlSwitchToEncoding(ctxt, handler);
6904 } else {
6905 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6907 ctxt->sax->error(ctxt->userData,
6908 "Unsupported encoding %s\n", encoding);
6909 return(NULL);
6910 }
6911 }
6912 }
6913 }
6914 return(encoding);
6915}
6916
6917/**
6918 * xmlParseSDDecl:
6919 * @ctxt: an XML parser context
6920 *
6921 * parse the XML standalone declaration
6922 *
6923 * [32] SDDecl ::= S 'standalone' Eq
6924 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6925 *
6926 * [ VC: Standalone Document Declaration ]
6927 * TODO The standalone document declaration must have the value "no"
6928 * if any external markup declarations contain declarations of:
6929 * - attributes with default values, if elements to which these
6930 * attributes apply appear in the document without specifications
6931 * of values for these attributes, or
6932 * - entities (other than amp, lt, gt, apos, quot), if references
6933 * to those entities appear in the document, or
6934 * - attributes with values subject to normalization, where the
6935 * attribute appears in the document with a value which will change
6936 * as a result of normalization, or
6937 * - element types with element content, if white space occurs directly
6938 * within any instance of those types.
6939 *
6940 * Returns 1 if standalone, 0 otherwise
6941 */
6942
6943int
6944xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6945 int standalone = -1;
6946
6947 SKIP_BLANKS;
6948 if ((RAW == 's') && (NXT(1) == 't') &&
6949 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6950 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6951 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6952 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6953 SKIP(10);
6954 SKIP_BLANKS;
6955 if (RAW != '=') {
6956 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6958 ctxt->sax->error(ctxt->userData,
6959 "XML standalone declaration : expected '='\n");
6960 ctxt->wellFormed = 0;
6961 ctxt->disableSAX = 1;
6962 return(standalone);
6963 }
6964 NEXT;
6965 SKIP_BLANKS;
6966 if (RAW == '\''){
6967 NEXT;
6968 if ((RAW == 'n') && (NXT(1) == 'o')) {
6969 standalone = 0;
6970 SKIP(2);
6971 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6972 (NXT(2) == 's')) {
6973 standalone = 1;
6974 SKIP(3);
6975 } else {
6976 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6978 ctxt->sax->error(ctxt->userData,
6979 "standalone accepts only 'yes' or 'no'\n");
6980 ctxt->wellFormed = 0;
6981 ctxt->disableSAX = 1;
6982 }
6983 if (RAW != '\'') {
6984 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6986 ctxt->sax->error(ctxt->userData, "String not closed\n");
6987 ctxt->wellFormed = 0;
6988 ctxt->disableSAX = 1;
6989 } else
6990 NEXT;
6991 } else if (RAW == '"'){
6992 NEXT;
6993 if ((RAW == 'n') && (NXT(1) == 'o')) {
6994 standalone = 0;
6995 SKIP(2);
6996 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6997 (NXT(2) == 's')) {
6998 standalone = 1;
6999 SKIP(3);
7000 } else {
7001 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7003 ctxt->sax->error(ctxt->userData,
7004 "standalone accepts only 'yes' or 'no'\n");
7005 ctxt->wellFormed = 0;
7006 ctxt->disableSAX = 1;
7007 }
7008 if (RAW != '"') {
7009 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7011 ctxt->sax->error(ctxt->userData, "String not closed\n");
7012 ctxt->wellFormed = 0;
7013 ctxt->disableSAX = 1;
7014 } else
7015 NEXT;
7016 } else {
7017 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7019 ctxt->sax->error(ctxt->userData,
7020 "Standalone value not found\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 }
7024 }
7025 return(standalone);
7026}
7027
7028/**
7029 * xmlParseXMLDecl:
7030 * @ctxt: an XML parser context
7031 *
7032 * parse an XML declaration header
7033 *
7034 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7035 */
7036
7037void
7038xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7039 xmlChar *version;
7040
7041 /*
7042 * We know that '<?xml' is here.
7043 */
7044 SKIP(5);
7045
7046 if (!IS_BLANK(RAW)) {
7047 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7049 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7050 ctxt->wellFormed = 0;
7051 ctxt->disableSAX = 1;
7052 }
7053 SKIP_BLANKS;
7054
7055 /*
7056 * We should have the VersionInfo here.
7057 */
7058 version = xmlParseVersionInfo(ctxt);
7059 if (version == NULL)
7060 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7061 ctxt->version = xmlStrdup(version);
7062 xmlFree(version);
7063
7064 /*
7065 * We may have the encoding declaration
7066 */
7067 if (!IS_BLANK(RAW)) {
7068 if ((RAW == '?') && (NXT(1) == '>')) {
7069 SKIP(2);
7070 return;
7071 }
7072 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7073 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7074 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7075 ctxt->wellFormed = 0;
7076 ctxt->disableSAX = 1;
7077 }
7078 xmlParseEncodingDecl(ctxt);
7079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7080 /*
7081 * The XML REC instructs us to stop parsing right here
7082 */
7083 return;
7084 }
7085
7086 /*
7087 * We may have the standalone status.
7088 */
7089 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7090 if ((RAW == '?') && (NXT(1) == '>')) {
7091 SKIP(2);
7092 return;
7093 }
7094 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7096 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7097 ctxt->wellFormed = 0;
7098 ctxt->disableSAX = 1;
7099 }
7100 SKIP_BLANKS;
7101 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7102
7103 SKIP_BLANKS;
7104 if ((RAW == '?') && (NXT(1) == '>')) {
7105 SKIP(2);
7106 } else if (RAW == '>') {
7107 /* Deprecated old WD ... */
7108 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7110 ctxt->sax->error(ctxt->userData,
7111 "XML declaration must end-up with '?>'\n");
7112 ctxt->wellFormed = 0;
7113 ctxt->disableSAX = 1;
7114 NEXT;
7115 } else {
7116 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7118 ctxt->sax->error(ctxt->userData,
7119 "parsing XML declaration: '?>' expected\n");
7120 ctxt->wellFormed = 0;
7121 ctxt->disableSAX = 1;
7122 MOVETO_ENDTAG(CUR_PTR);
7123 NEXT;
7124 }
7125}
7126
7127/**
7128 * xmlParseMisc:
7129 * @ctxt: an XML parser context
7130 *
7131 * parse an XML Misc* optionnal field.
7132 *
7133 * [27] Misc ::= Comment | PI | S
7134 */
7135
7136void
7137xmlParseMisc(xmlParserCtxtPtr ctxt) {
7138 while (((RAW == '<') && (NXT(1) == '?')) ||
7139 ((RAW == '<') && (NXT(1) == '!') &&
7140 (NXT(2) == '-') && (NXT(3) == '-')) ||
7141 IS_BLANK(CUR)) {
7142 if ((RAW == '<') && (NXT(1) == '?')) {
7143 xmlParsePI(ctxt);
7144 } else if (IS_BLANK(CUR)) {
7145 NEXT;
7146 } else
7147 xmlParseComment(ctxt);
7148 }
7149}
7150
7151/**
7152 * xmlParseDocument:
7153 * @ctxt: an XML parser context
7154 *
7155 * parse an XML document (and build a tree if using the standard SAX
7156 * interface).
7157 *
7158 * [1] document ::= prolog element Misc*
7159 *
7160 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7161 *
7162 * Returns 0, -1 in case of error. the parser context is augmented
7163 * as a result of the parsing.
7164 */
7165
7166int
7167xmlParseDocument(xmlParserCtxtPtr ctxt) {
7168 xmlChar start[4];
7169 xmlCharEncoding enc;
7170
7171 xmlInitParser();
7172
7173 GROW;
7174
7175 /*
7176 * SAX: beginning of the document processing.
7177 */
7178 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7179 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7180
7181 /*
7182 * Get the 4 first bytes and decode the charset
7183 * if enc != XML_CHAR_ENCODING_NONE
7184 * plug some encoding conversion routines.
7185 */
7186 start[0] = RAW;
7187 start[1] = NXT(1);
7188 start[2] = NXT(2);
7189 start[3] = NXT(3);
7190 enc = xmlDetectCharEncoding(start, 4);
7191 if (enc != XML_CHAR_ENCODING_NONE) {
7192 xmlSwitchEncoding(ctxt, enc);
7193 }
7194
7195
7196 if (CUR == 0) {
7197 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7199 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7200 ctxt->wellFormed = 0;
7201 ctxt->disableSAX = 1;
7202 }
7203
7204 /*
7205 * Check for the XMLDecl in the Prolog.
7206 */
7207 GROW;
7208 if ((RAW == '<') && (NXT(1) == '?') &&
7209 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7210 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7211
7212 /*
7213 * Note that we will switch encoding on the fly.
7214 */
7215 xmlParseXMLDecl(ctxt);
7216 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7217 /*
7218 * The XML REC instructs us to stop parsing right here
7219 */
7220 return(-1);
7221 }
7222 ctxt->standalone = ctxt->input->standalone;
7223 SKIP_BLANKS;
7224 } else {
7225 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7226 }
7227 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7228 ctxt->sax->startDocument(ctxt->userData);
7229
7230 /*
7231 * The Misc part of the Prolog
7232 */
7233 GROW;
7234 xmlParseMisc(ctxt);
7235
7236 /*
7237 * Then possibly doc type declaration(s) and more Misc
7238 * (doctypedecl Misc*)?
7239 */
7240 GROW;
7241 if ((RAW == '<') && (NXT(1) == '!') &&
7242 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7243 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7244 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7245 (NXT(8) == 'E')) {
7246
7247 ctxt->inSubset = 1;
7248 xmlParseDocTypeDecl(ctxt);
7249 if (RAW == '[') {
7250 ctxt->instate = XML_PARSER_DTD;
7251 xmlParseInternalSubset(ctxt);
7252 }
7253
7254 /*
7255 * Create and update the external subset.
7256 */
7257 ctxt->inSubset = 2;
7258 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7259 (!ctxt->disableSAX))
7260 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7261 ctxt->extSubSystem, ctxt->extSubURI);
7262 ctxt->inSubset = 0;
7263
7264
7265 ctxt->instate = XML_PARSER_PROLOG;
7266 xmlParseMisc(ctxt);
7267 }
7268
7269 /*
7270 * Time to start parsing the tree itself
7271 */
7272 GROW;
7273 if (RAW != '<') {
7274 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7276 ctxt->sax->error(ctxt->userData,
7277 "Start tag expected, '<' not found\n");
7278 ctxt->wellFormed = 0;
7279 ctxt->disableSAX = 1;
7280 ctxt->instate = XML_PARSER_EOF;
7281 } else {
7282 ctxt->instate = XML_PARSER_CONTENT;
7283 xmlParseElement(ctxt);
7284 ctxt->instate = XML_PARSER_EPILOG;
7285
7286
7287 /*
7288 * The Misc part at the end
7289 */
7290 xmlParseMisc(ctxt);
7291
7292 if (RAW != 0) {
7293 ctxt->errNo = XML_ERR_DOCUMENT_END;
7294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7295 ctxt->sax->error(ctxt->userData,
7296 "Extra content at the end of the document\n");
7297 ctxt->wellFormed = 0;
7298 ctxt->disableSAX = 1;
7299 }
7300 ctxt->instate = XML_PARSER_EOF;
7301 }
7302
7303 /*
7304 * SAX: end of the document processing.
7305 */
7306 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7307 (!ctxt->disableSAX))
7308 ctxt->sax->endDocument(ctxt->userData);
7309
7310 if (! ctxt->wellFormed) return(-1);
7311 return(0);
7312}
7313
7314/**
7315 * xmlParseExtParsedEnt:
7316 * @ctxt: an XML parser context
7317 *
7318 * parse a genreral parsed entity
7319 * An external general parsed entity is well-formed if it matches the
7320 * production labeled extParsedEnt.
7321 *
7322 * [78] extParsedEnt ::= TextDecl? content
7323 *
7324 * Returns 0, -1 in case of error. the parser context is augmented
7325 * as a result of the parsing.
7326 */
7327
7328int
7329xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7330 xmlChar start[4];
7331 xmlCharEncoding enc;
7332
7333 xmlDefaultSAXHandlerInit();
7334
7335 GROW;
7336
7337 /*
7338 * SAX: beginning of the document processing.
7339 */
7340 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7341 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7342
7343 /*
7344 * Get the 4 first bytes and decode the charset
7345 * if enc != XML_CHAR_ENCODING_NONE
7346 * plug some encoding conversion routines.
7347 */
7348 start[0] = RAW;
7349 start[1] = NXT(1);
7350 start[2] = NXT(2);
7351 start[3] = NXT(3);
7352 enc = xmlDetectCharEncoding(start, 4);
7353 if (enc != XML_CHAR_ENCODING_NONE) {
7354 xmlSwitchEncoding(ctxt, enc);
7355 }
7356
7357
7358 if (CUR == 0) {
7359 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7361 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7362 ctxt->wellFormed = 0;
7363 ctxt->disableSAX = 1;
7364 }
7365
7366 /*
7367 * Check for the XMLDecl in the Prolog.
7368 */
7369 GROW;
7370 if ((RAW == '<') && (NXT(1) == '?') &&
7371 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7372 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7373
7374 /*
7375 * Note that we will switch encoding on the fly.
7376 */
7377 xmlParseXMLDecl(ctxt);
7378 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7379 /*
7380 * The XML REC instructs us to stop parsing right here
7381 */
7382 return(-1);
7383 }
7384 SKIP_BLANKS;
7385 } else {
7386 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7387 }
7388 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7389 ctxt->sax->startDocument(ctxt->userData);
7390
7391 /*
7392 * Doing validity checking on chunk doesn't make sense
7393 */
7394 ctxt->instate = XML_PARSER_CONTENT;
7395 ctxt->validate = 0;
7396 ctxt->loadsubset = 0;
7397 ctxt->depth = 0;
7398
7399 xmlParseContent(ctxt);
7400
7401 if ((RAW == '<') && (NXT(1) == '/')) {
7402 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7404 ctxt->sax->error(ctxt->userData,
7405 "chunk is not well balanced\n");
7406 ctxt->wellFormed = 0;
7407 ctxt->disableSAX = 1;
7408 } else if (RAW != 0) {
7409 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7411 ctxt->sax->error(ctxt->userData,
7412 "extra content at the end of well balanced chunk\n");
7413 ctxt->wellFormed = 0;
7414 ctxt->disableSAX = 1;
7415 }
7416
7417 /*
7418 * SAX: end of the document processing.
7419 */
7420 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7421 (!ctxt->disableSAX))
7422 ctxt->sax->endDocument(ctxt->userData);
7423
7424 if (! ctxt->wellFormed) return(-1);
7425 return(0);
7426}
7427
7428/************************************************************************
7429 * *
7430 * Progressive parsing interfaces *
7431 * *
7432 ************************************************************************/
7433
7434/**
7435 * xmlParseLookupSequence:
7436 * @ctxt: an XML parser context
7437 * @first: the first char to lookup
7438 * @next: the next char to lookup or zero
7439 * @third: the next char to lookup or zero
7440 *
7441 * Try to find if a sequence (first, next, third) or just (first next) or
7442 * (first) is available in the input stream.
7443 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7444 * to avoid rescanning sequences of bytes, it DOES change the state of the
7445 * parser, do not use liberally.
7446 *
7447 * Returns the index to the current parsing point if the full sequence
7448 * is available, -1 otherwise.
7449 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007450static int
Owen Taylor3473f882001-02-23 17:55:21 +00007451xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7452 xmlChar next, xmlChar third) {
7453 int base, len;
7454 xmlParserInputPtr in;
7455 const xmlChar *buf;
7456
7457 in = ctxt->input;
7458 if (in == NULL) return(-1);
7459 base = in->cur - in->base;
7460 if (base < 0) return(-1);
7461 if (ctxt->checkIndex > base)
7462 base = ctxt->checkIndex;
7463 if (in->buf == NULL) {
7464 buf = in->base;
7465 len = in->length;
7466 } else {
7467 buf = in->buf->buffer->content;
7468 len = in->buf->buffer->use;
7469 }
7470 /* take into account the sequence length */
7471 if (third) len -= 2;
7472 else if (next) len --;
7473 for (;base < len;base++) {
7474 if (buf[base] == first) {
7475 if (third != 0) {
7476 if ((buf[base + 1] != next) ||
7477 (buf[base + 2] != third)) continue;
7478 } else if (next != 0) {
7479 if (buf[base + 1] != next) continue;
7480 }
7481 ctxt->checkIndex = 0;
7482#ifdef DEBUG_PUSH
7483 if (next == 0)
7484 xmlGenericError(xmlGenericErrorContext,
7485 "PP: lookup '%c' found at %d\n",
7486 first, base);
7487 else if (third == 0)
7488 xmlGenericError(xmlGenericErrorContext,
7489 "PP: lookup '%c%c' found at %d\n",
7490 first, next, base);
7491 else
7492 xmlGenericError(xmlGenericErrorContext,
7493 "PP: lookup '%c%c%c' found at %d\n",
7494 first, next, third, base);
7495#endif
7496 return(base - (in->cur - in->base));
7497 }
7498 }
7499 ctxt->checkIndex = base;
7500#ifdef DEBUG_PUSH
7501 if (next == 0)
7502 xmlGenericError(xmlGenericErrorContext,
7503 "PP: lookup '%c' failed\n", first);
7504 else if (third == 0)
7505 xmlGenericError(xmlGenericErrorContext,
7506 "PP: lookup '%c%c' failed\n", first, next);
7507 else
7508 xmlGenericError(xmlGenericErrorContext,
7509 "PP: lookup '%c%c%c' failed\n", first, next, third);
7510#endif
7511 return(-1);
7512}
7513
7514/**
7515 * xmlParseTryOrFinish:
7516 * @ctxt: an XML parser context
7517 * @terminate: last chunk indicator
7518 *
7519 * Try to progress on parsing
7520 *
7521 * Returns zero if no parsing was possible
7522 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007523static int
Owen Taylor3473f882001-02-23 17:55:21 +00007524xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7525 int ret = 0;
7526 int avail;
7527 xmlChar cur, next;
7528
7529#ifdef DEBUG_PUSH
7530 switch (ctxt->instate) {
7531 case XML_PARSER_EOF:
7532 xmlGenericError(xmlGenericErrorContext,
7533 "PP: try EOF\n"); break;
7534 case XML_PARSER_START:
7535 xmlGenericError(xmlGenericErrorContext,
7536 "PP: try START\n"); break;
7537 case XML_PARSER_MISC:
7538 xmlGenericError(xmlGenericErrorContext,
7539 "PP: try MISC\n");break;
7540 case XML_PARSER_COMMENT:
7541 xmlGenericError(xmlGenericErrorContext,
7542 "PP: try COMMENT\n");break;
7543 case XML_PARSER_PROLOG:
7544 xmlGenericError(xmlGenericErrorContext,
7545 "PP: try PROLOG\n");break;
7546 case XML_PARSER_START_TAG:
7547 xmlGenericError(xmlGenericErrorContext,
7548 "PP: try START_TAG\n");break;
7549 case XML_PARSER_CONTENT:
7550 xmlGenericError(xmlGenericErrorContext,
7551 "PP: try CONTENT\n");break;
7552 case XML_PARSER_CDATA_SECTION:
7553 xmlGenericError(xmlGenericErrorContext,
7554 "PP: try CDATA_SECTION\n");break;
7555 case XML_PARSER_END_TAG:
7556 xmlGenericError(xmlGenericErrorContext,
7557 "PP: try END_TAG\n");break;
7558 case XML_PARSER_ENTITY_DECL:
7559 xmlGenericError(xmlGenericErrorContext,
7560 "PP: try ENTITY_DECL\n");break;
7561 case XML_PARSER_ENTITY_VALUE:
7562 xmlGenericError(xmlGenericErrorContext,
7563 "PP: try ENTITY_VALUE\n");break;
7564 case XML_PARSER_ATTRIBUTE_VALUE:
7565 xmlGenericError(xmlGenericErrorContext,
7566 "PP: try ATTRIBUTE_VALUE\n");break;
7567 case XML_PARSER_DTD:
7568 xmlGenericError(xmlGenericErrorContext,
7569 "PP: try DTD\n");break;
7570 case XML_PARSER_EPILOG:
7571 xmlGenericError(xmlGenericErrorContext,
7572 "PP: try EPILOG\n");break;
7573 case XML_PARSER_PI:
7574 xmlGenericError(xmlGenericErrorContext,
7575 "PP: try PI\n");break;
7576 case XML_PARSER_IGNORE:
7577 xmlGenericError(xmlGenericErrorContext,
7578 "PP: try IGNORE\n");break;
7579 }
7580#endif
7581
7582 while (1) {
7583 /*
7584 * Pop-up of finished entities.
7585 */
7586 while ((RAW == 0) && (ctxt->inputNr > 1))
7587 xmlPopInput(ctxt);
7588
7589 if (ctxt->input ==NULL) break;
7590 if (ctxt->input->buf == NULL)
7591 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7592 else
7593 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7594 if (avail < 1)
7595 goto done;
7596 switch (ctxt->instate) {
7597 case XML_PARSER_EOF:
7598 /*
7599 * Document parsing is done !
7600 */
7601 goto done;
7602 case XML_PARSER_START:
7603 /*
7604 * Very first chars read from the document flow.
7605 */
Owen Taylor3473f882001-02-23 17:55:21 +00007606 if (avail < 2)
7607 goto done;
7608
7609 cur = ctxt->input->cur[0];
7610 next = ctxt->input->cur[1];
7611 if (cur == 0) {
7612 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7613 ctxt->sax->setDocumentLocator(ctxt->userData,
7614 &xmlDefaultSAXLocator);
7615 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7617 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7618 ctxt->wellFormed = 0;
7619 ctxt->disableSAX = 1;
7620 ctxt->instate = XML_PARSER_EOF;
7621#ifdef DEBUG_PUSH
7622 xmlGenericError(xmlGenericErrorContext,
7623 "PP: entering EOF\n");
7624#endif
7625 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7626 ctxt->sax->endDocument(ctxt->userData);
7627 goto done;
7628 }
7629 if ((cur == '<') && (next == '?')) {
7630 /* PI or XML decl */
7631 if (avail < 5) return(ret);
7632 if ((!terminate) &&
7633 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7634 return(ret);
7635 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7636 ctxt->sax->setDocumentLocator(ctxt->userData,
7637 &xmlDefaultSAXLocator);
7638 if ((ctxt->input->cur[2] == 'x') &&
7639 (ctxt->input->cur[3] == 'm') &&
7640 (ctxt->input->cur[4] == 'l') &&
7641 (IS_BLANK(ctxt->input->cur[5]))) {
7642 ret += 5;
7643#ifdef DEBUG_PUSH
7644 xmlGenericError(xmlGenericErrorContext,
7645 "PP: Parsing XML Decl\n");
7646#endif
7647 xmlParseXMLDecl(ctxt);
7648 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7649 /*
7650 * The XML REC instructs us to stop parsing right
7651 * here
7652 */
7653 ctxt->instate = XML_PARSER_EOF;
7654 return(0);
7655 }
7656 ctxt->standalone = ctxt->input->standalone;
7657 if ((ctxt->encoding == NULL) &&
7658 (ctxt->input->encoding != NULL))
7659 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7660 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7661 (!ctxt->disableSAX))
7662 ctxt->sax->startDocument(ctxt->userData);
7663 ctxt->instate = XML_PARSER_MISC;
7664#ifdef DEBUG_PUSH
7665 xmlGenericError(xmlGenericErrorContext,
7666 "PP: entering MISC\n");
7667#endif
7668 } else {
7669 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7670 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7671 (!ctxt->disableSAX))
7672 ctxt->sax->startDocument(ctxt->userData);
7673 ctxt->instate = XML_PARSER_MISC;
7674#ifdef DEBUG_PUSH
7675 xmlGenericError(xmlGenericErrorContext,
7676 "PP: entering MISC\n");
7677#endif
7678 }
7679 } else {
7680 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7681 ctxt->sax->setDocumentLocator(ctxt->userData,
7682 &xmlDefaultSAXLocator);
7683 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7684 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7685 (!ctxt->disableSAX))
7686 ctxt->sax->startDocument(ctxt->userData);
7687 ctxt->instate = XML_PARSER_MISC;
7688#ifdef DEBUG_PUSH
7689 xmlGenericError(xmlGenericErrorContext,
7690 "PP: entering MISC\n");
7691#endif
7692 }
7693 break;
7694 case XML_PARSER_MISC:
7695 SKIP_BLANKS;
7696 if (ctxt->input->buf == NULL)
7697 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7698 else
7699 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7700 if (avail < 2)
7701 goto done;
7702 cur = ctxt->input->cur[0];
7703 next = ctxt->input->cur[1];
7704 if ((cur == '<') && (next == '?')) {
7705 if ((!terminate) &&
7706 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7707 goto done;
7708#ifdef DEBUG_PUSH
7709 xmlGenericError(xmlGenericErrorContext,
7710 "PP: Parsing PI\n");
7711#endif
7712 xmlParsePI(ctxt);
7713 } else if ((cur == '<') && (next == '!') &&
7714 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7715 if ((!terminate) &&
7716 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7717 goto done;
7718#ifdef DEBUG_PUSH
7719 xmlGenericError(xmlGenericErrorContext,
7720 "PP: Parsing Comment\n");
7721#endif
7722 xmlParseComment(ctxt);
7723 ctxt->instate = XML_PARSER_MISC;
7724 } else if ((cur == '<') && (next == '!') &&
7725 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7726 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7727 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7728 (ctxt->input->cur[8] == 'E')) {
7729 if ((!terminate) &&
7730 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7731 goto done;
7732#ifdef DEBUG_PUSH
7733 xmlGenericError(xmlGenericErrorContext,
7734 "PP: Parsing internal subset\n");
7735#endif
7736 ctxt->inSubset = 1;
7737 xmlParseDocTypeDecl(ctxt);
7738 if (RAW == '[') {
7739 ctxt->instate = XML_PARSER_DTD;
7740#ifdef DEBUG_PUSH
7741 xmlGenericError(xmlGenericErrorContext,
7742 "PP: entering DTD\n");
7743#endif
7744 } else {
7745 /*
7746 * Create and update the external subset.
7747 */
7748 ctxt->inSubset = 2;
7749 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7750 (ctxt->sax->externalSubset != NULL))
7751 ctxt->sax->externalSubset(ctxt->userData,
7752 ctxt->intSubName, ctxt->extSubSystem,
7753 ctxt->extSubURI);
7754 ctxt->inSubset = 0;
7755 ctxt->instate = XML_PARSER_PROLOG;
7756#ifdef DEBUG_PUSH
7757 xmlGenericError(xmlGenericErrorContext,
7758 "PP: entering PROLOG\n");
7759#endif
7760 }
7761 } else if ((cur == '<') && (next == '!') &&
7762 (avail < 9)) {
7763 goto done;
7764 } else {
7765 ctxt->instate = XML_PARSER_START_TAG;
7766#ifdef DEBUG_PUSH
7767 xmlGenericError(xmlGenericErrorContext,
7768 "PP: entering START_TAG\n");
7769#endif
7770 }
7771 break;
7772 case XML_PARSER_IGNORE:
7773 xmlGenericError(xmlGenericErrorContext,
7774 "PP: internal error, state == IGNORE");
7775 ctxt->instate = XML_PARSER_DTD;
7776#ifdef DEBUG_PUSH
7777 xmlGenericError(xmlGenericErrorContext,
7778 "PP: entering DTD\n");
7779#endif
7780 break;
7781 case XML_PARSER_PROLOG:
7782 SKIP_BLANKS;
7783 if (ctxt->input->buf == NULL)
7784 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7785 else
7786 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7787 if (avail < 2)
7788 goto done;
7789 cur = ctxt->input->cur[0];
7790 next = ctxt->input->cur[1];
7791 if ((cur == '<') && (next == '?')) {
7792 if ((!terminate) &&
7793 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7794 goto done;
7795#ifdef DEBUG_PUSH
7796 xmlGenericError(xmlGenericErrorContext,
7797 "PP: Parsing PI\n");
7798#endif
7799 xmlParsePI(ctxt);
7800 } else if ((cur == '<') && (next == '!') &&
7801 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7802 if ((!terminate) &&
7803 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7804 goto done;
7805#ifdef DEBUG_PUSH
7806 xmlGenericError(xmlGenericErrorContext,
7807 "PP: Parsing Comment\n");
7808#endif
7809 xmlParseComment(ctxt);
7810 ctxt->instate = XML_PARSER_PROLOG;
7811 } else if ((cur == '<') && (next == '!') &&
7812 (avail < 4)) {
7813 goto done;
7814 } else {
7815 ctxt->instate = XML_PARSER_START_TAG;
7816#ifdef DEBUG_PUSH
7817 xmlGenericError(xmlGenericErrorContext,
7818 "PP: entering START_TAG\n");
7819#endif
7820 }
7821 break;
7822 case XML_PARSER_EPILOG:
7823 SKIP_BLANKS;
7824 if (ctxt->input->buf == NULL)
7825 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7826 else
7827 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7828 if (avail < 2)
7829 goto done;
7830 cur = ctxt->input->cur[0];
7831 next = ctxt->input->cur[1];
7832 if ((cur == '<') && (next == '?')) {
7833 if ((!terminate) &&
7834 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7835 goto done;
7836#ifdef DEBUG_PUSH
7837 xmlGenericError(xmlGenericErrorContext,
7838 "PP: Parsing PI\n");
7839#endif
7840 xmlParsePI(ctxt);
7841 ctxt->instate = XML_PARSER_EPILOG;
7842 } else if ((cur == '<') && (next == '!') &&
7843 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7844 if ((!terminate) &&
7845 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7846 goto done;
7847#ifdef DEBUG_PUSH
7848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: Parsing Comment\n");
7850#endif
7851 xmlParseComment(ctxt);
7852 ctxt->instate = XML_PARSER_EPILOG;
7853 } else if ((cur == '<') && (next == '!') &&
7854 (avail < 4)) {
7855 goto done;
7856 } else {
7857 ctxt->errNo = XML_ERR_DOCUMENT_END;
7858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7859 ctxt->sax->error(ctxt->userData,
7860 "Extra content at the end of the document\n");
7861 ctxt->wellFormed = 0;
7862 ctxt->disableSAX = 1;
7863 ctxt->instate = XML_PARSER_EOF;
7864#ifdef DEBUG_PUSH
7865 xmlGenericError(xmlGenericErrorContext,
7866 "PP: entering EOF\n");
7867#endif
7868 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7869 (!ctxt->disableSAX))
7870 ctxt->sax->endDocument(ctxt->userData);
7871 goto done;
7872 }
7873 break;
7874 case XML_PARSER_START_TAG: {
7875 xmlChar *name, *oldname;
7876
7877 if ((avail < 2) && (ctxt->inputNr == 1))
7878 goto done;
7879 cur = ctxt->input->cur[0];
7880 if (cur != '<') {
7881 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7883 ctxt->sax->error(ctxt->userData,
7884 "Start tag expect, '<' not found\n");
7885 ctxt->wellFormed = 0;
7886 ctxt->disableSAX = 1;
7887 ctxt->instate = XML_PARSER_EOF;
7888#ifdef DEBUG_PUSH
7889 xmlGenericError(xmlGenericErrorContext,
7890 "PP: entering EOF\n");
7891#endif
7892 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7893 (!ctxt->disableSAX))
7894 ctxt->sax->endDocument(ctxt->userData);
7895 goto done;
7896 }
7897 if ((!terminate) &&
7898 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7899 goto done;
7900 if (ctxt->spaceNr == 0)
7901 spacePush(ctxt, -1);
7902 else
7903 spacePush(ctxt, *ctxt->space);
7904 name = xmlParseStartTag(ctxt);
7905 if (name == NULL) {
7906 spacePop(ctxt);
7907 ctxt->instate = XML_PARSER_EOF;
7908#ifdef DEBUG_PUSH
7909 xmlGenericError(xmlGenericErrorContext,
7910 "PP: entering EOF\n");
7911#endif
7912 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7913 (!ctxt->disableSAX))
7914 ctxt->sax->endDocument(ctxt->userData);
7915 goto done;
7916 }
7917 namePush(ctxt, xmlStrdup(name));
7918
7919 /*
7920 * [ VC: Root Element Type ]
7921 * The Name in the document type declaration must match
7922 * the element type of the root element.
7923 */
7924 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7925 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7926 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7927
7928 /*
7929 * Check for an Empty Element.
7930 */
7931 if ((RAW == '/') && (NXT(1) == '>')) {
7932 SKIP(2);
7933 if ((ctxt->sax != NULL) &&
7934 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7935 ctxt->sax->endElement(ctxt->userData, name);
7936 xmlFree(name);
7937 oldname = namePop(ctxt);
7938 spacePop(ctxt);
7939 if (oldname != NULL) {
7940#ifdef DEBUG_STACK
7941 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7942#endif
7943 xmlFree(oldname);
7944 }
7945 if (ctxt->name == NULL) {
7946 ctxt->instate = XML_PARSER_EPILOG;
7947#ifdef DEBUG_PUSH
7948 xmlGenericError(xmlGenericErrorContext,
7949 "PP: entering EPILOG\n");
7950#endif
7951 } else {
7952 ctxt->instate = XML_PARSER_CONTENT;
7953#ifdef DEBUG_PUSH
7954 xmlGenericError(xmlGenericErrorContext,
7955 "PP: entering CONTENT\n");
7956#endif
7957 }
7958 break;
7959 }
7960 if (RAW == '>') {
7961 NEXT;
7962 } else {
7963 ctxt->errNo = XML_ERR_GT_REQUIRED;
7964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7965 ctxt->sax->error(ctxt->userData,
7966 "Couldn't find end of Start Tag %s\n",
7967 name);
7968 ctxt->wellFormed = 0;
7969 ctxt->disableSAX = 1;
7970
7971 /*
7972 * end of parsing of this node.
7973 */
7974 nodePop(ctxt);
7975 oldname = namePop(ctxt);
7976 spacePop(ctxt);
7977 if (oldname != NULL) {
7978#ifdef DEBUG_STACK
7979 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7980#endif
7981 xmlFree(oldname);
7982 }
7983 }
7984 xmlFree(name);
7985 ctxt->instate = XML_PARSER_CONTENT;
7986#ifdef DEBUG_PUSH
7987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: entering CONTENT\n");
7989#endif
7990 break;
7991 }
7992 case XML_PARSER_CONTENT: {
7993 const xmlChar *test;
7994 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00007995 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00007996
7997 /*
7998 * Handle preparsed entities and charRef
7999 */
8000 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008001 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008002
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008003 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008004 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8005 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008006 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008007 ctxt->token = 0;
8008 }
8009 if ((avail < 2) && (ctxt->inputNr == 1))
8010 goto done;
8011 cur = ctxt->input->cur[0];
8012 next = ctxt->input->cur[1];
8013
8014 test = CUR_PTR;
8015 cons = ctxt->input->consumed;
8016 tok = ctxt->token;
8017 if ((cur == '<') && (next == '?')) {
8018 if ((!terminate) &&
8019 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8020 goto done;
8021#ifdef DEBUG_PUSH
8022 xmlGenericError(xmlGenericErrorContext,
8023 "PP: Parsing PI\n");
8024#endif
8025 xmlParsePI(ctxt);
8026 } else if ((cur == '<') && (next == '!') &&
8027 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8028 if ((!terminate) &&
8029 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8030 goto done;
8031#ifdef DEBUG_PUSH
8032 xmlGenericError(xmlGenericErrorContext,
8033 "PP: Parsing Comment\n");
8034#endif
8035 xmlParseComment(ctxt);
8036 ctxt->instate = XML_PARSER_CONTENT;
8037 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8038 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8039 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8040 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8041 (ctxt->input->cur[8] == '[')) {
8042 SKIP(9);
8043 ctxt->instate = XML_PARSER_CDATA_SECTION;
8044#ifdef DEBUG_PUSH
8045 xmlGenericError(xmlGenericErrorContext,
8046 "PP: entering CDATA_SECTION\n");
8047#endif
8048 break;
8049 } else if ((cur == '<') && (next == '!') &&
8050 (avail < 9)) {
8051 goto done;
8052 } else if ((cur == '<') && (next == '/')) {
8053 ctxt->instate = XML_PARSER_END_TAG;
8054#ifdef DEBUG_PUSH
8055 xmlGenericError(xmlGenericErrorContext,
8056 "PP: entering END_TAG\n");
8057#endif
8058 break;
8059 } else if (cur == '<') {
8060 ctxt->instate = XML_PARSER_START_TAG;
8061#ifdef DEBUG_PUSH
8062 xmlGenericError(xmlGenericErrorContext,
8063 "PP: entering START_TAG\n");
8064#endif
8065 break;
8066 } else if (cur == '&') {
8067 if ((!terminate) &&
8068 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8069 goto done;
8070#ifdef DEBUG_PUSH
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: Parsing Reference\n");
8073#endif
8074 xmlParseReference(ctxt);
8075 } else {
8076 /* TODO Avoid the extra copy, handle directly !!! */
8077 /*
8078 * Goal of the following test is:
8079 * - minimize calls to the SAX 'character' callback
8080 * when they are mergeable
8081 * - handle an problem for isBlank when we only parse
8082 * a sequence of blank chars and the next one is
8083 * not available to check against '<' presence.
8084 * - tries to homogenize the differences in SAX
8085 * callbacks beween the push and pull versions
8086 * of the parser.
8087 */
8088 if ((ctxt->inputNr == 1) &&
8089 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8090 if ((!terminate) &&
8091 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8092 goto done;
8093 }
8094 ctxt->checkIndex = 0;
8095#ifdef DEBUG_PUSH
8096 xmlGenericError(xmlGenericErrorContext,
8097 "PP: Parsing char data\n");
8098#endif
8099 xmlParseCharData(ctxt, 0);
8100 }
8101 /*
8102 * Pop-up of finished entities.
8103 */
8104 while ((RAW == 0) && (ctxt->inputNr > 1))
8105 xmlPopInput(ctxt);
8106 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8107 (tok == ctxt->token)) {
8108 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8110 ctxt->sax->error(ctxt->userData,
8111 "detected an error in element content\n");
8112 ctxt->wellFormed = 0;
8113 ctxt->disableSAX = 1;
8114 ctxt->instate = XML_PARSER_EOF;
8115 break;
8116 }
8117 break;
8118 }
8119 case XML_PARSER_CDATA_SECTION: {
8120 /*
8121 * The Push mode need to have the SAX callback for
8122 * cdataBlock merge back contiguous callbacks.
8123 */
8124 int base;
8125
8126 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8127 if (base < 0) {
8128 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8129 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8130 if (ctxt->sax->cdataBlock != NULL)
8131 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8132 XML_PARSER_BIG_BUFFER_SIZE);
8133 }
8134 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8135 ctxt->checkIndex = 0;
8136 }
8137 goto done;
8138 } else {
8139 if ((ctxt->sax != NULL) && (base > 0) &&
8140 (!ctxt->disableSAX)) {
8141 if (ctxt->sax->cdataBlock != NULL)
8142 ctxt->sax->cdataBlock(ctxt->userData,
8143 ctxt->input->cur, base);
8144 }
8145 SKIP(base + 3);
8146 ctxt->checkIndex = 0;
8147 ctxt->instate = XML_PARSER_CONTENT;
8148#ifdef DEBUG_PUSH
8149 xmlGenericError(xmlGenericErrorContext,
8150 "PP: entering CONTENT\n");
8151#endif
8152 }
8153 break;
8154 }
8155 case XML_PARSER_END_TAG:
8156 if (avail < 2)
8157 goto done;
8158 if ((!terminate) &&
8159 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8160 goto done;
8161 xmlParseEndTag(ctxt);
8162 if (ctxt->name == NULL) {
8163 ctxt->instate = XML_PARSER_EPILOG;
8164#ifdef DEBUG_PUSH
8165 xmlGenericError(xmlGenericErrorContext,
8166 "PP: entering EPILOG\n");
8167#endif
8168 } else {
8169 ctxt->instate = XML_PARSER_CONTENT;
8170#ifdef DEBUG_PUSH
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: entering CONTENT\n");
8173#endif
8174 }
8175 break;
8176 case XML_PARSER_DTD: {
8177 /*
8178 * Sorry but progressive parsing of the internal subset
8179 * is not expected to be supported. We first check that
8180 * the full content of the internal subset is available and
8181 * the parsing is launched only at that point.
8182 * Internal subset ends up with "']' S? '>'" in an unescaped
8183 * section and not in a ']]>' sequence which are conditional
8184 * sections (whoever argued to keep that crap in XML deserve
8185 * a place in hell !).
8186 */
8187 int base, i;
8188 xmlChar *buf;
8189 xmlChar quote = 0;
8190
8191 base = ctxt->input->cur - ctxt->input->base;
8192 if (base < 0) return(0);
8193 if (ctxt->checkIndex > base)
8194 base = ctxt->checkIndex;
8195 buf = ctxt->input->buf->buffer->content;
8196 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8197 base++) {
8198 if (quote != 0) {
8199 if (buf[base] == quote)
8200 quote = 0;
8201 continue;
8202 }
8203 if (buf[base] == '"') {
8204 quote = '"';
8205 continue;
8206 }
8207 if (buf[base] == '\'') {
8208 quote = '\'';
8209 continue;
8210 }
8211 if (buf[base] == ']') {
8212 if ((unsigned int) base +1 >=
8213 ctxt->input->buf->buffer->use)
8214 break;
8215 if (buf[base + 1] == ']') {
8216 /* conditional crap, skip both ']' ! */
8217 base++;
8218 continue;
8219 }
8220 for (i = 0;
8221 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8222 i++) {
8223 if (buf[base + i] == '>')
8224 goto found_end_int_subset;
8225 }
8226 break;
8227 }
8228 }
8229 /*
8230 * We didn't found the end of the Internal subset
8231 */
8232 if (quote == 0)
8233 ctxt->checkIndex = base;
8234#ifdef DEBUG_PUSH
8235 if (next == 0)
8236 xmlGenericError(xmlGenericErrorContext,
8237 "PP: lookup of int subset end filed\n");
8238#endif
8239 goto done;
8240
8241found_end_int_subset:
8242 xmlParseInternalSubset(ctxt);
8243 ctxt->inSubset = 2;
8244 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8245 (ctxt->sax->externalSubset != NULL))
8246 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8247 ctxt->extSubSystem, ctxt->extSubURI);
8248 ctxt->inSubset = 0;
8249 ctxt->instate = XML_PARSER_PROLOG;
8250 ctxt->checkIndex = 0;
8251#ifdef DEBUG_PUSH
8252 xmlGenericError(xmlGenericErrorContext,
8253 "PP: entering PROLOG\n");
8254#endif
8255 break;
8256 }
8257 case XML_PARSER_COMMENT:
8258 xmlGenericError(xmlGenericErrorContext,
8259 "PP: internal error, state == COMMENT\n");
8260 ctxt->instate = XML_PARSER_CONTENT;
8261#ifdef DEBUG_PUSH
8262 xmlGenericError(xmlGenericErrorContext,
8263 "PP: entering CONTENT\n");
8264#endif
8265 break;
8266 case XML_PARSER_PI:
8267 xmlGenericError(xmlGenericErrorContext,
8268 "PP: internal error, state == PI\n");
8269 ctxt->instate = XML_PARSER_CONTENT;
8270#ifdef DEBUG_PUSH
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: entering CONTENT\n");
8273#endif
8274 break;
8275 case XML_PARSER_ENTITY_DECL:
8276 xmlGenericError(xmlGenericErrorContext,
8277 "PP: internal error, state == ENTITY_DECL\n");
8278 ctxt->instate = XML_PARSER_DTD;
8279#ifdef DEBUG_PUSH
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: entering DTD\n");
8282#endif
8283 break;
8284 case XML_PARSER_ENTITY_VALUE:
8285 xmlGenericError(xmlGenericErrorContext,
8286 "PP: internal error, state == ENTITY_VALUE\n");
8287 ctxt->instate = XML_PARSER_CONTENT;
8288#ifdef DEBUG_PUSH
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: entering DTD\n");
8291#endif
8292 break;
8293 case XML_PARSER_ATTRIBUTE_VALUE:
8294 xmlGenericError(xmlGenericErrorContext,
8295 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8296 ctxt->instate = XML_PARSER_START_TAG;
8297#ifdef DEBUG_PUSH
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: entering START_TAG\n");
8300#endif
8301 break;
8302 case XML_PARSER_SYSTEM_LITERAL:
8303 xmlGenericError(xmlGenericErrorContext,
8304 "PP: internal error, state == SYSTEM_LITERAL\n");
8305 ctxt->instate = XML_PARSER_START_TAG;
8306#ifdef DEBUG_PUSH
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: entering START_TAG\n");
8309#endif
8310 break;
8311 }
8312 }
8313done:
8314#ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8316#endif
8317 return(ret);
8318}
8319
8320/**
Owen Taylor3473f882001-02-23 17:55:21 +00008321 * xmlParseChunk:
8322 * @ctxt: an XML parser context
8323 * @chunk: an char array
8324 * @size: the size in byte of the chunk
8325 * @terminate: last chunk indicator
8326 *
8327 * Parse a Chunk of memory
8328 *
8329 * Returns zero if no error, the xmlParserErrors otherwise.
8330 */
8331int
8332xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8333 int terminate) {
8334 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8335 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8336 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8337 int cur = ctxt->input->cur - ctxt->input->base;
8338
8339 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8340 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8341 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008342 ctxt->input->end =
8343 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008344#ifdef DEBUG_PUSH
8345 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8346#endif
8347
8348 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8349 xmlParseTryOrFinish(ctxt, terminate);
8350 } else if (ctxt->instate != XML_PARSER_EOF) {
8351 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8352 xmlParserInputBufferPtr in = ctxt->input->buf;
8353 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8354 (in->raw != NULL)) {
8355 int nbchars;
8356
8357 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8358 if (nbchars < 0) {
8359 xmlGenericError(xmlGenericErrorContext,
8360 "xmlParseChunk: encoder error\n");
8361 return(XML_ERR_INVALID_ENCODING);
8362 }
8363 }
8364 }
8365 }
8366 xmlParseTryOrFinish(ctxt, terminate);
8367 if (terminate) {
8368 /*
8369 * Check for termination
8370 */
8371 if ((ctxt->instate != XML_PARSER_EOF) &&
8372 (ctxt->instate != XML_PARSER_EPILOG)) {
8373 ctxt->errNo = XML_ERR_DOCUMENT_END;
8374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8375 ctxt->sax->error(ctxt->userData,
8376 "Extra content at the end of the document\n");
8377 ctxt->wellFormed = 0;
8378 ctxt->disableSAX = 1;
8379 }
8380 if (ctxt->instate != XML_PARSER_EOF) {
8381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8382 (!ctxt->disableSAX))
8383 ctxt->sax->endDocument(ctxt->userData);
8384 }
8385 ctxt->instate = XML_PARSER_EOF;
8386 }
8387 return((xmlParserErrors) ctxt->errNo);
8388}
8389
8390/************************************************************************
8391 * *
8392 * I/O front end functions to the parser *
8393 * *
8394 ************************************************************************/
8395
8396/**
8397 * xmlStopParser:
8398 * @ctxt: an XML parser context
8399 *
8400 * Blocks further parser processing
8401 */
8402void
8403xmlStopParser(xmlParserCtxtPtr ctxt) {
8404 ctxt->instate = XML_PARSER_EOF;
8405 if (ctxt->input != NULL)
8406 ctxt->input->cur = BAD_CAST"";
8407}
8408
8409/**
8410 * xmlCreatePushParserCtxt:
8411 * @sax: a SAX handler
8412 * @user_data: The user data returned on SAX callbacks
8413 * @chunk: a pointer to an array of chars
8414 * @size: number of chars in the array
8415 * @filename: an optional file name or URI
8416 *
8417 * Create a parser context for using the XML parser in push mode
8418 * To allow content encoding detection, @size should be >= 4
8419 * The value of @filename is used for fetching external entities
8420 * and error/warning reports.
8421 *
8422 * Returns the new parser context or NULL
8423 */
8424xmlParserCtxtPtr
8425xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8426 const char *chunk, int size, const char *filename) {
8427 xmlParserCtxtPtr ctxt;
8428 xmlParserInputPtr inputStream;
8429 xmlParserInputBufferPtr buf;
8430 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8431
8432 /*
8433 * plug some encoding conversion routines
8434 */
8435 if ((chunk != NULL) && (size >= 4))
8436 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8437
8438 buf = xmlAllocParserInputBuffer(enc);
8439 if (buf == NULL) return(NULL);
8440
8441 ctxt = xmlNewParserCtxt();
8442 if (ctxt == NULL) {
8443 xmlFree(buf);
8444 return(NULL);
8445 }
8446 if (sax != NULL) {
8447 if (ctxt->sax != &xmlDefaultSAXHandler)
8448 xmlFree(ctxt->sax);
8449 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8450 if (ctxt->sax == NULL) {
8451 xmlFree(buf);
8452 xmlFree(ctxt);
8453 return(NULL);
8454 }
8455 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8456 if (user_data != NULL)
8457 ctxt->userData = user_data;
8458 }
8459 if (filename == NULL) {
8460 ctxt->directory = NULL;
8461 } else {
8462 ctxt->directory = xmlParserGetDirectory(filename);
8463 }
8464
8465 inputStream = xmlNewInputStream(ctxt);
8466 if (inputStream == NULL) {
8467 xmlFreeParserCtxt(ctxt);
8468 return(NULL);
8469 }
8470
8471 if (filename == NULL)
8472 inputStream->filename = NULL;
8473 else
8474 inputStream->filename = xmlMemStrdup(filename);
8475 inputStream->buf = buf;
8476 inputStream->base = inputStream->buf->buffer->content;
8477 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008478 inputStream->end =
8479 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008480 if (enc != XML_CHAR_ENCODING_NONE) {
8481 xmlSwitchEncoding(ctxt, enc);
8482 }
8483
8484 inputPush(ctxt, inputStream);
8485
8486 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8487 (ctxt->input->buf != NULL)) {
8488 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8489#ifdef DEBUG_PUSH
8490 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8491#endif
8492 }
8493
8494 return(ctxt);
8495}
8496
8497/**
8498 * xmlCreateIOParserCtxt:
8499 * @sax: a SAX handler
8500 * @user_data: The user data returned on SAX callbacks
8501 * @ioread: an I/O read function
8502 * @ioclose: an I/O close function
8503 * @ioctx: an I/O handler
8504 * @enc: the charset encoding if known
8505 *
8506 * Create a parser context for using the XML parser with an existing
8507 * I/O stream
8508 *
8509 * Returns the new parser context or NULL
8510 */
8511xmlParserCtxtPtr
8512xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8513 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8514 void *ioctx, xmlCharEncoding enc) {
8515 xmlParserCtxtPtr ctxt;
8516 xmlParserInputPtr inputStream;
8517 xmlParserInputBufferPtr buf;
8518
8519 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8520 if (buf == NULL) return(NULL);
8521
8522 ctxt = xmlNewParserCtxt();
8523 if (ctxt == NULL) {
8524 xmlFree(buf);
8525 return(NULL);
8526 }
8527 if (sax != NULL) {
8528 if (ctxt->sax != &xmlDefaultSAXHandler)
8529 xmlFree(ctxt->sax);
8530 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8531 if (ctxt->sax == NULL) {
8532 xmlFree(buf);
8533 xmlFree(ctxt);
8534 return(NULL);
8535 }
8536 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8537 if (user_data != NULL)
8538 ctxt->userData = user_data;
8539 }
8540
8541 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8542 if (inputStream == NULL) {
8543 xmlFreeParserCtxt(ctxt);
8544 return(NULL);
8545 }
8546 inputPush(ctxt, inputStream);
8547
8548 return(ctxt);
8549}
8550
8551/************************************************************************
8552 * *
8553 * Front ends when parsing a Dtd *
8554 * *
8555 ************************************************************************/
8556
8557/**
8558 * xmlIOParseDTD:
8559 * @sax: the SAX handler block or NULL
8560 * @input: an Input Buffer
8561 * @enc: the charset encoding if known
8562 *
8563 * Load and parse a DTD
8564 *
8565 * Returns the resulting xmlDtdPtr or NULL in case of error.
8566 * @input will be freed at parsing end.
8567 */
8568
8569xmlDtdPtr
8570xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8571 xmlCharEncoding enc) {
8572 xmlDtdPtr ret = NULL;
8573 xmlParserCtxtPtr ctxt;
8574 xmlParserInputPtr pinput = NULL;
8575
8576 if (input == NULL)
8577 return(NULL);
8578
8579 ctxt = xmlNewParserCtxt();
8580 if (ctxt == NULL) {
8581 return(NULL);
8582 }
8583
8584 /*
8585 * Set-up the SAX context
8586 */
8587 if (sax != NULL) {
8588 if (ctxt->sax != NULL)
8589 xmlFree(ctxt->sax);
8590 ctxt->sax = sax;
8591 ctxt->userData = NULL;
8592 }
8593
8594 /*
8595 * generate a parser input from the I/O handler
8596 */
8597
8598 pinput = xmlNewIOInputStream(ctxt, input, enc);
8599 if (pinput == NULL) {
8600 if (sax != NULL) ctxt->sax = NULL;
8601 xmlFreeParserCtxt(ctxt);
8602 return(NULL);
8603 }
8604
8605 /*
8606 * plug some encoding conversion routines here.
8607 */
8608 xmlPushInput(ctxt, pinput);
8609
8610 pinput->filename = NULL;
8611 pinput->line = 1;
8612 pinput->col = 1;
8613 pinput->base = ctxt->input->cur;
8614 pinput->cur = ctxt->input->cur;
8615 pinput->free = NULL;
8616
8617 /*
8618 * let's parse that entity knowing it's an external subset.
8619 */
8620 ctxt->inSubset = 2;
8621 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8622 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8623 BAD_CAST "none", BAD_CAST "none");
8624 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8625
8626 if (ctxt->myDoc != NULL) {
8627 if (ctxt->wellFormed) {
8628 ret = ctxt->myDoc->extSubset;
8629 ctxt->myDoc->extSubset = NULL;
8630 } else {
8631 ret = NULL;
8632 }
8633 xmlFreeDoc(ctxt->myDoc);
8634 ctxt->myDoc = NULL;
8635 }
8636 if (sax != NULL) ctxt->sax = NULL;
8637 xmlFreeParserCtxt(ctxt);
8638
8639 return(ret);
8640}
8641
8642/**
8643 * xmlSAXParseDTD:
8644 * @sax: the SAX handler block
8645 * @ExternalID: a NAME* containing the External ID of the DTD
8646 * @SystemID: a NAME* containing the URL to the DTD
8647 *
8648 * Load and parse an external subset.
8649 *
8650 * Returns the resulting xmlDtdPtr or NULL in case of error.
8651 */
8652
8653xmlDtdPtr
8654xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8655 const xmlChar *SystemID) {
8656 xmlDtdPtr ret = NULL;
8657 xmlParserCtxtPtr ctxt;
8658 xmlParserInputPtr input = NULL;
8659 xmlCharEncoding enc;
8660
8661 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8662
8663 ctxt = xmlNewParserCtxt();
8664 if (ctxt == NULL) {
8665 return(NULL);
8666 }
8667
8668 /*
8669 * Set-up the SAX context
8670 */
8671 if (sax != NULL) {
8672 if (ctxt->sax != NULL)
8673 xmlFree(ctxt->sax);
8674 ctxt->sax = sax;
8675 ctxt->userData = NULL;
8676 }
8677
8678 /*
8679 * Ask the Entity resolver to load the damn thing
8680 */
8681
8682 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8683 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8684 if (input == NULL) {
8685 if (sax != NULL) ctxt->sax = NULL;
8686 xmlFreeParserCtxt(ctxt);
8687 return(NULL);
8688 }
8689
8690 /*
8691 * plug some encoding conversion routines here.
8692 */
8693 xmlPushInput(ctxt, input);
8694 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8695 xmlSwitchEncoding(ctxt, enc);
8696
8697 if (input->filename == NULL)
8698 input->filename = (char *) xmlStrdup(SystemID);
8699 input->line = 1;
8700 input->col = 1;
8701 input->base = ctxt->input->cur;
8702 input->cur = ctxt->input->cur;
8703 input->free = NULL;
8704
8705 /*
8706 * let's parse that entity knowing it's an external subset.
8707 */
8708 ctxt->inSubset = 2;
8709 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8710 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8711 ExternalID, SystemID);
8712 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8713
8714 if (ctxt->myDoc != NULL) {
8715 if (ctxt->wellFormed) {
8716 ret = ctxt->myDoc->extSubset;
8717 ctxt->myDoc->extSubset = NULL;
8718 } else {
8719 ret = NULL;
8720 }
8721 xmlFreeDoc(ctxt->myDoc);
8722 ctxt->myDoc = NULL;
8723 }
8724 if (sax != NULL) ctxt->sax = NULL;
8725 xmlFreeParserCtxt(ctxt);
8726
8727 return(ret);
8728}
8729
8730/**
8731 * xmlParseDTD:
8732 * @ExternalID: a NAME* containing the External ID of the DTD
8733 * @SystemID: a NAME* containing the URL to the DTD
8734 *
8735 * Load and parse an external subset.
8736 *
8737 * Returns the resulting xmlDtdPtr or NULL in case of error.
8738 */
8739
8740xmlDtdPtr
8741xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8742 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8743}
8744
8745/************************************************************************
8746 * *
8747 * Front ends when parsing an Entity *
8748 * *
8749 ************************************************************************/
8750
8751/**
Owen Taylor3473f882001-02-23 17:55:21 +00008752 * xmlParseCtxtExternalEntity:
8753 * @ctx: the existing parsing context
8754 * @URL: the URL for the entity to load
8755 * @ID: the System ID for the entity to load
8756 * @list: the return value for the set of parsed nodes
8757 *
8758 * Parse an external general entity within an existing parsing context
8759 * An external general parsed entity is well-formed if it matches the
8760 * production labeled extParsedEnt.
8761 *
8762 * [78] extParsedEnt ::= TextDecl? content
8763 *
8764 * Returns 0 if the entity is well formed, -1 in case of args problem and
8765 * the parser error code otherwise
8766 */
8767
8768int
8769xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8770 const xmlChar *ID, xmlNodePtr *list) {
8771 xmlParserCtxtPtr ctxt;
8772 xmlDocPtr newDoc;
8773 xmlSAXHandlerPtr oldsax = NULL;
8774 int ret = 0;
8775
8776 if (ctx->depth > 40) {
8777 return(XML_ERR_ENTITY_LOOP);
8778 }
8779
8780 if (list != NULL)
8781 *list = NULL;
8782 if ((URL == NULL) && (ID == NULL))
8783 return(-1);
8784 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8785 return(-1);
8786
8787
8788 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8789 if (ctxt == NULL) return(-1);
8790 ctxt->userData = ctxt;
8791 oldsax = ctxt->sax;
8792 ctxt->sax = ctx->sax;
8793 newDoc = xmlNewDoc(BAD_CAST "1.0");
8794 if (newDoc == NULL) {
8795 xmlFreeParserCtxt(ctxt);
8796 return(-1);
8797 }
8798 if (ctx->myDoc != NULL) {
8799 newDoc->intSubset = ctx->myDoc->intSubset;
8800 newDoc->extSubset = ctx->myDoc->extSubset;
8801 }
8802 if (ctx->myDoc->URL != NULL) {
8803 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8804 }
8805 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8806 if (newDoc->children == NULL) {
8807 ctxt->sax = oldsax;
8808 xmlFreeParserCtxt(ctxt);
8809 newDoc->intSubset = NULL;
8810 newDoc->extSubset = NULL;
8811 xmlFreeDoc(newDoc);
8812 return(-1);
8813 }
8814 nodePush(ctxt, newDoc->children);
8815 if (ctx->myDoc == NULL) {
8816 ctxt->myDoc = newDoc;
8817 } else {
8818 ctxt->myDoc = ctx->myDoc;
8819 newDoc->children->doc = ctx->myDoc;
8820 }
8821
8822 /*
8823 * Parse a possible text declaration first
8824 */
8825 GROW;
8826 if ((RAW == '<') && (NXT(1) == '?') &&
8827 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8828 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8829 xmlParseTextDecl(ctxt);
8830 }
8831
8832 /*
8833 * Doing validity checking on chunk doesn't make sense
8834 */
8835 ctxt->instate = XML_PARSER_CONTENT;
8836 ctxt->validate = ctx->validate;
8837 ctxt->loadsubset = ctx->loadsubset;
8838 ctxt->depth = ctx->depth + 1;
8839 ctxt->replaceEntities = ctx->replaceEntities;
8840 if (ctxt->validate) {
8841 ctxt->vctxt.error = ctx->vctxt.error;
8842 ctxt->vctxt.warning = ctx->vctxt.warning;
8843 /* Allocate the Node stack */
8844 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8845 if (ctxt->vctxt.nodeTab == NULL) {
8846 xmlGenericError(xmlGenericErrorContext,
8847 "xmlParseCtxtExternalEntity: out of memory\n");
8848 ctxt->validate = 0;
8849 ctxt->vctxt.error = NULL;
8850 ctxt->vctxt.warning = NULL;
8851 } else {
8852 ctxt->vctxt.nodeNr = 0;
8853 ctxt->vctxt.nodeMax = 4;
8854 ctxt->vctxt.node = NULL;
8855 }
8856 } else {
8857 ctxt->vctxt.error = NULL;
8858 ctxt->vctxt.warning = NULL;
8859 }
8860
8861 xmlParseContent(ctxt);
8862
8863 if ((RAW == '<') && (NXT(1) == '/')) {
8864 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8866 ctxt->sax->error(ctxt->userData,
8867 "chunk is not well balanced\n");
8868 ctxt->wellFormed = 0;
8869 ctxt->disableSAX = 1;
8870 } else if (RAW != 0) {
8871 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8873 ctxt->sax->error(ctxt->userData,
8874 "extra content at the end of well balanced chunk\n");
8875 ctxt->wellFormed = 0;
8876 ctxt->disableSAX = 1;
8877 }
8878 if (ctxt->node != newDoc->children) {
8879 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8881 ctxt->sax->error(ctxt->userData,
8882 "chunk is not well balanced\n");
8883 ctxt->wellFormed = 0;
8884 ctxt->disableSAX = 1;
8885 }
8886
8887 if (!ctxt->wellFormed) {
8888 if (ctxt->errNo == 0)
8889 ret = 1;
8890 else
8891 ret = ctxt->errNo;
8892 } else {
8893 if (list != NULL) {
8894 xmlNodePtr cur;
8895
8896 /*
8897 * Return the newly created nodeset after unlinking it from
8898 * they pseudo parent.
8899 */
8900 cur = newDoc->children->children;
8901 *list = cur;
8902 while (cur != NULL) {
8903 cur->parent = NULL;
8904 cur = cur->next;
8905 }
8906 newDoc->children->children = NULL;
8907 }
8908 ret = 0;
8909 }
8910 ctxt->sax = oldsax;
8911 xmlFreeParserCtxt(ctxt);
8912 newDoc->intSubset = NULL;
8913 newDoc->extSubset = NULL;
8914 xmlFreeDoc(newDoc);
8915
8916 return(ret);
8917}
8918
8919/**
Daniel Veillard257d9102001-05-08 10:41:44 +00008920 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00008921 * @doc: the document the chunk pertains to
8922 * @sax: the SAX handler bloc (possibly NULL)
8923 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8924 * @depth: Used for loop detection, use 0
8925 * @URL: the URL for the entity to load
8926 * @ID: the System ID for the entity to load
8927 * @list: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00008928 * @private: extra field for the _private parser context
Owen Taylor3473f882001-02-23 17:55:21 +00008929 *
Daniel Veillard257d9102001-05-08 10:41:44 +00008930 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00008931 *
8932 * Returns 0 if the entity is well formed, -1 in case of args problem and
8933 * the parser error code otherwise
8934 */
8935
Daniel Veillard257d9102001-05-08 10:41:44 +00008936static int
8937xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlSAXHandlerPtr sax,
8938 void *user_data, int depth, const xmlChar *URL,
8939 const xmlChar *ID, xmlNodePtr *list, void *private) {
Owen Taylor3473f882001-02-23 17:55:21 +00008940 xmlParserCtxtPtr ctxt;
8941 xmlDocPtr newDoc;
8942 xmlSAXHandlerPtr oldsax = NULL;
8943 int ret = 0;
8944
8945 if (depth > 40) {
8946 return(XML_ERR_ENTITY_LOOP);
8947 }
8948
8949
8950
8951 if (list != NULL)
8952 *list = NULL;
8953 if ((URL == NULL) && (ID == NULL))
8954 return(-1);
8955 if (doc == NULL) /* @@ relax but check for dereferences */
8956 return(-1);
8957
8958
8959 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8960 if (ctxt == NULL) return(-1);
8961 ctxt->userData = ctxt;
Daniel Veillard257d9102001-05-08 10:41:44 +00008962 ctxt->_private = private;
Owen Taylor3473f882001-02-23 17:55:21 +00008963 if (sax != NULL) {
8964 oldsax = ctxt->sax;
8965 ctxt->sax = sax;
8966 if (user_data != NULL)
8967 ctxt->userData = user_data;
8968 }
8969 newDoc = xmlNewDoc(BAD_CAST "1.0");
8970 if (newDoc == NULL) {
8971 xmlFreeParserCtxt(ctxt);
8972 return(-1);
8973 }
8974 if (doc != NULL) {
8975 newDoc->intSubset = doc->intSubset;
8976 newDoc->extSubset = doc->extSubset;
8977 }
8978 if (doc->URL != NULL) {
8979 newDoc->URL = xmlStrdup(doc->URL);
8980 }
8981 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8982 if (newDoc->children == NULL) {
8983 if (sax != NULL)
8984 ctxt->sax = oldsax;
8985 xmlFreeParserCtxt(ctxt);
8986 newDoc->intSubset = NULL;
8987 newDoc->extSubset = NULL;
8988 xmlFreeDoc(newDoc);
8989 return(-1);
8990 }
8991 nodePush(ctxt, newDoc->children);
8992 if (doc == NULL) {
8993 ctxt->myDoc = newDoc;
8994 } else {
8995 ctxt->myDoc = doc;
8996 newDoc->children->doc = doc;
8997 }
8998
8999 /*
9000 * Parse a possible text declaration first
9001 */
9002 GROW;
9003 if ((RAW == '<') && (NXT(1) == '?') &&
9004 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9005 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9006 xmlParseTextDecl(ctxt);
9007 }
9008
9009 /*
9010 * Doing validity checking on chunk doesn't make sense
9011 */
9012 ctxt->instate = XML_PARSER_CONTENT;
9013 ctxt->validate = 0;
Daniel Veillarde470df72001-04-18 21:41:07 +00009014 ctxt->external = 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009015 ctxt->loadsubset = 0;
9016 ctxt->depth = depth;
9017
9018 xmlParseContent(ctxt);
9019
9020 if ((RAW == '<') && (NXT(1) == '/')) {
9021 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9023 ctxt->sax->error(ctxt->userData,
9024 "chunk is not well balanced\n");
9025 ctxt->wellFormed = 0;
9026 ctxt->disableSAX = 1;
9027 } else if (RAW != 0) {
9028 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9030 ctxt->sax->error(ctxt->userData,
9031 "extra content at the end of well balanced chunk\n");
9032 ctxt->wellFormed = 0;
9033 ctxt->disableSAX = 1;
9034 }
9035 if (ctxt->node != newDoc->children) {
9036 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9038 ctxt->sax->error(ctxt->userData,
9039 "chunk is not well balanced\n");
9040 ctxt->wellFormed = 0;
9041 ctxt->disableSAX = 1;
9042 }
9043
9044 if (!ctxt->wellFormed) {
9045 if (ctxt->errNo == 0)
9046 ret = 1;
9047 else
9048 ret = ctxt->errNo;
9049 } else {
9050 if (list != NULL) {
9051 xmlNodePtr cur;
9052
9053 /*
9054 * Return the newly created nodeset after unlinking it from
9055 * they pseudo parent.
9056 */
9057 cur = newDoc->children->children;
9058 *list = cur;
9059 while (cur != NULL) {
9060 cur->parent = NULL;
9061 cur = cur->next;
9062 }
9063 newDoc->children->children = NULL;
9064 }
9065 ret = 0;
9066 }
9067 if (sax != NULL)
9068 ctxt->sax = oldsax;
9069 xmlFreeParserCtxt(ctxt);
9070 newDoc->intSubset = NULL;
9071 newDoc->extSubset = NULL;
9072 xmlFreeDoc(newDoc);
9073
9074 return(ret);
9075}
9076
9077/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009078 * xmlParseExternalEntity:
9079 * @doc: the document the chunk pertains to
9080 * @sax: the SAX handler bloc (possibly NULL)
9081 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9082 * @depth: Used for loop detection, use 0
9083 * @URL: the URL for the entity to load
9084 * @ID: the System ID for the entity to load
9085 * @list: the return value for the set of parsed nodes
9086 *
9087 * Parse an external general entity
9088 * An external general parsed entity is well-formed if it matches the
9089 * production labeled extParsedEnt.
9090 *
9091 * [78] extParsedEnt ::= TextDecl? content
9092 *
9093 * Returns 0 if the entity is well formed, -1 in case of args problem and
9094 * the parser error code otherwise
9095 */
9096
9097int
9098xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9099 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
9100 return(xmlParseExternalEntityPrivate(doc, sax, user_data, depth, URL,
9101 ID, list, NULL));
9102}
9103
9104/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009105 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009106 * @doc: the document the chunk pertains to
9107 * @sax: the SAX handler bloc (possibly NULL)
9108 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9109 * @depth: Used for loop detection, use 0
9110 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9111 * @list: the return value for the set of parsed nodes
9112 *
9113 * Parse a well-balanced chunk of an XML document
9114 * called by the parser
9115 * The allowed sequence for the Well Balanced Chunk is the one defined by
9116 * the content production in the XML grammar:
9117 *
9118 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9119 *
9120 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9121 * the parser error code otherwise
9122 */
9123
9124int
9125xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9126 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9127 xmlParserCtxtPtr ctxt;
9128 xmlDocPtr newDoc;
9129 xmlSAXHandlerPtr oldsax = NULL;
9130 int size;
9131 int ret = 0;
9132
9133 if (depth > 40) {
9134 return(XML_ERR_ENTITY_LOOP);
9135 }
9136
9137
9138 if (list != NULL)
9139 *list = NULL;
9140 if (string == NULL)
9141 return(-1);
9142
9143 size = xmlStrlen(string);
9144
9145 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9146 if (ctxt == NULL) return(-1);
9147 ctxt->userData = ctxt;
9148 if (sax != NULL) {
9149 oldsax = ctxt->sax;
9150 ctxt->sax = sax;
9151 if (user_data != NULL)
9152 ctxt->userData = user_data;
9153 }
9154 newDoc = xmlNewDoc(BAD_CAST "1.0");
9155 if (newDoc == NULL) {
9156 xmlFreeParserCtxt(ctxt);
9157 return(-1);
9158 }
9159 if (doc != NULL) {
9160 newDoc->intSubset = doc->intSubset;
9161 newDoc->extSubset = doc->extSubset;
9162 }
9163 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9164 if (newDoc->children == NULL) {
9165 if (sax != NULL)
9166 ctxt->sax = oldsax;
9167 xmlFreeParserCtxt(ctxt);
9168 newDoc->intSubset = NULL;
9169 newDoc->extSubset = NULL;
9170 xmlFreeDoc(newDoc);
9171 return(-1);
9172 }
9173 nodePush(ctxt, newDoc->children);
9174 if (doc == NULL) {
9175 ctxt->myDoc = newDoc;
9176 } else {
9177 ctxt->myDoc = doc;
9178 newDoc->children->doc = doc;
9179 }
9180 ctxt->instate = XML_PARSER_CONTENT;
9181 ctxt->depth = depth;
9182
9183 /*
9184 * Doing validity checking on chunk doesn't make sense
9185 */
9186 ctxt->validate = 0;
9187 ctxt->loadsubset = 0;
9188
9189 xmlParseContent(ctxt);
9190
9191 if ((RAW == '<') && (NXT(1) == '/')) {
9192 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9194 ctxt->sax->error(ctxt->userData,
9195 "chunk is not well balanced\n");
9196 ctxt->wellFormed = 0;
9197 ctxt->disableSAX = 1;
9198 } else if (RAW != 0) {
9199 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9201 ctxt->sax->error(ctxt->userData,
9202 "extra content at the end of well balanced chunk\n");
9203 ctxt->wellFormed = 0;
9204 ctxt->disableSAX = 1;
9205 }
9206 if (ctxt->node != newDoc->children) {
9207 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9209 ctxt->sax->error(ctxt->userData,
9210 "chunk is not well balanced\n");
9211 ctxt->wellFormed = 0;
9212 ctxt->disableSAX = 1;
9213 }
9214
9215 if (!ctxt->wellFormed) {
9216 if (ctxt->errNo == 0)
9217 ret = 1;
9218 else
9219 ret = ctxt->errNo;
9220 } else {
9221 if (list != NULL) {
9222 xmlNodePtr cur;
9223
9224 /*
9225 * Return the newly created nodeset after unlinking it from
9226 * they pseudo parent.
9227 */
9228 cur = newDoc->children->children;
9229 *list = cur;
9230 while (cur != NULL) {
9231 cur->parent = NULL;
9232 cur = cur->next;
9233 }
9234 newDoc->children->children = NULL;
9235 }
9236 ret = 0;
9237 }
9238 if (sax != NULL)
9239 ctxt->sax = oldsax;
9240 xmlFreeParserCtxt(ctxt);
9241 newDoc->intSubset = NULL;
9242 newDoc->extSubset = NULL;
9243 xmlFreeDoc(newDoc);
9244
9245 return(ret);
9246}
9247
9248/**
9249 * xmlSAXParseEntity:
9250 * @sax: the SAX handler block
9251 * @filename: the filename
9252 *
9253 * parse an XML external entity out of context and build a tree.
9254 * It use the given SAX function block to handle the parsing callback.
9255 * If sax is NULL, fallback to the default DOM tree building routines.
9256 *
9257 * [78] extParsedEnt ::= TextDecl? content
9258 *
9259 * This correspond to a "Well Balanced" chunk
9260 *
9261 * Returns the resulting document tree
9262 */
9263
9264xmlDocPtr
9265xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9266 xmlDocPtr ret;
9267 xmlParserCtxtPtr ctxt;
9268 char *directory = NULL;
9269
9270 ctxt = xmlCreateFileParserCtxt(filename);
9271 if (ctxt == NULL) {
9272 return(NULL);
9273 }
9274 if (sax != NULL) {
9275 if (ctxt->sax != NULL)
9276 xmlFree(ctxt->sax);
9277 ctxt->sax = sax;
9278 ctxt->userData = NULL;
9279 }
9280
9281 if ((ctxt->directory == NULL) && (directory == NULL))
9282 directory = xmlParserGetDirectory(filename);
9283
9284 xmlParseExtParsedEnt(ctxt);
9285
9286 if (ctxt->wellFormed)
9287 ret = ctxt->myDoc;
9288 else {
9289 ret = NULL;
9290 xmlFreeDoc(ctxt->myDoc);
9291 ctxt->myDoc = NULL;
9292 }
9293 if (sax != NULL)
9294 ctxt->sax = NULL;
9295 xmlFreeParserCtxt(ctxt);
9296
9297 return(ret);
9298}
9299
9300/**
9301 * xmlParseEntity:
9302 * @filename: the filename
9303 *
9304 * parse an XML external entity out of context and build a tree.
9305 *
9306 * [78] extParsedEnt ::= TextDecl? content
9307 *
9308 * This correspond to a "Well Balanced" chunk
9309 *
9310 * Returns the resulting document tree
9311 */
9312
9313xmlDocPtr
9314xmlParseEntity(const char *filename) {
9315 return(xmlSAXParseEntity(NULL, filename));
9316}
9317
9318/**
9319 * xmlCreateEntityParserCtxt:
9320 * @URL: the entity URL
9321 * @ID: the entity PUBLIC ID
9322 * @base: a posible base for the target URI
9323 *
9324 * Create a parser context for an external entity
9325 * Automatic support for ZLIB/Compress compressed document is provided
9326 * by default if found at compile-time.
9327 *
9328 * Returns the new parser context or NULL
9329 */
9330xmlParserCtxtPtr
9331xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9332 const xmlChar *base) {
9333 xmlParserCtxtPtr ctxt;
9334 xmlParserInputPtr inputStream;
9335 char *directory = NULL;
9336 xmlChar *uri;
9337
9338 ctxt = xmlNewParserCtxt();
9339 if (ctxt == NULL) {
9340 return(NULL);
9341 }
9342
9343 uri = xmlBuildURI(URL, base);
9344
9345 if (uri == NULL) {
9346 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9347 if (inputStream == NULL) {
9348 xmlFreeParserCtxt(ctxt);
9349 return(NULL);
9350 }
9351
9352 inputPush(ctxt, inputStream);
9353
9354 if ((ctxt->directory == NULL) && (directory == NULL))
9355 directory = xmlParserGetDirectory((char *)URL);
9356 if ((ctxt->directory == NULL) && (directory != NULL))
9357 ctxt->directory = directory;
9358 } else {
9359 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9360 if (inputStream == NULL) {
9361 xmlFree(uri);
9362 xmlFreeParserCtxt(ctxt);
9363 return(NULL);
9364 }
9365
9366 inputPush(ctxt, inputStream);
9367
9368 if ((ctxt->directory == NULL) && (directory == NULL))
9369 directory = xmlParserGetDirectory((char *)uri);
9370 if ((ctxt->directory == NULL) && (directory != NULL))
9371 ctxt->directory = directory;
9372 xmlFree(uri);
9373 }
9374
9375 return(ctxt);
9376}
9377
9378/************************************************************************
9379 * *
9380 * Front ends when parsing from a file *
9381 * *
9382 ************************************************************************/
9383
9384/**
9385 * xmlCreateFileParserCtxt:
9386 * @filename: the filename
9387 *
9388 * Create a parser context for a file content.
9389 * Automatic support for ZLIB/Compress compressed document is provided
9390 * by default if found at compile-time.
9391 *
9392 * Returns the new parser context or NULL
9393 */
9394xmlParserCtxtPtr
9395xmlCreateFileParserCtxt(const char *filename)
9396{
9397 xmlParserCtxtPtr ctxt;
9398 xmlParserInputPtr inputStream;
9399 xmlParserInputBufferPtr buf;
9400 char *directory = NULL;
9401
9402 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9403 if (buf == NULL) {
9404 return(NULL);
9405 }
9406
9407 ctxt = xmlNewParserCtxt();
9408 if (ctxt == NULL) {
9409 if (xmlDefaultSAXHandler.error != NULL) {
9410 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9411 }
9412 return(NULL);
9413 }
9414
9415 inputStream = xmlNewInputStream(ctxt);
9416 if (inputStream == NULL) {
9417 xmlFreeParserCtxt(ctxt);
9418 return(NULL);
9419 }
9420
9421 inputStream->filename = xmlMemStrdup(filename);
9422 inputStream->buf = buf;
9423 inputStream->base = inputStream->buf->buffer->content;
9424 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009425 inputStream->end =
9426 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009427
9428 inputPush(ctxt, inputStream);
9429 if ((ctxt->directory == NULL) && (directory == NULL))
9430 directory = xmlParserGetDirectory(filename);
9431 if ((ctxt->directory == NULL) && (directory != NULL))
9432 ctxt->directory = directory;
9433
9434 return(ctxt);
9435}
9436
9437/**
9438 * xmlSAXParseFile:
9439 * @sax: the SAX handler block
9440 * @filename: the filename
9441 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9442 * documents
9443 *
9444 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9445 * compressed document is provided by default if found at compile-time.
9446 * It use the given SAX function block to handle the parsing callback.
9447 * If sax is NULL, fallback to the default DOM tree building routines.
9448 *
9449 * Returns the resulting document tree
9450 */
9451
9452xmlDocPtr
9453xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9454 int recovery) {
9455 xmlDocPtr ret;
9456 xmlParserCtxtPtr ctxt;
9457 char *directory = NULL;
9458
9459 ctxt = xmlCreateFileParserCtxt(filename);
9460 if (ctxt == NULL) {
9461 return(NULL);
9462 }
9463 if (sax != NULL) {
9464 if (ctxt->sax != NULL)
9465 xmlFree(ctxt->sax);
9466 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009467 }
9468
9469 if ((ctxt->directory == NULL) && (directory == NULL))
9470 directory = xmlParserGetDirectory(filename);
9471 if ((ctxt->directory == NULL) && (directory != NULL))
9472 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9473
9474 xmlParseDocument(ctxt);
9475
9476 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9477 else {
9478 ret = NULL;
9479 xmlFreeDoc(ctxt->myDoc);
9480 ctxt->myDoc = NULL;
9481 }
9482 if (sax != NULL)
9483 ctxt->sax = NULL;
9484 xmlFreeParserCtxt(ctxt);
9485
9486 return(ret);
9487}
9488
9489/**
9490 * xmlRecoverDoc:
9491 * @cur: a pointer to an array of xmlChar
9492 *
9493 * parse an XML in-memory document and build a tree.
9494 * In the case the document is not Well Formed, a tree is built anyway
9495 *
9496 * Returns the resulting document tree
9497 */
9498
9499xmlDocPtr
9500xmlRecoverDoc(xmlChar *cur) {
9501 return(xmlSAXParseDoc(NULL, cur, 1));
9502}
9503
9504/**
9505 * xmlParseFile:
9506 * @filename: the filename
9507 *
9508 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9509 * compressed document is provided by default if found at compile-time.
9510 *
9511 * Returns the resulting document tree
9512 */
9513
9514xmlDocPtr
9515xmlParseFile(const char *filename) {
9516 return(xmlSAXParseFile(NULL, filename, 0));
9517}
9518
9519/**
9520 * xmlRecoverFile:
9521 * @filename: the filename
9522 *
9523 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9524 * compressed document is provided by default if found at compile-time.
9525 * In the case the document is not Well Formed, a tree is built anyway
9526 *
9527 * Returns the resulting document tree
9528 */
9529
9530xmlDocPtr
9531xmlRecoverFile(const char *filename) {
9532 return(xmlSAXParseFile(NULL, filename, 1));
9533}
9534
9535
9536/**
9537 * xmlSetupParserForBuffer:
9538 * @ctxt: an XML parser context
9539 * @buffer: a xmlChar * buffer
9540 * @filename: a file name
9541 *
9542 * Setup the parser context to parse a new buffer; Clears any prior
9543 * contents from the parser context. The buffer parameter must not be
9544 * NULL, but the filename parameter can be
9545 */
9546void
9547xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9548 const char* filename)
9549{
9550 xmlParserInputPtr input;
9551
9552 input = xmlNewInputStream(ctxt);
9553 if (input == NULL) {
9554 perror("malloc");
9555 xmlFree(ctxt);
9556 return;
9557 }
9558
9559 xmlClearParserCtxt(ctxt);
9560 if (filename != NULL)
9561 input->filename = xmlMemStrdup(filename);
9562 input->base = buffer;
9563 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009564 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009565 inputPush(ctxt, input);
9566}
9567
9568/**
9569 * xmlSAXUserParseFile:
9570 * @sax: a SAX handler
9571 * @user_data: The user data returned on SAX callbacks
9572 * @filename: a file name
9573 *
9574 * parse an XML file and call the given SAX handler routines.
9575 * Automatic support for ZLIB/Compress compressed document is provided
9576 *
9577 * Returns 0 in case of success or a error number otherwise
9578 */
9579int
9580xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9581 const char *filename) {
9582 int ret = 0;
9583 xmlParserCtxtPtr ctxt;
9584
9585 ctxt = xmlCreateFileParserCtxt(filename);
9586 if (ctxt == NULL) return -1;
9587 if (ctxt->sax != &xmlDefaultSAXHandler)
9588 xmlFree(ctxt->sax);
9589 ctxt->sax = sax;
9590 if (user_data != NULL)
9591 ctxt->userData = user_data;
9592
9593 xmlParseDocument(ctxt);
9594
9595 if (ctxt->wellFormed)
9596 ret = 0;
9597 else {
9598 if (ctxt->errNo != 0)
9599 ret = ctxt->errNo;
9600 else
9601 ret = -1;
9602 }
9603 if (sax != NULL)
9604 ctxt->sax = NULL;
9605 xmlFreeParserCtxt(ctxt);
9606
9607 return ret;
9608}
9609
9610/************************************************************************
9611 * *
9612 * Front ends when parsing from memory *
9613 * *
9614 ************************************************************************/
9615
9616/**
9617 * xmlCreateMemoryParserCtxt:
9618 * @buffer: a pointer to a char array
9619 * @size: the size of the array
9620 *
9621 * Create a parser context for an XML in-memory document.
9622 *
9623 * Returns the new parser context or NULL
9624 */
9625xmlParserCtxtPtr
9626xmlCreateMemoryParserCtxt(char *buffer, int size) {
9627 xmlParserCtxtPtr ctxt;
9628 xmlParserInputPtr input;
9629 xmlParserInputBufferPtr buf;
9630
9631 if (buffer == NULL)
9632 return(NULL);
9633 if (size <= 0)
9634 return(NULL);
9635
9636 ctxt = xmlNewParserCtxt();
9637 if (ctxt == NULL)
9638 return(NULL);
9639
9640 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9641 if (buf == NULL) return(NULL);
9642
9643 input = xmlNewInputStream(ctxt);
9644 if (input == NULL) {
9645 xmlFreeParserCtxt(ctxt);
9646 return(NULL);
9647 }
9648
9649 input->filename = NULL;
9650 input->buf = buf;
9651 input->base = input->buf->buffer->content;
9652 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009653 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009654
9655 inputPush(ctxt, input);
9656 return(ctxt);
9657}
9658
9659/**
9660 * xmlSAXParseMemory:
9661 * @sax: the SAX handler block
9662 * @buffer: an pointer to a char array
9663 * @size: the size of the array
9664 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9665 * documents
9666 *
9667 * parse an XML in-memory block and use the given SAX function block
9668 * to handle the parsing callback. If sax is NULL, fallback to the default
9669 * DOM tree building routines.
9670 *
9671 * Returns the resulting document tree
9672 */
9673xmlDocPtr
9674xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9675 xmlDocPtr ret;
9676 xmlParserCtxtPtr ctxt;
9677
9678 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9679 if (ctxt == NULL) return(NULL);
9680 if (sax != NULL) {
9681 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009682 }
9683
9684 xmlParseDocument(ctxt);
9685
9686 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9687 else {
9688 ret = NULL;
9689 xmlFreeDoc(ctxt->myDoc);
9690 ctxt->myDoc = NULL;
9691 }
9692 if (sax != NULL)
9693 ctxt->sax = NULL;
9694 xmlFreeParserCtxt(ctxt);
9695
9696 return(ret);
9697}
9698
9699/**
9700 * xmlParseMemory:
9701 * @buffer: an pointer to a char array
9702 * @size: the size of the array
9703 *
9704 * parse an XML in-memory block and build a tree.
9705 *
9706 * Returns the resulting document tree
9707 */
9708
9709xmlDocPtr xmlParseMemory(char *buffer, int size) {
9710 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9711}
9712
9713/**
9714 * xmlRecoverMemory:
9715 * @buffer: an pointer to a char array
9716 * @size: the size of the array
9717 *
9718 * parse an XML in-memory block and build a tree.
9719 * In the case the document is not Well Formed, a tree is built anyway
9720 *
9721 * Returns the resulting document tree
9722 */
9723
9724xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9725 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9726}
9727
9728/**
9729 * xmlSAXUserParseMemory:
9730 * @sax: a SAX handler
9731 * @user_data: The user data returned on SAX callbacks
9732 * @buffer: an in-memory XML document input
9733 * @size: the length of the XML document in bytes
9734 *
9735 * A better SAX parsing routine.
9736 * parse an XML in-memory buffer and call the given SAX handler routines.
9737 *
9738 * Returns 0 in case of success or a error number otherwise
9739 */
9740int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9741 char *buffer, int size) {
9742 int ret = 0;
9743 xmlParserCtxtPtr ctxt;
9744 xmlSAXHandlerPtr oldsax = NULL;
9745
9746 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9747 if (ctxt == NULL) return -1;
9748 if (sax != NULL) {
9749 oldsax = ctxt->sax;
9750 ctxt->sax = sax;
9751 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009752 if (user_data != NULL)
9753 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009754
9755 xmlParseDocument(ctxt);
9756
9757 if (ctxt->wellFormed)
9758 ret = 0;
9759 else {
9760 if (ctxt->errNo != 0)
9761 ret = ctxt->errNo;
9762 else
9763 ret = -1;
9764 }
9765 if (sax != NULL) {
9766 ctxt->sax = oldsax;
9767 }
9768 xmlFreeParserCtxt(ctxt);
9769
9770 return ret;
9771}
9772
9773/**
9774 * xmlCreateDocParserCtxt:
9775 * @cur: a pointer to an array of xmlChar
9776 *
9777 * Creates a parser context for an XML in-memory document.
9778 *
9779 * Returns the new parser context or NULL
9780 */
9781xmlParserCtxtPtr
9782xmlCreateDocParserCtxt(xmlChar *cur) {
9783 int len;
9784
9785 if (cur == NULL)
9786 return(NULL);
9787 len = xmlStrlen(cur);
9788 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9789}
9790
9791/**
9792 * xmlSAXParseDoc:
9793 * @sax: the SAX handler block
9794 * @cur: a pointer to an array of xmlChar
9795 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9796 * documents
9797 *
9798 * parse an XML in-memory document and build a tree.
9799 * It use the given SAX function block to handle the parsing callback.
9800 * If sax is NULL, fallback to the default DOM tree building routines.
9801 *
9802 * Returns the resulting document tree
9803 */
9804
9805xmlDocPtr
9806xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9807 xmlDocPtr ret;
9808 xmlParserCtxtPtr ctxt;
9809
9810 if (cur == NULL) return(NULL);
9811
9812
9813 ctxt = xmlCreateDocParserCtxt(cur);
9814 if (ctxt == NULL) return(NULL);
9815 if (sax != NULL) {
9816 ctxt->sax = sax;
9817 ctxt->userData = NULL;
9818 }
9819
9820 xmlParseDocument(ctxt);
9821 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9822 else {
9823 ret = NULL;
9824 xmlFreeDoc(ctxt->myDoc);
9825 ctxt->myDoc = NULL;
9826 }
9827 if (sax != NULL)
9828 ctxt->sax = NULL;
9829 xmlFreeParserCtxt(ctxt);
9830
9831 return(ret);
9832}
9833
9834/**
9835 * xmlParseDoc:
9836 * @cur: a pointer to an array of xmlChar
9837 *
9838 * parse an XML in-memory document and build a tree.
9839 *
9840 * Returns the resulting document tree
9841 */
9842
9843xmlDocPtr
9844xmlParseDoc(xmlChar *cur) {
9845 return(xmlSAXParseDoc(NULL, cur, 0));
9846}
9847
9848
9849/************************************************************************
9850 * *
9851 * Miscellaneous *
9852 * *
9853 ************************************************************************/
9854
9855#ifdef LIBXML_XPATH_ENABLED
9856#include <libxml/xpath.h>
9857#endif
9858
/* Set to 1 by xmlInitParser(), reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
9860
9861/**
9862 * xmlInitParser:
9863 *
9864 * Initialization function for the XML parser.
9865 * This is not reentrant. Call once before processing in case of
9866 * use in multithreaded programs.
9867 */
9868
9869void
9870xmlInitParser(void) {
9871 if (xmlParserInitialized) return;
9872
9873 xmlInitCharEncodingHandlers();
9874 xmlInitializePredefinedEntities();
9875 xmlDefaultSAXHandlerInit();
9876 xmlRegisterDefaultInputCallbacks();
9877 xmlRegisterDefaultOutputCallbacks();
9878#ifdef LIBXML_HTML_ENABLED
9879 htmlInitAutoClose();
9880 htmlDefaultSAXHandlerInit();
9881#endif
9882#ifdef LIBXML_XPATH_ENABLED
9883 xmlXPathInit();
9884#endif
9885 xmlParserInitialized = 1;
9886}
9887
9888/**
9889 * xmlCleanupParser:
9890 *
9891 * Cleanup function for the XML parser. It tries to reclaim all
9892 * parsing related global memory allocated for the parser processing.
9893 * It doesn't deallocate any document related memory. Calling this
9894 * function should not prevent reusing the parser.
9895 */
9896
9897void
9898xmlCleanupParser(void) {
9899 xmlParserInitialized = 0;
9900 xmlCleanupCharEncodingHandlers();
9901 xmlCleanupPredefinedEntities();
9902}
9903
9904/**
9905 * xmlPedanticParserDefault:
9906 * @val: int 0 or 1
9907 *
9908 * Set and return the previous value for enabling pedantic warnings.
9909 *
9910 * Returns the last value for 0 for no substitution, 1 for substitution.
9911 */
9912
9913int
9914xmlPedanticParserDefault(int val) {
9915 int old = xmlPedanticParserDefaultValue;
9916
9917 xmlPedanticParserDefaultValue = val;
9918 return(old);
9919}
9920
9921/**
9922 * xmlSubstituteEntitiesDefault:
9923 * @val: int 0 or 1
9924 *
9925 * Set and return the previous value for default entity support.
9926 * Initially the parser always keep entity references instead of substituting
9927 * entity values in the output. This function has to be used to change the
9928 * default parser behaviour
9929 * SAX::subtituteEntities() has to be used for changing that on a file by
9930 * file basis.
9931 *
9932 * Returns the last value for 0 for no substitution, 1 for substitution.
9933 */
9934
9935int
9936xmlSubstituteEntitiesDefault(int val) {
9937 int old = xmlSubstituteEntitiesDefaultValue;
9938
9939 xmlSubstituteEntitiesDefaultValue = val;
9940 return(old);
9941}
9942
9943/**
9944 * xmlKeepBlanksDefault:
9945 * @val: int 0 or 1
9946 *
9947 * Set and return the previous value for default blanks text nodes support.
9948 * The 1.x version of the parser used an heuristic to try to detect
9949 * ignorable white spaces. As a result the SAX callback was generating
9950 * ignorableWhitespace() callbacks instead of characters() one, and when
9951 * using the DOM output text nodes containing those blanks were not generated.
9952 * The 2.x and later version will switch to the XML standard way and
9953 * ignorableWhitespace() are only generated when running the parser in
9954 * validating mode and when the current element doesn't allow CDATA or
9955 * mixed content.
9956 * This function is provided as a way to force the standard behaviour
9957 * on 1.X libs and to switch back to the old mode for compatibility when
9958 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9959 * by using xmlIsBlankNode() commodity function to detect the "empty"
9960 * nodes generated.
9961 * This value also affect autogeneration of indentation when saving code
9962 * if blanks sections are kept, indentation is not generated.
9963 *
9964 * Returns the last value for 0 for no substitution, 1 for substitution.
9965 */
9966
9967int
9968xmlKeepBlanksDefault(int val) {
9969 int old = xmlKeepBlanksDefaultValue;
9970
9971 xmlKeepBlanksDefaultValue = val;
9972 xmlIndentTreeOutput = !val;
9973 return(old);
9974}
9975