blob: 4f8b07a25a50ce6793c9c475c16eaedceb77b7b9 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * XML_DIR_SEP: the character separating directories in a path,
 * a backslash when WIN32 is defined and a forward slash otherwise.
 */
#ifndef WIN32
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizing constants for the temporary buffers built while parsing:
 *   XML_PARSER_BUFFER_SIZE      basic chunk, also used as the amount of
 *                               headroom kept free before a buffer is grown
 *   XML_PARSER_BIG_BUFFER_SIZE  initial size of the entity decoding buffer
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300
79
/*
 * Various global defaults for parsing
 *
 * NOTE(review): these are presumably copied into each new parser context
 * when it is initialised; that code is not in this part of the file.
 */

/* warnings are enabled by default */
int xmlGetWarningsDefaultValue = 1;

/*
 * when non-zero, pushing/popping inputs and decoding entity references
 * is traced through xmlGenericError
 */
int xmlParserDebugEntities = 0;

/*
 * On VMS the real symbol names are truncated, the usual names are
 * kept available as macros mapping to the truncated ones.
 */
#ifndef VMS
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#else
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

/* external DTD loading and pedantic mode are off, blanks are kept */
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
97
/*
 * NULL terminated list of the processing instruction targets starting
 * with "xml" which are allowed by W3C specifications.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
106
107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
111
Daniel Veillard257d9102001-05-08 10:41:44 +0000112static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
114 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000115 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000116 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000117
118/************************************************************************
119 * *
120 * Parser stacks related functions and macros *
121 * *
122 ************************************************************************/
123
124xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
125 const xmlChar ** str);
126
127/*
128 * Generic function for accessing stacks in the Parser Context
129 */
130
131#define PUSH_AND_POP(scope, type, name) \
132scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
133 if (ctxt->name##Nr >= ctxt->name##Max) { \
134 ctxt->name##Max *= 2; \
135 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
136 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
137 if (ctxt->name##Tab == NULL) { \
138 xmlGenericError(xmlGenericErrorContext, \
139 "realloc failed !\n"); \
140 return(0); \
141 } \
142 } \
143 ctxt->name##Tab[ctxt->name##Nr] = value; \
144 ctxt->name = value; \
145 return(ctxt->name##Nr++); \
146} \
147scope type name##Pop(xmlParserCtxtPtr ctxt) { \
148 type ret; \
149 if (ctxt->name##Nr <= 0) return(0); \
150 ctxt->name##Nr--; \
151 if (ctxt->name##Nr > 0) \
152 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
153 else \
154 ctxt->name = NULL; \
155 ret = ctxt->name##Tab[ctxt->name##Nr]; \
156 ctxt->name##Tab[ctxt->name##Nr] = 0; \
157 return(ret); \
158} \
159
160/*
161 * Those macros actually generate the functions
162 */
163PUSH_AND_POP(extern, xmlParserInputPtr, input)
164PUSH_AND_POP(extern, xmlNodePtr, node)
165PUSH_AND_POP(extern, xmlChar*, name)
166
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000167static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000168 if (ctxt->spaceNr >= ctxt->spaceMax) {
169 ctxt->spaceMax *= 2;
170 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
171 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
172 if (ctxt->spaceTab == NULL) {
173 xmlGenericError(xmlGenericErrorContext,
174 "realloc failed !\n");
175 return(0);
176 }
177 }
178 ctxt->spaceTab[ctxt->spaceNr] = val;
179 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
180 return(ctxt->spaceNr++);
181}
182
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000183static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000184 int ret;
185 if (ctxt->spaceNr <= 0) return(0);
186 ctxt->spaceNr--;
187 if (ctxt->spaceNr > 0)
188 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
189 else
190 ctxt->space = NULL;
191 ret = ctxt->spaceTab[ctxt->spaceNr];
192 ctxt->spaceTab[ctxt->spaceNr] = -1;
193 return(ret);
194}
195
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII
 *           defined strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
229
/*
 * All the macros below expect a variable named "ctxt" holding the parser
 * context to be in scope at the point of use.
 */

/* current byte of the input, or -1 if a token is pending in ctxt->token */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* the pending token if there is one, the current byte of the input if not */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Skip (val) bytes, then handle a parameter entity reference starting at
 * the new position and pop the input if it is exhausted.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW deliberately stay plain "if (...) { ... }" statements:
 * they are used in this file both with and without a trailing semicolon,
 * so they can't be wrapped in do { } while (0). Never use them as the
 * unbraced body of an if which has an else branch.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* advance by exactly one byte, refilling the input when its end is hit */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by (l) bytes representing a single character: update the
 * line/column information, drop any pending token and handle a
 * parameter entity reference starting at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character (v) to buffer (b) at index (i): a plain byte store
 * when (l) is 1, the multi-byte encoding routine otherwise. (i) is
 * advanced by the number of bytes written.
 * This expands to an if/else statement missing its final semicolon, so
 * always terminate it with ';' and don't use it as the unbraced body of
 * an if which has an else branch.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000282
283/**
284 * xmlSkipBlankChars:
285 * @ctxt: the XML parser context
286 *
287 * skip all blanks character found at that point in the input streams.
288 * It pops up finished entities in the process if allowable at that point.
289 *
290 * Returns the number of space chars skipped
291 */
292
293int
294xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000295 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000296
Daniel Veillard02141ea2001-04-30 11:46:40 +0000297 if (ctxt->token != 0) {
298 if (!IS_BLANK(ctxt->token))
299 return(0);
300 ctxt->token = 0;
301 res++;
302 }
Owen Taylor3473f882001-02-23 17:55:21 +0000303 /*
304 * It's Okay to use CUR/NEXT here since all the blanks are on
305 * the ASCII range.
306 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000307 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
308 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000309 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000310 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000311 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000312 cur = ctxt->input->cur;
313 while (IS_BLANK(*cur)) {
314 if (*cur == '\n') {
315 ctxt->input->line++; ctxt->input->col = 1;
316 }
317 cur++;
318 res++;
319 if (*cur == 0) {
320 ctxt->input->cur = cur;
321 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
322 cur = ctxt->input->cur;
323 }
324 }
325 ctxt->input->cur = cur;
326 } else {
327 int cur;
328 do {
329 cur = CUR;
330 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
331 NEXT;
332 cur = CUR;
333 res++;
334 }
335 while ((cur == 0) && (ctxt->inputNr > 1) &&
336 (ctxt->instate != XML_PARSER_COMMENT)) {
337 xmlPopInput(ctxt);
338 cur = CUR;
339 }
340 /*
341 * Need to handle support of entities branching here
342 */
343 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
344 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
345 }
Owen Taylor3473f882001-02-23 17:55:21 +0000346 return(res);
347}
348
349/************************************************************************
350 * *
351 * Commodity functions to handle entities *
352 * *
353 ************************************************************************/
354
355/**
356 * xmlPopInput:
357 * @ctxt: an XML parser context
358 *
359 * xmlPopInput: the current input pointed by ctxt->input came to an end
360 * pop it and return the next char.
361 *
362 * Returns the current xmlChar in the parser context
363 */
364xmlChar
365xmlPopInput(xmlParserCtxtPtr ctxt) {
366 if (ctxt->inputNr == 1) return(0); /* End of main Input */
367 if (xmlParserDebugEntities)
368 xmlGenericError(xmlGenericErrorContext,
369 "Popping input %d\n", ctxt->inputNr);
370 xmlFreeInputStream(inputPop(ctxt));
371 if ((*ctxt->input->cur == 0) &&
372 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
373 return(xmlPopInput(ctxt));
374 return(CUR);
375}
376
377/**
378 * xmlPushInput:
379 * @ctxt: an XML parser context
380 * @input: an XML parser input fragment (entity, XML fragment ...).
381 *
382 * xmlPushInput: switch to a new input stream which is stacked on top
383 * of the previous one(s).
384 */
385void
386xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
387 if (input == NULL) return;
388
389 if (xmlParserDebugEntities) {
390 if ((ctxt->input != NULL) && (ctxt->input->filename))
391 xmlGenericError(xmlGenericErrorContext,
392 "%s(%d): ", ctxt->input->filename,
393 ctxt->input->line);
394 xmlGenericError(xmlGenericErrorContext,
395 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
396 }
397 inputPush(ctxt, input);
398 GROW;
399}
400
401/**
402 * xmlParseCharRef:
403 * @ctxt: an XML parser context
404 *
405 * parse Reference declarations
406 *
407 * [66] CharRef ::= '&#' [0-9]+ ';' |
408 * '&#x' [0-9a-fA-F]+ ';'
409 *
410 * [ WFC: Legal Character ]
411 * Characters referred to using character references must match the
412 * production for Char.
413 *
414 * Returns the value parsed (as an int), 0 in case of error
415 */
416int
417xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000418 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000419 int count = 0;
420
421 if (ctxt->token != 0) {
422 val = ctxt->token;
423 ctxt->token = 0;
424 return(val);
425 }
426 /*
427 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
428 */
429 if ((RAW == '&') && (NXT(1) == '#') &&
430 (NXT(2) == 'x')) {
431 SKIP(3);
432 GROW;
433 while (RAW != ';') { /* loop blocked by count */
434 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
435 val = val * 16 + (CUR - '0');
436 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
437 val = val * 16 + (CUR - 'a') + 10;
438 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
439 val = val * 16 + (CUR - 'A') + 10;
440 else {
441 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
443 ctxt->sax->error(ctxt->userData,
444 "xmlParseCharRef: invalid hexadecimal value\n");
445 ctxt->wellFormed = 0;
446 ctxt->disableSAX = 1;
447 val = 0;
448 break;
449 }
450 NEXT;
451 count++;
452 }
453 if (RAW == ';') {
454 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
455 ctxt->nbChars ++;
456 ctxt->input->cur++;
457 }
458 } else if ((RAW == '&') && (NXT(1) == '#')) {
459 SKIP(2);
460 GROW;
461 while (RAW != ';') { /* loop blocked by count */
462 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
463 val = val * 10 + (CUR - '0');
464 else {
465 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
467 ctxt->sax->error(ctxt->userData,
468 "xmlParseCharRef: invalid decimal value\n");
469 ctxt->wellFormed = 0;
470 ctxt->disableSAX = 1;
471 val = 0;
472 break;
473 }
474 NEXT;
475 count++;
476 }
477 if (RAW == ';') {
478 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
479 ctxt->nbChars ++;
480 ctxt->input->cur++;
481 }
482 } else {
483 ctxt->errNo = XML_ERR_INVALID_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 }
490
491 /*
492 * [ WFC: Legal Character ]
493 * Characters referred to using character references must match the
494 * production for Char.
495 */
496 if (IS_CHAR(val)) {
497 return(val);
498 } else {
499 ctxt->errNo = XML_ERR_INVALID_CHAR;
500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
501 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
502 val);
503 ctxt->wellFormed = 0;
504 ctxt->disableSAX = 1;
505 }
506 return(0);
507}
508
509/**
510 * xmlParseStringCharRef:
511 * @ctxt: an XML parser context
512 * @str: a pointer to an index in the string
513 *
514 * parse Reference declarations, variant parsing from a string rather
515 * than an an input flow.
516 *
517 * [66] CharRef ::= '&#' [0-9]+ ';' |
518 * '&#x' [0-9a-fA-F]+ ';'
519 *
520 * [ WFC: Legal Character ]
521 * Characters referred to using character references must match the
522 * production for Char.
523 *
524 * Returns the value parsed (as an int), 0 in case of error, str will be
525 * updated to the current value of the index
526 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000527static int
Owen Taylor3473f882001-02-23 17:55:21 +0000528xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
529 const xmlChar *ptr;
530 xmlChar cur;
531 int val = 0;
532
533 if ((str == NULL) || (*str == NULL)) return(0);
534 ptr = *str;
535 cur = *ptr;
536 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
537 ptr += 3;
538 cur = *ptr;
539 while (cur != ';') { /* Non input consuming loop */
540 if ((cur >= '0') && (cur <= '9'))
541 val = val * 16 + (cur - '0');
542 else if ((cur >= 'a') && (cur <= 'f'))
543 val = val * 16 + (cur - 'a') + 10;
544 else if ((cur >= 'A') && (cur <= 'F'))
545 val = val * 16 + (cur - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseStringCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 val = 0;
554 break;
555 }
556 ptr++;
557 cur = *ptr;
558 }
559 if (cur == ';')
560 ptr++;
561 } else if ((cur == '&') && (ptr[1] == '#')){
562 ptr += 2;
563 cur = *ptr;
564 while (cur != ';') { /* Non input consuming loops */
565 if ((cur >= '0') && (cur <= '9'))
566 val = val * 10 + (cur - '0');
567 else {
568 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
570 ctxt->sax->error(ctxt->userData,
571 "xmlParseStringCharRef: invalid decimal value\n");
572 ctxt->wellFormed = 0;
573 ctxt->disableSAX = 1;
574 val = 0;
575 break;
576 }
577 ptr++;
578 cur = *ptr;
579 }
580 if (cur == ';')
581 ptr++;
582 } else {
583 ctxt->errNo = XML_ERR_INVALID_CHARREF;
584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
585 ctxt->sax->error(ctxt->userData,
586 "xmlParseCharRef: invalid value\n");
587 ctxt->wellFormed = 0;
588 ctxt->disableSAX = 1;
589 return(0);
590 }
591 *str = ptr;
592
593 /*
594 * [ WFC: Legal Character ]
595 * Characters referred to using character references must match the
596 * production for Char.
597 */
598 if (IS_CHAR(val)) {
599 return(val);
600 } else {
601 ctxt->errNo = XML_ERR_INVALID_CHAR;
602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603 ctxt->sax->error(ctxt->userData,
604 "CharRef: invalid xmlChar value %d\n", val);
605 ctxt->wellFormed = 0;
606 ctxt->disableSAX = 1;
607 }
608 return(0);
609}
610
611/**
612 * xmlParserHandlePEReference:
613 * @ctxt: the parser context
614 *
615 * [69] PEReference ::= '%' Name ';'
616 *
617 * [ WFC: No Recursion ]
618 * A parsed entity must not contain a recursive
619 * reference to itself, either directly or indirectly.
620 *
621 * [ WFC: Entity Declared ]
622 * In a document without any DTD, a document with only an internal DTD
623 * subset which contains no parameter entity references, or a document
624 * with "standalone='yes'", ... ... The declaration of a parameter
625 * entity must precede any reference to it...
626 *
627 * [ VC: Entity Declared ]
628 * In a document with an external subset or external parameter entities
629 * with "standalone='no'", ... ... The declaration of a parameter entity
630 * must precede any reference to it...
631 *
632 * [ WFC: In DTD ]
633 * Parameter-entity references may only appear in the DTD.
634 * NOTE: misleading but this is handled.
635 *
636 * A PEReference may have been detected in the current input stream
637 * the handling is done accordingly to
638 * http://www.w3.org/TR/REC-xml#entproc
639 * i.e.
640 * - Included in literal in entity values
641 * - Included as Paraemeter Entity reference within DTDs
642 */
643void
644xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
645 xmlChar *name;
646 xmlEntityPtr entity = NULL;
647 xmlParserInputPtr input;
648
649 if (ctxt->token != 0) {
650 return;
651 }
652 if (RAW != '%') return;
653 switch(ctxt->instate) {
654 case XML_PARSER_CDATA_SECTION:
655 return;
656 case XML_PARSER_COMMENT:
657 return;
658 case XML_PARSER_START_TAG:
659 return;
660 case XML_PARSER_END_TAG:
661 return;
662 case XML_PARSER_EOF:
663 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
665 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
666 ctxt->wellFormed = 0;
667 ctxt->disableSAX = 1;
668 return;
669 case XML_PARSER_PROLOG:
670 case XML_PARSER_START:
671 case XML_PARSER_MISC:
672 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
674 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
675 ctxt->wellFormed = 0;
676 ctxt->disableSAX = 1;
677 return;
678 case XML_PARSER_ENTITY_DECL:
679 case XML_PARSER_CONTENT:
680 case XML_PARSER_ATTRIBUTE_VALUE:
681 case XML_PARSER_PI:
682 case XML_PARSER_SYSTEM_LITERAL:
683 /* we just ignore it there */
684 return;
685 case XML_PARSER_EPILOG:
686 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
689 ctxt->wellFormed = 0;
690 ctxt->disableSAX = 1;
691 return;
692 case XML_PARSER_ENTITY_VALUE:
693 /*
694 * NOTE: in the case of entity values, we don't do the
695 * substitution here since we need the literal
696 * entity value to be able to save the internal
697 * subset of the document.
698 * This will be handled by xmlStringDecodeEntities
699 */
700 return;
701 case XML_PARSER_DTD:
702 /*
703 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
704 * In the internal DTD subset, parameter-entity references
705 * can occur only where markup declarations can occur, not
706 * within markup declarations.
707 * In that case this is handled in xmlParseMarkupDecl
708 */
709 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
710 return;
711 break;
712 case XML_PARSER_IGNORE:
713 return;
714 }
715
716 NEXT;
717 name = xmlParseName(ctxt);
718 if (xmlParserDebugEntities)
719 xmlGenericError(xmlGenericErrorContext,
720 "PE Reference: %s\n", name);
721 if (name == NULL) {
722 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 } else {
728 if (RAW == ';') {
729 NEXT;
730 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
731 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
732 if (entity == NULL) {
733
734 /*
735 * [ WFC: Entity Declared ]
736 * In a document without any DTD, a document with only an
737 * internal DTD subset which contains no parameter entity
738 * references, or a document with "standalone='yes'", ...
739 * ... The declaration of a parameter entity must precede
740 * any reference to it...
741 */
742 if ((ctxt->standalone == 1) ||
743 ((ctxt->hasExternalSubset == 0) &&
744 (ctxt->hasPErefs == 0))) {
745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746 ctxt->sax->error(ctxt->userData,
747 "PEReference: %%%s; not found\n", name);
748 ctxt->wellFormed = 0;
749 ctxt->disableSAX = 1;
750 } else {
751 /*
752 * [ VC: Entity Declared ]
753 * In a document with an external subset or external
754 * parameter entities with "standalone='no'", ...
755 * ... The declaration of a parameter entity must precede
756 * any reference to it...
757 */
758 if ((!ctxt->disableSAX) &&
759 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
760 ctxt->vctxt.error(ctxt->vctxt.userData,
761 "PEReference: %%%s; not found\n", name);
762 } else if ((!ctxt->disableSAX) &&
763 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
764 ctxt->sax->warning(ctxt->userData,
765 "PEReference: %%%s; not found\n", name);
766 ctxt->valid = 0;
767 }
768 } else {
769 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
770 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000771 xmlChar start[4];
772 xmlCharEncoding enc;
773
Owen Taylor3473f882001-02-23 17:55:21 +0000774 /*
775 * handle the extra spaces added before and after
776 * c.f. http://www.w3.org/TR/REC-xml#as-PE
777 * this is done independantly.
778 */
779 input = xmlNewEntityInputStream(ctxt, entity);
780 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000781
782 /*
783 * Get the 4 first bytes and decode the charset
784 * if enc != XML_CHAR_ENCODING_NONE
785 * plug some encoding conversion routines.
786 */
787 GROW
788 start[0] = RAW;
789 start[1] = NXT(1);
790 start[2] = NXT(2);
791 start[3] = NXT(3);
792 enc = xmlDetectCharEncoding(start, 4);
793 if (enc != XML_CHAR_ENCODING_NONE) {
794 xmlSwitchEncoding(ctxt, enc);
795 }
796
Owen Taylor3473f882001-02-23 17:55:21 +0000797 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
798 (RAW == '<') && (NXT(1) == '?') &&
799 (NXT(2) == 'x') && (NXT(3) == 'm') &&
800 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
801 xmlParseTextDecl(ctxt);
802 }
803 if (ctxt->token == 0)
804 ctxt->token = ' ';
805 } else {
806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
807 ctxt->sax->error(ctxt->userData,
808 "xmlHandlePEReference: %s is not a parameter entity\n",
809 name);
810 ctxt->wellFormed = 0;
811 ctxt->disableSAX = 1;
812 }
813 }
814 } else {
815 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
817 ctxt->sax->error(ctxt->userData,
818 "xmlHandlePEReference: expecting ';'\n");
819 ctxt->wellFormed = 0;
820 ctxt->disableSAX = 1;
821 }
822 xmlFree(name);
823 }
824}
825
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the variable buffer##_size and reallocates
 * the xmlChar array named by @buffer accordingly. It can only be used in
 * functions returning a pointer: if the reallocation fails the old array
 * is freed, since nothing would release it afterwards, and the calling
 * function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
838
839/**
840 * xmlStringDecodeEntities:
841 * @ctxt: the parser context
842 * @str: the input string
843 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
844 * @end: an end marker xmlChar, 0 if none
845 * @end2: an end marker xmlChar, 0 if none
846 * @end3: an end marker xmlChar, 0 if none
847 *
848 * Takes a entity string content and process to do the adequate subtitutions.
849 *
850 * [67] Reference ::= EntityRef | CharRef
851 *
852 * [69] PEReference ::= '%' Name ';'
853 *
854 * Returns A newly allocated string with the substitution done. The caller
855 * must deallocate it !
856 */
857xmlChar *
858xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
859 xmlChar end, xmlChar end2, xmlChar end3) {
860 xmlChar *buffer = NULL;
861 int buffer_size = 0;
862
863 xmlChar *current = NULL;
864 xmlEntityPtr ent;
865 int c,l;
866 int nbchars = 0;
867
868 if (str == NULL)
869 return(NULL);
870
871 if (ctxt->depth > 40) {
872 ctxt->errNo = XML_ERR_ENTITY_LOOP;
873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
874 ctxt->sax->error(ctxt->userData,
875 "Detected entity reference loop\n");
876 ctxt->wellFormed = 0;
877 ctxt->disableSAX = 1;
878 return(NULL);
879 }
880
881 /*
882 * allocate a translation buffer.
883 */
884 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
885 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
886 if (buffer == NULL) {
887 perror("xmlDecodeEntities: malloc failed");
888 return(NULL);
889 }
890
891 /*
892 * Ok loop until we reach one of the ending char or a size limit.
893 * we are operating on already parsed values.
894 */
895 c = CUR_SCHAR(str, l);
896 while ((c != 0) && (c != end) && /* non input consuming loop */
897 (c != end2) && (c != end3)) {
898
899 if (c == 0) break;
900 if ((c == '&') && (str[1] == '#')) {
901 int val = xmlParseStringCharRef(ctxt, &str);
902 if (val != 0) {
903 COPY_BUF(0,buffer,nbchars,val);
904 }
905 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
906 if (xmlParserDebugEntities)
907 xmlGenericError(xmlGenericErrorContext,
908 "String decoding Entity Reference: %.30s\n",
909 str);
910 ent = xmlParseStringEntityRef(ctxt, &str);
911 if ((ent != NULL) &&
912 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
913 if (ent->content != NULL) {
914 COPY_BUF(0,buffer,nbchars,ent->content[0]);
915 } else {
916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
917 ctxt->sax->error(ctxt->userData,
918 "internal error entity has no content\n");
919 }
920 } else if ((ent != NULL) && (ent->content != NULL)) {
921 xmlChar *rep;
922
923 ctxt->depth++;
924 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
925 0, 0, 0);
926 ctxt->depth--;
927 if (rep != NULL) {
928 current = rep;
929 while (*current != 0) { /* non input consuming loop */
930 buffer[nbchars++] = *current++;
931 if (nbchars >
932 buffer_size - XML_PARSER_BUFFER_SIZE) {
933 growBuffer(buffer);
934 }
935 }
936 xmlFree(rep);
937 }
938 } else if (ent != NULL) {
939 int i = xmlStrlen(ent->name);
940 const xmlChar *cur = ent->name;
941
942 buffer[nbchars++] = '&';
943 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
944 growBuffer(buffer);
945 }
946 for (;i > 0;i--)
947 buffer[nbchars++] = *cur++;
948 buffer[nbchars++] = ';';
949 }
950 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
951 if (xmlParserDebugEntities)
952 xmlGenericError(xmlGenericErrorContext,
953 "String decoding PE Reference: %.30s\n", str);
954 ent = xmlParseStringPEReference(ctxt, &str);
955 if (ent != NULL) {
956 xmlChar *rep;
957
958 ctxt->depth++;
959 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
960 0, 0, 0);
961 ctxt->depth--;
962 if (rep != NULL) {
963 current = rep;
964 while (*current != 0) { /* non input consuming loop */
965 buffer[nbchars++] = *current++;
966 if (nbchars >
967 buffer_size - XML_PARSER_BUFFER_SIZE) {
968 growBuffer(buffer);
969 }
970 }
971 xmlFree(rep);
972 }
973 }
974 } else {
975 COPY_BUF(l,buffer,nbchars,c);
976 str += l;
977 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
978 growBuffer(buffer);
979 }
980 }
981 c = CUR_SCHAR(str, l);
982 }
983 buffer[nbchars++] = 0;
984 return(buffer);
985}
986
987
988/************************************************************************
989 * *
990 * Commodity functions to handle xmlChars *
991 * *
992 ************************************************************************/
993
994/**
995 * xmlStrndup:
996 * @cur: the input xmlChar *
997 * @len: the len of @cur
998 *
999 * a strndup for array of xmlChar's
1000 *
1001 * Returns a new xmlChar * or NULL
1002 */
1003xmlChar *
1004xmlStrndup(const xmlChar *cur, int len) {
1005 xmlChar *ret;
1006
1007 if ((cur == NULL) || (len < 0)) return(NULL);
1008 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1009 if (ret == NULL) {
1010 xmlGenericError(xmlGenericErrorContext,
1011 "malloc of %ld byte failed\n",
1012 (len + 1) * (long)sizeof(xmlChar));
1013 return(NULL);
1014 }
1015 memcpy(ret, cur, len * sizeof(xmlChar));
1016 ret[len] = 0;
1017 return(ret);
1018}
1019
1020/**
1021 * xmlStrdup:
1022 * @cur: the input xmlChar *
1023 *
1024 * a strdup for array of xmlChar's. Since they are supposed to be
1025 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1026 * a termination mark of '0'.
1027 *
1028 * Returns a new xmlChar * or NULL
1029 */
1030xmlChar *
1031xmlStrdup(const xmlChar *cur) {
1032 const xmlChar *p = cur;
1033
1034 if (cur == NULL) return(NULL);
1035 while (*p != 0) p++; /* non input consuming */
1036 return(xmlStrndup(cur, p - cur));
1037}
1038
1039/**
1040 * xmlCharStrndup:
1041 * @cur: the input char *
1042 * @len: the len of @cur
1043 *
1044 * a strndup for char's to xmlChar's
1045 *
1046 * Returns a new xmlChar * or NULL
1047 */
1048
1049xmlChar *
1050xmlCharStrndup(const char *cur, int len) {
1051 int i;
1052 xmlChar *ret;
1053
1054 if ((cur == NULL) || (len < 0)) return(NULL);
1055 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1056 if (ret == NULL) {
1057 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1058 (len + 1) * (long)sizeof(xmlChar));
1059 return(NULL);
1060 }
1061 for (i = 0;i < len;i++)
1062 ret[i] = (xmlChar) cur[i];
1063 ret[len] = 0;
1064 return(ret);
1065}
1066
1067/**
1068 * xmlCharStrdup:
1069 * @cur: the input char *
1070 * @len: the len of @cur
1071 *
1072 * a strdup for char's to xmlChar's
1073 *
1074 * Returns a new xmlChar * or NULL
1075 */
1076
1077xmlChar *
1078xmlCharStrdup(const char *cur) {
1079 const char *p = cur;
1080
1081 if (cur == NULL) return(NULL);
1082 while (*p != '\0') p++; /* non input consuming */
1083 return(xmlCharStrndup(cur, p - cur));
1084}
1085
1086/**
1087 * xmlStrcmp:
1088 * @str1: the first xmlChar *
1089 * @str2: the second xmlChar *
1090 *
1091 * a strcmp for xmlChar's
1092 *
1093 * Returns the integer result of the comparison
1094 */
1095
1096int
1097xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1098 register int tmp;
1099
1100 if (str1 == str2) return(0);
1101 if (str1 == NULL) return(-1);
1102 if (str2 == NULL) return(1);
1103 do {
1104 tmp = *str1++ - *str2;
1105 if (tmp != 0) return(tmp);
1106 } while (*str2++ != 0);
1107 return 0;
1108}
1109
1110/**
1111 * xmlStrEqual:
1112 * @str1: the first xmlChar *
1113 * @str2: the second xmlChar *
1114 *
1115 * Check if both string are equal of have same content
1116 * Should be a bit more readable and faster than xmlStrEqual()
1117 *
1118 * Returns 1 if they are equal, 0 if they are different
1119 */
1120
1121int
1122xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1123 if (str1 == str2) return(1);
1124 if (str1 == NULL) return(0);
1125 if (str2 == NULL) return(0);
1126 do {
1127 if (*str1++ != *str2) return(0);
1128 } while (*str2++);
1129 return(1);
1130}
1131
1132/**
1133 * xmlStrncmp:
1134 * @str1: the first xmlChar *
1135 * @str2: the second xmlChar *
1136 * @len: the max comparison length
1137 *
1138 * a strncmp for xmlChar's
1139 *
1140 * Returns the integer result of the comparison
1141 */
1142
1143int
1144xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1145 register int tmp;
1146
1147 if (len <= 0) return(0);
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0 || --len == 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158static xmlChar casemap[256] = {
1159 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1160 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1161 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1162 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1163 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1164 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1165 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1166 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1167 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1168 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1169 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1170 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1171 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1172 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1173 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1174 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1175 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1176 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1177 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1178 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1179 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1180 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1181 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1182 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1183 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1184 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1185 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1186 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1187 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1188 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1189 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1190 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1191};
1192
1193/**
1194 * xmlStrcasecmp:
1195 * @str1: the first xmlChar *
1196 * @str2: the second xmlChar *
1197 *
1198 * a strcasecmp for xmlChar's
1199 *
1200 * Returns the integer result of the comparison
1201 */
1202
1203int
1204xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1205 register int tmp;
1206
1207 if (str1 == str2) return(0);
1208 if (str1 == NULL) return(-1);
1209 if (str2 == NULL) return(1);
1210 do {
1211 tmp = casemap[*str1++] - casemap[*str2];
1212 if (tmp != 0) return(tmp);
1213 } while (*str2++ != 0);
1214 return 0;
1215}
1216
1217/**
1218 * xmlStrncasecmp:
1219 * @str1: the first xmlChar *
1220 * @str2: the second xmlChar *
1221 * @len: the max comparison length
1222 *
1223 * a strncasecmp for xmlChar's
1224 *
1225 * Returns the integer result of the comparison
1226 */
1227
1228int
1229xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1230 register int tmp;
1231
1232 if (len <= 0) return(0);
1233 if (str1 == str2) return(0);
1234 if (str1 == NULL) return(-1);
1235 if (str2 == NULL) return(1);
1236 do {
1237 tmp = casemap[*str1++] - casemap[*str2];
1238 if (tmp != 0 || --len == 0) return(tmp);
1239 } while (*str2++ != 0);
1240 return 0;
1241}
1242
1243/**
1244 * xmlStrchr:
1245 * @str: the xmlChar * array
1246 * @val: the xmlChar to search
1247 *
1248 * a strchr for xmlChar's
1249 *
1250 * Returns the xmlChar * for the first occurence or NULL.
1251 */
1252
1253const xmlChar *
1254xmlStrchr(const xmlChar *str, xmlChar val) {
1255 if (str == NULL) return(NULL);
1256 while (*str != 0) { /* non input consuming */
1257 if (*str == val) return((xmlChar *) str);
1258 str++;
1259 }
1260 return(NULL);
1261}
1262
1263/**
1264 * xmlStrstr:
1265 * @str: the xmlChar * array (haystack)
1266 * @val: the xmlChar to search (needle)
1267 *
1268 * a strstr for xmlChar's
1269 *
1270 * Returns the xmlChar * for the first occurence or NULL.
1271 */
1272
1273const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001274xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001275 int n;
1276
1277 if (str == NULL) return(NULL);
1278 if (val == NULL) return(NULL);
1279 n = xmlStrlen(val);
1280
1281 if (n == 0) return(str);
1282 while (*str != 0) { /* non input consuming */
1283 if (*str == *val) {
1284 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1285 }
1286 str++;
1287 }
1288 return(NULL);
1289}
1290
1291/**
1292 * xmlStrcasestr:
1293 * @str: the xmlChar * array (haystack)
1294 * @val: the xmlChar to search (needle)
1295 *
1296 * a case-ignoring strstr for xmlChar's
1297 *
1298 * Returns the xmlChar * for the first occurence or NULL.
1299 */
1300
1301const xmlChar *
1302xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1303 int n;
1304
1305 if (str == NULL) return(NULL);
1306 if (val == NULL) return(NULL);
1307 n = xmlStrlen(val);
1308
1309 if (n == 0) return(str);
1310 while (*str != 0) { /* non input consuming */
1311 if (casemap[*str] == casemap[*val])
1312 if (!xmlStrncasecmp(str, val, n)) return(str);
1313 str++;
1314 }
1315 return(NULL);
1316}
1317
1318/**
1319 * xmlStrsub:
1320 * @str: the xmlChar * array (haystack)
1321 * @start: the index of the first char (zero based)
1322 * @len: the length of the substring
1323 *
1324 * Extract a substring of a given string
1325 *
1326 * Returns the xmlChar * for the first occurence or NULL.
1327 */
1328
1329xmlChar *
1330xmlStrsub(const xmlChar *str, int start, int len) {
1331 int i;
1332
1333 if (str == NULL) return(NULL);
1334 if (start < 0) return(NULL);
1335 if (len < 0) return(NULL);
1336
1337 for (i = 0;i < start;i++) {
1338 if (*str == 0) return(NULL);
1339 str++;
1340 }
1341 if (*str == 0) return(NULL);
1342 return(xmlStrndup(str, len));
1343}
1344
1345/**
1346 * xmlStrlen:
1347 * @str: the xmlChar * array
1348 *
1349 * length of a xmlChar's string
1350 *
1351 * Returns the number of xmlChar contained in the ARRAY.
1352 */
1353
1354int
1355xmlStrlen(const xmlChar *str) {
1356 int len = 0;
1357
1358 if (str == NULL) return(0);
1359 while (*str != 0) { /* non input consuming */
1360 str++;
1361 len++;
1362 }
1363 return(len);
1364}
1365
1366/**
1367 * xmlStrncat:
1368 * @cur: the original xmlChar * array
1369 * @add: the xmlChar * array added
1370 * @len: the length of @add
1371 *
1372 * a strncat for array of xmlChar's, it will extend cur with the len
1373 * first bytes of @add.
1374 *
1375 * Returns a new xmlChar *, the original @cur is reallocated if needed
1376 * and should not be freed
1377 */
1378
1379xmlChar *
1380xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1381 int size;
1382 xmlChar *ret;
1383
1384 if ((add == NULL) || (len == 0))
1385 return(cur);
1386 if (cur == NULL)
1387 return(xmlStrndup(add, len));
1388
1389 size = xmlStrlen(cur);
1390 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1391 if (ret == NULL) {
1392 xmlGenericError(xmlGenericErrorContext,
1393 "xmlStrncat: realloc of %ld byte failed\n",
1394 (size + len + 1) * (long)sizeof(xmlChar));
1395 return(cur);
1396 }
1397 memcpy(&ret[size], add, len * sizeof(xmlChar));
1398 ret[size + len] = 0;
1399 return(ret);
1400}
1401
1402/**
1403 * xmlStrcat:
1404 * @cur: the original xmlChar * array
1405 * @add: the xmlChar * array added
1406 *
1407 * a strcat for array of xmlChar's. Since they are supposed to be
1408 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1409 * a termination mark of '0'.
1410 *
1411 * Returns a new xmlChar * containing the concatenated string.
1412 */
1413xmlChar *
1414xmlStrcat(xmlChar *cur, const xmlChar *add) {
1415 const xmlChar *p = add;
1416
1417 if (add == NULL) return(cur);
1418 if (cur == NULL)
1419 return(xmlStrdup(add));
1420
1421 while (*p != 0) p++; /* non input consuming */
1422 return(xmlStrncat(cur, add, p - add));
1423}
1424
1425/************************************************************************
1426 * *
1427 * Commodity functions, cleanup needed ? *
1428 * *
1429 ************************************************************************/
1430
1431/**
1432 * areBlanks:
1433 * @ctxt: an XML parser context
1434 * @str: a xmlChar *
1435 * @len: the size of @str
1436 *
1437 * Is this a sequence of blank chars that one can ignore ?
1438 *
1439 * Returns 1 if ignorable 0 otherwise.
1440 */
1441
1442static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1443 int i, ret;
1444 xmlNodePtr lastChild;
1445
Daniel Veillard2f362242001-03-02 17:36:21 +00001446 if (ctxt->keepBlanks)
1447 return(0);
1448
Owen Taylor3473f882001-02-23 17:55:21 +00001449 /*
1450 * Check for xml:space value.
1451 */
1452 if (*(ctxt->space) == 1)
1453 return(0);
1454
1455 /*
1456 * Check that the string is made of blanks
1457 */
1458 for (i = 0;i < len;i++)
1459 if (!(IS_BLANK(str[i]))) return(0);
1460
1461 /*
1462 * Look if the element is mixed content in the Dtd if available
1463 */
1464 if (ctxt->myDoc != NULL) {
1465 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1466 if (ret == 0) return(1);
1467 if (ret == 1) return(0);
1468 }
1469
1470 /*
1471 * Otherwise, heuristic :-\
1472 */
Owen Taylor3473f882001-02-23 17:55:21 +00001473 if (RAW != '<') return(0);
1474 if (ctxt->node == NULL) return(0);
1475 if ((ctxt->node->children == NULL) &&
1476 (RAW == '<') && (NXT(1) == '/')) return(0);
1477
1478 lastChild = xmlGetLastChild(ctxt->node);
1479 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001480 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1481 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001482 } else if (xmlNodeIsText(lastChild))
1483 return(0);
1484 else if ((ctxt->node->children != NULL) &&
1485 (xmlNodeIsText(ctxt->node->children)))
1486 return(0);
1487 return(1);
1488}
1489
1490/*
1491 * Forward declarations for recursive behaviour.
1492 */
1493void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1494void xmlParseReference(xmlParserCtxtPtr ctxt);
1495
1496/************************************************************************
1497 * *
1498 * Extra stuff for namespace support *
1499 * Relates to http://www.w3.org/TR/WD-xml-names *
1500 * *
1501 ************************************************************************/
1502
1503/**
1504 * xmlSplitQName:
1505 * @ctxt: an XML parser context
1506 * @name: an XML parser context
1507 * @prefix: a xmlChar **
1508 *
1509 * parse an UTF8 encoded XML qualified name string
1510 *
1511 * [NS 5] QName ::= (Prefix ':')? LocalPart
1512 *
1513 * [NS 6] Prefix ::= NCName
1514 *
1515 * [NS 7] LocalPart ::= NCName
1516 *
1517 * Returns the local part, and prefix is updated
1518 * to get the Prefix if any.
1519 */
1520
1521xmlChar *
1522xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1523 xmlChar buf[XML_MAX_NAMELEN + 5];
1524 xmlChar *buffer = NULL;
1525 int len = 0;
1526 int max = XML_MAX_NAMELEN;
1527 xmlChar *ret = NULL;
1528 const xmlChar *cur = name;
1529 int c;
1530
1531 *prefix = NULL;
1532
1533#ifndef XML_XML_NAMESPACE
1534 /* xml: prefix is not really a namespace */
1535 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1536 (cur[2] == 'l') && (cur[3] == ':'))
1537 return(xmlStrdup(name));
1538#endif
1539
1540 /* nasty but valid */
1541 if (cur[0] == ':')
1542 return(xmlStrdup(name));
1543
1544 c = *cur++;
1545 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1546 buf[len++] = c;
1547 c = *cur++;
1548 }
1549 if (len >= max) {
1550 /*
1551 * Okay someone managed to make a huge name, so he's ready to pay
1552 * for the processing speed.
1553 */
1554 max = len * 2;
1555
1556 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1557 if (buffer == NULL) {
1558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1559 ctxt->sax->error(ctxt->userData,
1560 "xmlSplitQName: out of memory\n");
1561 return(NULL);
1562 }
1563 memcpy(buffer, buf, len);
1564 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1565 if (len + 10 > max) {
1566 max *= 2;
1567 buffer = (xmlChar *) xmlRealloc(buffer,
1568 max * sizeof(xmlChar));
1569 if (buffer == NULL) {
1570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData,
1572 "xmlSplitQName: out of memory\n");
1573 return(NULL);
1574 }
1575 }
1576 buffer[len++] = c;
1577 c = *cur++;
1578 }
1579 buffer[len] = 0;
1580 }
1581
1582 if (buffer == NULL)
1583 ret = xmlStrndup(buf, len);
1584 else {
1585 ret = buffer;
1586 buffer = NULL;
1587 max = XML_MAX_NAMELEN;
1588 }
1589
1590
1591 if (c == ':') {
1592 c = *cur++;
1593 if (c == 0) return(ret);
1594 *prefix = ret;
1595 len = 0;
1596
1597 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1598 buf[len++] = c;
1599 c = *cur++;
1600 }
1601 if (len >= max) {
1602 /*
1603 * Okay someone managed to make a huge name, so he's ready to pay
1604 * for the processing speed.
1605 */
1606 max = len * 2;
1607
1608 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1609 if (buffer == NULL) {
1610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1611 ctxt->sax->error(ctxt->userData,
1612 "xmlSplitQName: out of memory\n");
1613 return(NULL);
1614 }
1615 memcpy(buffer, buf, len);
1616 while (c != 0) { /* tested bigname2.xml */
1617 if (len + 10 > max) {
1618 max *= 2;
1619 buffer = (xmlChar *) xmlRealloc(buffer,
1620 max * sizeof(xmlChar));
1621 if (buffer == NULL) {
1622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1623 ctxt->sax->error(ctxt->userData,
1624 "xmlSplitQName: out of memory\n");
1625 return(NULL);
1626 }
1627 }
1628 buffer[len++] = c;
1629 c = *cur++;
1630 }
1631 buffer[len] = 0;
1632 }
1633
1634 if (buffer == NULL)
1635 ret = xmlStrndup(buf, len);
1636 else {
1637 ret = buffer;
1638 }
1639 }
1640
1641 return(ret);
1642}
1643
1644/************************************************************************
1645 * *
1646 * The parser itself *
1647 * Relates to http://www.w3.org/TR/REC-xml *
1648 * *
1649 ************************************************************************/
1650
Daniel Veillard76d66f42001-05-16 21:05:17 +00001651static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001652/**
1653 * xmlParseName:
1654 * @ctxt: an XML parser context
1655 *
1656 * parse an XML name.
1657 *
1658 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1659 * CombiningChar | Extender
1660 *
1661 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1662 *
1663 * [6] Names ::= Name (S Name)*
1664 *
1665 * Returns the Name parsed or NULL
1666 */
1667
1668xmlChar *
1669xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001670 const xmlChar *in;
1671 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001672 int count = 0;
1673
1674 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001675
1676 /*
1677 * Accelerator for simple ASCII names
1678 */
1679 in = ctxt->input->cur;
1680 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1681 ((*in >= 0x41) && (*in <= 0x5A)) ||
1682 (*in == '_') || (*in == ':')) {
1683 in++;
1684 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1685 ((*in >= 0x41) && (*in <= 0x5A)) ||
1686 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001687 (*in == '_') || (*in == '-') ||
1688 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001689 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001690 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001691 count = in - ctxt->input->cur;
1692 ret = xmlStrndup(ctxt->input->cur, count);
1693 ctxt->input->cur = in;
1694 return(ret);
1695 }
1696 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001697 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001698}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001699
Daniel Veillard76d66f42001-05-16 21:05:17 +00001700static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001701xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1702 xmlChar buf[XML_MAX_NAMELEN + 5];
1703 int len = 0, l;
1704 int c;
1705 int count = 0;
1706
1707 /*
1708 * Handler for more complex cases
1709 */
1710 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001711 c = CUR_CHAR(l);
1712 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1713 (!IS_LETTER(c) && (c != '_') &&
1714 (c != ':'))) {
1715 return(NULL);
1716 }
1717
1718 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1719 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1720 (c == '.') || (c == '-') ||
1721 (c == '_') || (c == ':') ||
1722 (IS_COMBINING(c)) ||
1723 (IS_EXTENDER(c)))) {
1724 if (count++ > 100) {
1725 count = 0;
1726 GROW;
1727 }
1728 COPY_BUF(l,buf,len,c);
1729 NEXTL(l);
1730 c = CUR_CHAR(l);
1731 if (len >= XML_MAX_NAMELEN) {
1732 /*
1733 * Okay someone managed to make a huge name, so he's ready to pay
1734 * for the processing speed.
1735 */
1736 xmlChar *buffer;
1737 int max = len * 2;
1738
1739 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1740 if (buffer == NULL) {
1741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1742 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001743 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001744 return(NULL);
1745 }
1746 memcpy(buffer, buf, len);
1747 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1748 (c == '.') || (c == '-') ||
1749 (c == '_') || (c == ':') ||
1750 (IS_COMBINING(c)) ||
1751 (IS_EXTENDER(c))) {
1752 if (count++ > 100) {
1753 count = 0;
1754 GROW;
1755 }
1756 if (len + 10 > max) {
1757 max *= 2;
1758 buffer = (xmlChar *) xmlRealloc(buffer,
1759 max * sizeof(xmlChar));
1760 if (buffer == NULL) {
1761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1762 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001763 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001764 return(NULL);
1765 }
1766 }
1767 COPY_BUF(l,buffer,len,c);
1768 NEXTL(l);
1769 c = CUR_CHAR(l);
1770 }
1771 buffer[len] = 0;
1772 return(buffer);
1773 }
1774 }
1775 return(xmlStrndup(buf, len));
1776}
1777
1778/**
1779 * xmlParseStringName:
1780 * @ctxt: an XML parser context
1781 * @str: a pointer to the string pointer (IN/OUT)
1782 *
1783 * parse an XML name.
1784 *
1785 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1786 * CombiningChar | Extender
1787 *
1788 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1789 *
1790 * [6] Names ::= Name (S Name)*
1791 *
1792 * Returns the Name parsed or NULL. The str pointer
1793 * is updated to the current location in the string.
1794 */
1795
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001796static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001797xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1798 xmlChar buf[XML_MAX_NAMELEN + 5];
1799 const xmlChar *cur = *str;
1800 int len = 0, l;
1801 int c;
1802
1803 c = CUR_SCHAR(cur, l);
1804 if (!IS_LETTER(c) && (c != '_') &&
1805 (c != ':')) {
1806 return(NULL);
1807 }
1808
1809 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1810 (c == '.') || (c == '-') ||
1811 (c == '_') || (c == ':') ||
1812 (IS_COMBINING(c)) ||
1813 (IS_EXTENDER(c))) {
1814 COPY_BUF(l,buf,len,c);
1815 cur += l;
1816 c = CUR_SCHAR(cur, l);
1817 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1818 /*
1819 * Okay someone managed to make a huge name, so he's ready to pay
1820 * for the processing speed.
1821 */
1822 xmlChar *buffer;
1823 int max = len * 2;
1824
1825 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1826 if (buffer == NULL) {
1827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1828 ctxt->sax->error(ctxt->userData,
1829 "xmlParseStringName: out of memory\n");
1830 return(NULL);
1831 }
1832 memcpy(buffer, buf, len);
1833 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1834 (c == '.') || (c == '-') ||
1835 (c == '_') || (c == ':') ||
1836 (IS_COMBINING(c)) ||
1837 (IS_EXTENDER(c))) {
1838 if (len + 10 > max) {
1839 max *= 2;
1840 buffer = (xmlChar *) xmlRealloc(buffer,
1841 max * sizeof(xmlChar));
1842 if (buffer == NULL) {
1843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1844 ctxt->sax->error(ctxt->userData,
1845 "xmlParseStringName: out of memory\n");
1846 return(NULL);
1847 }
1848 }
1849 COPY_BUF(l,buffer,len,c);
1850 cur += l;
1851 c = CUR_SCHAR(cur, l);
1852 }
1853 buffer[len] = 0;
1854 *str = cur;
1855 return(buffer);
1856 }
1857 }
1858 *str = cur;
1859 return(xmlStrndup(buf, len));
1860}
1861
1862/**
1863 * xmlParseNmtoken:
1864 * @ctxt: an XML parser context
1865 *
1866 * parse an XML Nmtoken.
1867 *
1868 * [7] Nmtoken ::= (NameChar)+
1869 *
1870 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1871 *
1872 * Returns the Nmtoken parsed or NULL
1873 */
1874
1875xmlChar *
1876xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1877 xmlChar buf[XML_MAX_NAMELEN + 5];
1878 int len = 0, l;
1879 int c;
1880 int count = 0;
1881
1882 GROW;
1883 c = CUR_CHAR(l);
1884
1885 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1886 (c == '.') || (c == '-') ||
1887 (c == '_') || (c == ':') ||
1888 (IS_COMBINING(c)) ||
1889 (IS_EXTENDER(c))) {
1890 if (count++ > 100) {
1891 count = 0;
1892 GROW;
1893 }
1894 COPY_BUF(l,buf,len,c);
1895 NEXTL(l);
1896 c = CUR_CHAR(l);
1897 if (len >= XML_MAX_NAMELEN) {
1898 /*
1899 * Okay someone managed to make a huge token, so he's ready to pay
1900 * for the processing speed.
1901 */
1902 xmlChar *buffer;
1903 int max = len * 2;
1904
1905 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1906 if (buffer == NULL) {
1907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1908 ctxt->sax->error(ctxt->userData,
1909 "xmlParseNmtoken: out of memory\n");
1910 return(NULL);
1911 }
1912 memcpy(buffer, buf, len);
1913 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1914 (c == '.') || (c == '-') ||
1915 (c == '_') || (c == ':') ||
1916 (IS_COMBINING(c)) ||
1917 (IS_EXTENDER(c))) {
1918 if (count++ > 100) {
1919 count = 0;
1920 GROW;
1921 }
1922 if (len + 10 > max) {
1923 max *= 2;
1924 buffer = (xmlChar *) xmlRealloc(buffer,
1925 max * sizeof(xmlChar));
1926 if (buffer == NULL) {
1927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1928 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001929 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 return(NULL);
1931 }
1932 }
1933 COPY_BUF(l,buffer,len,c);
1934 NEXTL(l);
1935 c = CUR_CHAR(l);
1936 }
1937 buffer[len] = 0;
1938 return(buffer);
1939 }
1940 }
1941 if (len == 0)
1942 return(NULL);
1943 return(xmlStrndup(buf, len));
1944}
1945
1946/**
1947 * xmlParseEntityValue:
1948 * @ctxt: an XML parser context
1949 * @orig: if non-NULL store a copy of the original entity value
1950 *
1951 * parse a value for ENTITY declarations
1952 *
1953 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1954 * "'" ([^%&'] | PEReference | Reference)* "'"
1955 *
1956 * Returns the EntityValue parsed with reference substitued or NULL
1957 */
1958
1959xmlChar *
1960xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1961 xmlChar *buf = NULL;
1962 int len = 0;
1963 int size = XML_PARSER_BUFFER_SIZE;
1964 int c, l;
1965 xmlChar stop;
1966 xmlChar *ret = NULL;
1967 const xmlChar *cur = NULL;
1968 xmlParserInputPtr input;
1969
1970 if (RAW == '"') stop = '"';
1971 else if (RAW == '\'') stop = '\'';
1972 else {
1973 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1975 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1976 ctxt->wellFormed = 0;
1977 ctxt->disableSAX = 1;
1978 return(NULL);
1979 }
1980 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1981 if (buf == NULL) {
1982 xmlGenericError(xmlGenericErrorContext,
1983 "malloc of %d byte failed\n", size);
1984 return(NULL);
1985 }
1986
1987 /*
1988 * The content of the entity definition is copied in a buffer.
1989 */
1990
1991 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1992 input = ctxt->input;
1993 GROW;
1994 NEXT;
1995 c = CUR_CHAR(l);
1996 /*
1997 * NOTE: 4.4.5 Included in Literal
1998 * When a parameter entity reference appears in a literal entity
1999 * value, ... a single or double quote character in the replacement
2000 * text is always treated as a normal data character and will not
2001 * terminate the literal.
2002 * In practice it means we stop the loop only when back at parsing
2003 * the initial entity and the quote is found
2004 */
2005 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2006 (ctxt->input != input))) {
2007 if (len + 5 >= size) {
2008 size *= 2;
2009 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2010 if (buf == NULL) {
2011 xmlGenericError(xmlGenericErrorContext,
2012 "realloc of %d byte failed\n", size);
2013 return(NULL);
2014 }
2015 }
2016 COPY_BUF(l,buf,len,c);
2017 NEXTL(l);
2018 /*
2019 * Pop-up of finished entities.
2020 */
2021 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2022 xmlPopInput(ctxt);
2023
2024 GROW;
2025 c = CUR_CHAR(l);
2026 if (c == 0) {
2027 GROW;
2028 c = CUR_CHAR(l);
2029 }
2030 }
2031 buf[len] = 0;
2032
2033 /*
2034 * Raise problem w.r.t. '&' and '%' being used in non-entities
2035 * reference constructs. Note Charref will be handled in
2036 * xmlStringDecodeEntities()
2037 */
2038 cur = buf;
2039 while (*cur != 0) { /* non input consuming */
2040 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2041 xmlChar *name;
2042 xmlChar tmp = *cur;
2043
2044 cur++;
2045 name = xmlParseStringName(ctxt, &cur);
2046 if ((name == NULL) || (*cur != ';')) {
2047 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2049 ctxt->sax->error(ctxt->userData,
2050 "EntityValue: '%c' forbidden except for entities references\n",
2051 tmp);
2052 ctxt->wellFormed = 0;
2053 ctxt->disableSAX = 1;
2054 }
2055 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2056 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2058 ctxt->sax->error(ctxt->userData,
2059 "EntityValue: PEReferences forbidden in internal subset\n",
2060 tmp);
2061 ctxt->wellFormed = 0;
2062 ctxt->disableSAX = 1;
2063 }
2064 if (name != NULL)
2065 xmlFree(name);
2066 }
2067 cur++;
2068 }
2069
2070 /*
2071 * Then PEReference entities are substituted.
2072 */
2073 if (c != stop) {
2074 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2076 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2077 ctxt->wellFormed = 0;
2078 ctxt->disableSAX = 1;
2079 xmlFree(buf);
2080 } else {
2081 NEXT;
2082 /*
2083 * NOTE: 4.4.7 Bypassed
2084 * When a general entity reference appears in the EntityValue in
2085 * an entity declaration, it is bypassed and left as is.
2086 * so XML_SUBSTITUTE_REF is not set here.
2087 */
2088 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2089 0, 0, 0);
2090 if (orig != NULL)
2091 *orig = buf;
2092 else
2093 xmlFree(buf);
2094 }
2095
2096 return(ret);
2097}
2098
2099/**
2100 * xmlParseAttValue:
2101 * @ctxt: an XML parser context
2102 *
2103 * parse a value for an attribute
2104 * Note: the parser won't do substitution of entities here, this
2105 * will be handled later in xmlStringGetNodeList
2106 *
2107 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2108 * "'" ([^<&'] | Reference)* "'"
2109 *
2110 * 3.3.3 Attribute-Value Normalization:
2111 * Before the value of an attribute is passed to the application or
2112 * checked for validity, the XML processor must normalize it as follows:
2113 * - a character reference is processed by appending the referenced
2114 * character to the attribute value
2115 * - an entity reference is processed by recursively processing the
2116 * replacement text of the entity
2117 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2118 * appending #x20 to the normalized value, except that only a single
2119 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2120 * parsed entity or the literal entity value of an internal parsed entity
2121 * - other characters are processed by appending them to the normalized value
2122 * If the declared value is not CDATA, then the XML processor must further
2123 * process the normalized attribute value by discarding any leading and
2124 * trailing space (#x20) characters, and by replacing sequences of space
2125 * (#x20) characters by a single space (#x20) character.
2126 * All attributes for which no declaration has been read should be treated
2127 * by a non-validating parser as if declared CDATA.
2128 *
2129 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2130 */
2131
2132xmlChar *
2133xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2134 xmlChar limit = 0;
2135 xmlChar *buf = NULL;
2136 int len = 0;
2137 int buf_size = 0;
2138 int c, l;
2139 xmlChar *current = NULL;
2140 xmlEntityPtr ent;
2141
2142
2143 SHRINK;
2144 if (NXT(0) == '"') {
2145 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2146 limit = '"';
2147 NEXT;
2148 } else if (NXT(0) == '\'') {
2149 limit = '\'';
2150 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2151 NEXT;
2152 } else {
2153 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2155 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2156 ctxt->wellFormed = 0;
2157 ctxt->disableSAX = 1;
2158 return(NULL);
2159 }
2160
2161 /*
2162 * allocate a translation buffer.
2163 */
2164 buf_size = XML_PARSER_BUFFER_SIZE;
2165 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2166 if (buf == NULL) {
2167 perror("xmlParseAttValue: malloc failed");
2168 return(NULL);
2169 }
2170
2171 /*
2172 * Ok loop until we reach one of the ending char or a size limit.
2173 */
2174 c = CUR_CHAR(l);
2175 while (((NXT(0) != limit) && /* checked */
2176 (c != '<')) || (ctxt->token != 0)) {
2177 if (c == 0) break;
2178 if (ctxt->token == '&') {
2179 /*
2180 * The reparsing will be done in xmlStringGetNodeList()
2181 * called by the attribute() function in SAX.c
2182 */
2183 static xmlChar buffer[6] = "&#38;";
2184
2185 if (len > buf_size - 10) {
2186 growBuffer(buf);
2187 }
2188 current = &buffer[0];
2189 while (*current != 0) { /* non input consuming */
2190 buf[len++] = *current++;
2191 }
2192 ctxt->token = 0;
2193 } else if (c == '&') {
2194 if (NXT(1) == '#') {
2195 int val = xmlParseCharRef(ctxt);
2196 if (val == '&') {
2197 /*
2198 * The reparsing will be done in xmlStringGetNodeList()
2199 * called by the attribute() function in SAX.c
2200 */
2201 static xmlChar buffer[6] = "&#38;";
2202
2203 if (len > buf_size - 10) {
2204 growBuffer(buf);
2205 }
2206 current = &buffer[0];
2207 while (*current != 0) { /* non input consuming */
2208 buf[len++] = *current++;
2209 }
2210 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002211 if (len > buf_size - 10) {
2212 growBuffer(buf);
2213 }
Owen Taylor3473f882001-02-23 17:55:21 +00002214 len += xmlCopyChar(0, &buf[len], val);
2215 }
2216 } else {
2217 ent = xmlParseEntityRef(ctxt);
2218 if ((ent != NULL) &&
2219 (ctxt->replaceEntities != 0)) {
2220 xmlChar *rep;
2221
2222 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2223 rep = xmlStringDecodeEntities(ctxt, ent->content,
2224 XML_SUBSTITUTE_REF, 0, 0, 0);
2225 if (rep != NULL) {
2226 current = rep;
2227 while (*current != 0) { /* non input consuming */
2228 buf[len++] = *current++;
2229 if (len > buf_size - 10) {
2230 growBuffer(buf);
2231 }
2232 }
2233 xmlFree(rep);
2234 }
2235 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002236 if (len > buf_size - 10) {
2237 growBuffer(buf);
2238 }
Owen Taylor3473f882001-02-23 17:55:21 +00002239 if (ent->content != NULL)
2240 buf[len++] = ent->content[0];
2241 }
2242 } else if (ent != NULL) {
2243 int i = xmlStrlen(ent->name);
2244 const xmlChar *cur = ent->name;
2245
2246 /*
2247 * This may look absurd but is needed to detect
2248 * entities problems
2249 */
2250 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2251 (ent->content != NULL)) {
2252 xmlChar *rep;
2253 rep = xmlStringDecodeEntities(ctxt, ent->content,
2254 XML_SUBSTITUTE_REF, 0, 0, 0);
2255 if (rep != NULL)
2256 xmlFree(rep);
2257 }
2258
2259 /*
2260 * Just output the reference
2261 */
2262 buf[len++] = '&';
2263 if (len > buf_size - i - 10) {
2264 growBuffer(buf);
2265 }
2266 for (;i > 0;i--)
2267 buf[len++] = *cur++;
2268 buf[len++] = ';';
2269 }
2270 }
2271 } else {
2272 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2273 COPY_BUF(l,buf,len,0x20);
2274 if (len > buf_size - 10) {
2275 growBuffer(buf);
2276 }
2277 } else {
2278 COPY_BUF(l,buf,len,c);
2279 if (len > buf_size - 10) {
2280 growBuffer(buf);
2281 }
2282 }
2283 NEXTL(l);
2284 }
2285 GROW;
2286 c = CUR_CHAR(l);
2287 }
2288 buf[len++] = 0;
2289 if (RAW == '<') {
2290 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2292 ctxt->sax->error(ctxt->userData,
2293 "Unescaped '<' not allowed in attributes values\n");
2294 ctxt->wellFormed = 0;
2295 ctxt->disableSAX = 1;
2296 } else if (RAW != limit) {
2297 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2299 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2300 ctxt->wellFormed = 0;
2301 ctxt->disableSAX = 1;
2302 } else
2303 NEXT;
2304 return(buf);
2305}
2306
2307/**
2308 * xmlParseSystemLiteral:
2309 * @ctxt: an XML parser context
2310 *
2311 * parse an XML Literal
2312 *
2313 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2314 *
2315 * Returns the SystemLiteral parsed or NULL
2316 */
2317
2318xmlChar *
2319xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2320 xmlChar *buf = NULL;
2321 int len = 0;
2322 int size = XML_PARSER_BUFFER_SIZE;
2323 int cur, l;
2324 xmlChar stop;
2325 int state = ctxt->instate;
2326 int count = 0;
2327
2328 SHRINK;
2329 if (RAW == '"') {
2330 NEXT;
2331 stop = '"';
2332 } else if (RAW == '\'') {
2333 NEXT;
2334 stop = '\'';
2335 } else {
2336 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2338 ctxt->sax->error(ctxt->userData,
2339 "SystemLiteral \" or ' expected\n");
2340 ctxt->wellFormed = 0;
2341 ctxt->disableSAX = 1;
2342 return(NULL);
2343 }
2344
2345 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2346 if (buf == NULL) {
2347 xmlGenericError(xmlGenericErrorContext,
2348 "malloc of %d byte failed\n", size);
2349 return(NULL);
2350 }
2351 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2352 cur = CUR_CHAR(l);
2353 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2354 if (len + 5 >= size) {
2355 size *= 2;
2356 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2357 if (buf == NULL) {
2358 xmlGenericError(xmlGenericErrorContext,
2359 "realloc of %d byte failed\n", size);
2360 ctxt->instate = (xmlParserInputState) state;
2361 return(NULL);
2362 }
2363 }
2364 count++;
2365 if (count > 50) {
2366 GROW;
2367 count = 0;
2368 }
2369 COPY_BUF(l,buf,len,cur);
2370 NEXTL(l);
2371 cur = CUR_CHAR(l);
2372 if (cur == 0) {
2373 GROW;
2374 SHRINK;
2375 cur = CUR_CHAR(l);
2376 }
2377 }
2378 buf[len] = 0;
2379 ctxt->instate = (xmlParserInputState) state;
2380 if (!IS_CHAR(cur)) {
2381 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2383 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2384 ctxt->wellFormed = 0;
2385 ctxt->disableSAX = 1;
2386 } else {
2387 NEXT;
2388 }
2389 return(buf);
2390}
2391
2392/**
2393 * xmlParsePubidLiteral:
2394 * @ctxt: an XML parser context
2395 *
2396 * parse an XML public literal
2397 *
2398 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2399 *
2400 * Returns the PubidLiteral parsed or NULL.
2401 */
2402
2403xmlChar *
2404xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2405 xmlChar *buf = NULL;
2406 int len = 0;
2407 int size = XML_PARSER_BUFFER_SIZE;
2408 xmlChar cur;
2409 xmlChar stop;
2410 int count = 0;
2411
2412 SHRINK;
2413 if (RAW == '"') {
2414 NEXT;
2415 stop = '"';
2416 } else if (RAW == '\'') {
2417 NEXT;
2418 stop = '\'';
2419 } else {
2420 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2422 ctxt->sax->error(ctxt->userData,
2423 "SystemLiteral \" or ' expected\n");
2424 ctxt->wellFormed = 0;
2425 ctxt->disableSAX = 1;
2426 return(NULL);
2427 }
2428 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2429 if (buf == NULL) {
2430 xmlGenericError(xmlGenericErrorContext,
2431 "malloc of %d byte failed\n", size);
2432 return(NULL);
2433 }
2434 cur = CUR;
2435 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2436 if (len + 1 >= size) {
2437 size *= 2;
2438 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2439 if (buf == NULL) {
2440 xmlGenericError(xmlGenericErrorContext,
2441 "realloc of %d byte failed\n", size);
2442 return(NULL);
2443 }
2444 }
2445 buf[len++] = cur;
2446 count++;
2447 if (count > 50) {
2448 GROW;
2449 count = 0;
2450 }
2451 NEXT;
2452 cur = CUR;
2453 if (cur == 0) {
2454 GROW;
2455 SHRINK;
2456 cur = CUR;
2457 }
2458 }
2459 buf[len] = 0;
2460 if (cur != stop) {
2461 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2463 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2464 ctxt->wellFormed = 0;
2465 ctxt->disableSAX = 1;
2466 } else {
2467 NEXT;
2468 }
2469 return(buf);
2470}
2471
Daniel Veillard48b2f892001-02-25 16:11:03 +00002472void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002473/**
2474 * xmlParseCharData:
2475 * @ctxt: an XML parser context
2476 * @cdata: int indicating whether we are within a CDATA section
2477 *
2478 * parse a CharData section.
2479 * if we are within a CDATA section ']]>' marks an end of section.
2480 *
2481 * The right angle bracket (>) may be represented using the string "&gt;",
2482 * and must, for compatibility, be escaped using "&gt;" or a character
2483 * reference when it appears in the string "]]>" in content, when that
2484 * string is not marking the end of a CDATA section.
2485 *
2486 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2487 */
2488
2489void
2490xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002491 const xmlChar *in;
2492 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002493 int line = ctxt->input->line;
2494 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002495
2496 SHRINK;
2497 GROW;
2498 /*
2499 * Accelerated common case where input don't need to be
2500 * modified before passing it to the handler.
2501 */
2502 if ((ctxt->token == 0) && (!cdata)) {
2503 in = ctxt->input->cur;
2504 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002505get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002506 while (((*in >= 0x20) && (*in != '<') &&
2507 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2508 in++;
2509 if (*in == 0xA) {
2510 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002511 in++;
2512 while (*in == 0xA) {
2513 ctxt->input->line++;
2514 in++;
2515 }
2516 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002517 }
2518 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002519 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002520 if (IS_BLANK(*ctxt->input->cur)) {
2521 const xmlChar *tmp = ctxt->input->cur;
2522 ctxt->input->cur = in;
2523 if (areBlanks(ctxt, tmp, nbchar)) {
2524 if (ctxt->sax->ignorableWhitespace != NULL)
2525 ctxt->sax->ignorableWhitespace(ctxt->userData,
2526 tmp, nbchar);
2527 } else {
2528 if (ctxt->sax->characters != NULL)
2529 ctxt->sax->characters(ctxt->userData,
2530 tmp, nbchar);
2531 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002532 line = ctxt->input->line;
2533 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002534 } else {
2535 if (ctxt->sax->characters != NULL)
2536 ctxt->sax->characters(ctxt->userData,
2537 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002538 line = ctxt->input->line;
2539 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002540 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002541 }
2542 ctxt->input->cur = in;
2543 if (*in == 0xD) {
2544 in++;
2545 if (*in == 0xA) {
2546 ctxt->input->cur = in;
2547 in++;
2548 ctxt->input->line++;
2549 continue; /* while */
2550 }
2551 in--;
2552 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002553 if (*in == '<') {
2554 return;
2555 }
2556 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002557 return;
2558 }
2559 SHRINK;
2560 GROW;
2561 in = ctxt->input->cur;
2562 } while ((*in >= 0x20) && (*in <= 0x7F));
2563 nbchar = 0;
2564 }
Daniel Veillard50582112001-03-26 22:52:16 +00002565 ctxt->input->line = line;
2566 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002567 xmlParseCharDataComplex(ctxt, cdata);
2568}
2569
2570void
2571xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002572 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2573 int nbchar = 0;
2574 int cur, l;
2575 int count = 0;
2576
2577 SHRINK;
2578 GROW;
2579 cur = CUR_CHAR(l);
2580 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2581 ((cur != '&') || (ctxt->token == '&')) &&
2582 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2583 if ((cur == ']') && (NXT(1) == ']') &&
2584 (NXT(2) == '>')) {
2585 if (cdata) break;
2586 else {
2587 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2589 ctxt->sax->error(ctxt->userData,
2590 "Sequence ']]>' not allowed in content\n");
2591 /* Should this be relaxed ??? I see a "must here */
2592 ctxt->wellFormed = 0;
2593 ctxt->disableSAX = 1;
2594 }
2595 }
2596 COPY_BUF(l,buf,nbchar,cur);
2597 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2598 /*
2599 * Ok the segment is to be consumed as chars.
2600 */
2601 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2602 if (areBlanks(ctxt, buf, nbchar)) {
2603 if (ctxt->sax->ignorableWhitespace != NULL)
2604 ctxt->sax->ignorableWhitespace(ctxt->userData,
2605 buf, nbchar);
2606 } else {
2607 if (ctxt->sax->characters != NULL)
2608 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2609 }
2610 }
2611 nbchar = 0;
2612 }
2613 count++;
2614 if (count > 50) {
2615 GROW;
2616 count = 0;
2617 }
2618 NEXTL(l);
2619 cur = CUR_CHAR(l);
2620 }
2621 if (nbchar != 0) {
2622 /*
2623 * Ok the segment is to be consumed as chars.
2624 */
2625 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2626 if (areBlanks(ctxt, buf, nbchar)) {
2627 if (ctxt->sax->ignorableWhitespace != NULL)
2628 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2629 } else {
2630 if (ctxt->sax->characters != NULL)
2631 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2632 }
2633 }
2634 }
2635}
2636
2637/**
2638 * xmlParseExternalID:
2639 * @ctxt: an XML parser context
2640 * @publicID: a xmlChar** receiving PubidLiteral
2641 * @strict: indicate whether we should restrict parsing to only
2642 * production [75], see NOTE below
2643 *
2644 * Parse an External ID or a Public ID
2645 *
2646 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2647 * 'PUBLIC' S PubidLiteral S SystemLiteral
2648 *
2649 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2650 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2651 *
2652 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2653 *
2654 * Returns the function returns SystemLiteral and in the second
2655 * case publicID receives PubidLiteral, is strict is off
2656 * it is possible to return NULL and have publicID set.
2657 */
2658
2659xmlChar *
2660xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2661 xmlChar *URI = NULL;
2662
2663 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002664
2665 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002666 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2667 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2668 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2669 SKIP(6);
2670 if (!IS_BLANK(CUR)) {
2671 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2673 ctxt->sax->error(ctxt->userData,
2674 "Space required after 'SYSTEM'\n");
2675 ctxt->wellFormed = 0;
2676 ctxt->disableSAX = 1;
2677 }
2678 SKIP_BLANKS;
2679 URI = xmlParseSystemLiteral(ctxt);
2680 if (URI == NULL) {
2681 ctxt->errNo = XML_ERR_URI_REQUIRED;
2682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2683 ctxt->sax->error(ctxt->userData,
2684 "xmlParseExternalID: SYSTEM, no URI\n");
2685 ctxt->wellFormed = 0;
2686 ctxt->disableSAX = 1;
2687 }
2688 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2689 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2690 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2691 SKIP(6);
2692 if (!IS_BLANK(CUR)) {
2693 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2695 ctxt->sax->error(ctxt->userData,
2696 "Space required after 'PUBLIC'\n");
2697 ctxt->wellFormed = 0;
2698 ctxt->disableSAX = 1;
2699 }
2700 SKIP_BLANKS;
2701 *publicID = xmlParsePubidLiteral(ctxt);
2702 if (*publicID == NULL) {
2703 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 }
2710 if (strict) {
2711 /*
2712 * We don't handle [83] so "S SystemLiteral" is required.
2713 */
2714 if (!IS_BLANK(CUR)) {
2715 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2717 ctxt->sax->error(ctxt->userData,
2718 "Space required after the Public Identifier\n");
2719 ctxt->wellFormed = 0;
2720 ctxt->disableSAX = 1;
2721 }
2722 } else {
2723 /*
2724 * We handle [83] so we return immediately, if
2725 * "S SystemLiteral" is not detected. From a purely parsing
2726 * point of view that's a nice mess.
2727 */
2728 const xmlChar *ptr;
2729 GROW;
2730
2731 ptr = CUR_PTR;
2732 if (!IS_BLANK(*ptr)) return(NULL);
2733
2734 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2735 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2736 }
2737 SKIP_BLANKS;
2738 URI = xmlParseSystemLiteral(ctxt);
2739 if (URI == NULL) {
2740 ctxt->errNo = XML_ERR_URI_REQUIRED;
2741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2742 ctxt->sax->error(ctxt->userData,
2743 "xmlParseExternalID: PUBLIC, no URI\n");
2744 ctxt->wellFormed = 0;
2745 ctxt->disableSAX = 1;
2746 }
2747 }
2748 return(URI);
2749}
2750
2751/**
2752 * xmlParseComment:
2753 * @ctxt: an XML parser context
2754 *
2755 * Skip an XML (SGML) comment <!-- .... -->
2756 * The spec says that "For compatibility, the string "--" (double-hyphen)
2757 * must not occur within comments. "
2758 *
2759 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2760 */
2761void
2762xmlParseComment(xmlParserCtxtPtr ctxt) {
2763 xmlChar *buf = NULL;
2764 int len;
2765 int size = XML_PARSER_BUFFER_SIZE;
2766 int q, ql;
2767 int r, rl;
2768 int cur, l;
2769 xmlParserInputState state;
2770 xmlParserInputPtr input = ctxt->input;
2771 int count = 0;
2772
2773 /*
2774 * Check that there is a comment right here.
2775 */
2776 if ((RAW != '<') || (NXT(1) != '!') ||
2777 (NXT(2) != '-') || (NXT(3) != '-')) return;
2778
2779 state = ctxt->instate;
2780 ctxt->instate = XML_PARSER_COMMENT;
2781 SHRINK;
2782 SKIP(4);
2783 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2784 if (buf == NULL) {
2785 xmlGenericError(xmlGenericErrorContext,
2786 "malloc of %d byte failed\n", size);
2787 ctxt->instate = state;
2788 return;
2789 }
2790 q = CUR_CHAR(ql);
2791 NEXTL(ql);
2792 r = CUR_CHAR(rl);
2793 NEXTL(rl);
2794 cur = CUR_CHAR(l);
2795 len = 0;
2796 while (IS_CHAR(cur) && /* checked */
2797 ((cur != '>') ||
2798 (r != '-') || (q != '-'))) {
2799 if ((r == '-') && (q == '-') && (len > 1)) {
2800 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2802 ctxt->sax->error(ctxt->userData,
2803 "Comment must not contain '--' (double-hyphen)`\n");
2804 ctxt->wellFormed = 0;
2805 ctxt->disableSAX = 1;
2806 }
2807 if (len + 5 >= size) {
2808 size *= 2;
2809 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2810 if (buf == NULL) {
2811 xmlGenericError(xmlGenericErrorContext,
2812 "realloc of %d byte failed\n", size);
2813 ctxt->instate = state;
2814 return;
2815 }
2816 }
2817 COPY_BUF(ql,buf,len,q);
2818 q = r;
2819 ql = rl;
2820 r = cur;
2821 rl = l;
2822
2823 count++;
2824 if (count > 50) {
2825 GROW;
2826 count = 0;
2827 }
2828 NEXTL(l);
2829 cur = CUR_CHAR(l);
2830 if (cur == 0) {
2831 SHRINK;
2832 GROW;
2833 cur = CUR_CHAR(l);
2834 }
2835 }
2836 buf[len] = 0;
2837 if (!IS_CHAR(cur)) {
2838 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2840 ctxt->sax->error(ctxt->userData,
2841 "Comment not terminated \n<!--%.50s\n", buf);
2842 ctxt->wellFormed = 0;
2843 ctxt->disableSAX = 1;
2844 xmlFree(buf);
2845 } else {
2846 if (input != ctxt->input) {
2847 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2849 ctxt->sax->error(ctxt->userData,
2850"Comment doesn't start and stop in the same entity\n");
2851 ctxt->wellFormed = 0;
2852 ctxt->disableSAX = 1;
2853 }
2854 NEXT;
2855 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2856 (!ctxt->disableSAX))
2857 ctxt->sax->comment(ctxt->userData, buf);
2858 xmlFree(buf);
2859 }
2860 ctxt->instate = state;
2861}
2862
2863/**
2864 * xmlParsePITarget:
2865 * @ctxt: an XML parser context
2866 *
2867 * parse the name of a PI
2868 *
2869 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2870 *
2871 * Returns the PITarget name or NULL
2872 */
2873
2874xmlChar *
2875xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2876 xmlChar *name;
2877
2878 name = xmlParseName(ctxt);
2879 if ((name != NULL) &&
2880 ((name[0] == 'x') || (name[0] == 'X')) &&
2881 ((name[1] == 'm') || (name[1] == 'M')) &&
2882 ((name[2] == 'l') || (name[2] == 'L'))) {
2883 int i;
2884 if ((name[0] == 'x') && (name[1] == 'm') &&
2885 (name[2] == 'l') && (name[3] == 0)) {
2886 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "XML declaration allowed only at the start of the document\n");
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 return(name);
2893 } else if (name[3] == 0) {
2894 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2896 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2897 ctxt->wellFormed = 0;
2898 ctxt->disableSAX = 1;
2899 return(name);
2900 }
2901 for (i = 0;;i++) {
2902 if (xmlW3CPIs[i] == NULL) break;
2903 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2904 return(name);
2905 }
2906 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2907 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2908 ctxt->sax->warning(ctxt->userData,
2909 "xmlParsePItarget: invalid name prefix 'xml'\n");
2910 }
2911 }
2912 return(name);
2913}
2914
2915/**
2916 * xmlParsePI:
2917 * @ctxt: an XML parser context
2918 *
2919 * parse an XML Processing Instruction.
2920 *
2921 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2922 *
2923 * The processing is transfered to SAX once parsed.
2924 */
2925
2926void
2927xmlParsePI(xmlParserCtxtPtr ctxt) {
2928 xmlChar *buf = NULL;
2929 int len = 0;
2930 int size = XML_PARSER_BUFFER_SIZE;
2931 int cur, l;
2932 xmlChar *target;
2933 xmlParserInputState state;
2934 int count = 0;
2935
2936 if ((RAW == '<') && (NXT(1) == '?')) {
2937 xmlParserInputPtr input = ctxt->input;
2938 state = ctxt->instate;
2939 ctxt->instate = XML_PARSER_PI;
2940 /*
2941 * this is a Processing Instruction.
2942 */
2943 SKIP(2);
2944 SHRINK;
2945
2946 /*
2947 * Parse the target name and check for special support like
2948 * namespace.
2949 */
2950 target = xmlParsePITarget(ctxt);
2951 if (target != NULL) {
2952 if ((RAW == '?') && (NXT(1) == '>')) {
2953 if (input != ctxt->input) {
2954 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2956 ctxt->sax->error(ctxt->userData,
2957 "PI declaration doesn't start and stop in the same entity\n");
2958 ctxt->wellFormed = 0;
2959 ctxt->disableSAX = 1;
2960 }
2961 SKIP(2);
2962
2963 /*
2964 * SAX: PI detected.
2965 */
2966 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2967 (ctxt->sax->processingInstruction != NULL))
2968 ctxt->sax->processingInstruction(ctxt->userData,
2969 target, NULL);
2970 ctxt->instate = state;
2971 xmlFree(target);
2972 return;
2973 }
2974 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2975 if (buf == NULL) {
2976 xmlGenericError(xmlGenericErrorContext,
2977 "malloc of %d byte failed\n", size);
2978 ctxt->instate = state;
2979 return;
2980 }
2981 cur = CUR;
2982 if (!IS_BLANK(cur)) {
2983 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2985 ctxt->sax->error(ctxt->userData,
2986 "xmlParsePI: PI %s space expected\n", target);
2987 ctxt->wellFormed = 0;
2988 ctxt->disableSAX = 1;
2989 }
2990 SKIP_BLANKS;
2991 cur = CUR_CHAR(l);
2992 while (IS_CHAR(cur) && /* checked */
2993 ((cur != '?') || (NXT(1) != '>'))) {
2994 if (len + 5 >= size) {
2995 size *= 2;
2996 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2997 if (buf == NULL) {
2998 xmlGenericError(xmlGenericErrorContext,
2999 "realloc of %d byte failed\n", size);
3000 ctxt->instate = state;
3001 return;
3002 }
3003 }
3004 count++;
3005 if (count > 50) {
3006 GROW;
3007 count = 0;
3008 }
3009 COPY_BUF(l,buf,len,cur);
3010 NEXTL(l);
3011 cur = CUR_CHAR(l);
3012 if (cur == 0) {
3013 SHRINK;
3014 GROW;
3015 cur = CUR_CHAR(l);
3016 }
3017 }
3018 buf[len] = 0;
3019 if (cur != '?') {
3020 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3022 ctxt->sax->error(ctxt->userData,
3023 "xmlParsePI: PI %s never end ...\n", target);
3024 ctxt->wellFormed = 0;
3025 ctxt->disableSAX = 1;
3026 } else {
3027 if (input != ctxt->input) {
3028 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3030 ctxt->sax->error(ctxt->userData,
3031 "PI declaration doesn't start and stop in the same entity\n");
3032 ctxt->wellFormed = 0;
3033 ctxt->disableSAX = 1;
3034 }
3035 SKIP(2);
3036
3037 /*
3038 * SAX: PI detected.
3039 */
3040 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3041 (ctxt->sax->processingInstruction != NULL))
3042 ctxt->sax->processingInstruction(ctxt->userData,
3043 target, buf);
3044 }
3045 xmlFree(buf);
3046 xmlFree(target);
3047 } else {
3048 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3050 ctxt->sax->error(ctxt->userData,
3051 "xmlParsePI : no target name\n");
3052 ctxt->wellFormed = 0;
3053 ctxt->disableSAX = 1;
3054 }
3055 ctxt->instate = state;
3056 }
3057}
3058
3059/**
3060 * xmlParseNotationDecl:
3061 * @ctxt: an XML parser context
3062 *
3063 * parse a notation declaration
3064 *
3065 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3066 *
3067 * Hence there is actually 3 choices:
3068 * 'PUBLIC' S PubidLiteral
3069 * 'PUBLIC' S PubidLiteral S SystemLiteral
3070 * and 'SYSTEM' S SystemLiteral
3071 *
3072 * See the NOTE on xmlParseExternalID().
3073 */
3074
3075void
3076xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3077 xmlChar *name;
3078 xmlChar *Pubid;
3079 xmlChar *Systemid;
3080
3081 if ((RAW == '<') && (NXT(1) == '!') &&
3082 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3083 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3084 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3085 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3086 xmlParserInputPtr input = ctxt->input;
3087 SHRINK;
3088 SKIP(10);
3089 if (!IS_BLANK(CUR)) {
3090 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3092 ctxt->sax->error(ctxt->userData,
3093 "Space required after '<!NOTATION'\n");
3094 ctxt->wellFormed = 0;
3095 ctxt->disableSAX = 1;
3096 return;
3097 }
3098 SKIP_BLANKS;
3099
Daniel Veillard76d66f42001-05-16 21:05:17 +00003100 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003101 if (name == NULL) {
3102 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3104 ctxt->sax->error(ctxt->userData,
3105 "NOTATION: Name expected here\n");
3106 ctxt->wellFormed = 0;
3107 ctxt->disableSAX = 1;
3108 return;
3109 }
3110 if (!IS_BLANK(CUR)) {
3111 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3113 ctxt->sax->error(ctxt->userData,
3114 "Space required after the NOTATION name'\n");
3115 ctxt->wellFormed = 0;
3116 ctxt->disableSAX = 1;
3117 return;
3118 }
3119 SKIP_BLANKS;
3120
3121 /*
3122 * Parse the IDs.
3123 */
3124 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3125 SKIP_BLANKS;
3126
3127 if (RAW == '>') {
3128 if (input != ctxt->input) {
3129 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3131 ctxt->sax->error(ctxt->userData,
3132"Notation declaration doesn't start and stop in the same entity\n");
3133 ctxt->wellFormed = 0;
3134 ctxt->disableSAX = 1;
3135 }
3136 NEXT;
3137 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3138 (ctxt->sax->notationDecl != NULL))
3139 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3140 } else {
3141 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3143 ctxt->sax->error(ctxt->userData,
3144 "'>' required to close NOTATION declaration\n");
3145 ctxt->wellFormed = 0;
3146 ctxt->disableSAX = 1;
3147 }
3148 xmlFree(name);
3149 if (Systemid != NULL) xmlFree(Systemid);
3150 if (Pubid != NULL) xmlFree(Pubid);
3151 }
3152}
3153
3154/**
3155 * xmlParseEntityDecl:
3156 * @ctxt: an XML parser context
3157 *
3158 * parse <!ENTITY declarations
3159 *
3160 * [70] EntityDecl ::= GEDecl | PEDecl
3161 *
3162 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3163 *
3164 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3165 *
3166 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3167 *
3168 * [74] PEDef ::= EntityValue | ExternalID
3169 *
3170 * [76] NDataDecl ::= S 'NDATA' S Name
3171 *
3172 * [ VC: Notation Declared ]
3173 * The Name must match the declared name of a notation.
3174 */
3175
3176void
3177xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3178 xmlChar *name = NULL;
3179 xmlChar *value = NULL;
3180 xmlChar *URI = NULL, *literal = NULL;
3181 xmlChar *ndata = NULL;
3182 int isParameter = 0;
3183 xmlChar *orig = NULL;
3184
3185 GROW;
3186 if ((RAW == '<') && (NXT(1) == '!') &&
3187 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3188 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3189 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3190 xmlParserInputPtr input = ctxt->input;
3191 ctxt->instate = XML_PARSER_ENTITY_DECL;
3192 SHRINK;
3193 SKIP(8);
3194 if (!IS_BLANK(CUR)) {
3195 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197 ctxt->sax->error(ctxt->userData,
3198 "Space required after '<!ENTITY'\n");
3199 ctxt->wellFormed = 0;
3200 ctxt->disableSAX = 1;
3201 }
3202 SKIP_BLANKS;
3203
3204 if (RAW == '%') {
3205 NEXT;
3206 if (!IS_BLANK(CUR)) {
3207 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3209 ctxt->sax->error(ctxt->userData,
3210 "Space required after '%'\n");
3211 ctxt->wellFormed = 0;
3212 ctxt->disableSAX = 1;
3213 }
3214 SKIP_BLANKS;
3215 isParameter = 1;
3216 }
3217
Daniel Veillard76d66f42001-05-16 21:05:17 +00003218 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003219 if (name == NULL) {
3220 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3222 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3223 ctxt->wellFormed = 0;
3224 ctxt->disableSAX = 1;
3225 return;
3226 }
3227 if (!IS_BLANK(CUR)) {
3228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3230 ctxt->sax->error(ctxt->userData,
3231 "Space required after the entity name\n");
3232 ctxt->wellFormed = 0;
3233 ctxt->disableSAX = 1;
3234 }
3235 SKIP_BLANKS;
3236
3237 /*
3238 * handle the various case of definitions...
3239 */
3240 if (isParameter) {
3241 if ((RAW == '"') || (RAW == '\'')) {
3242 value = xmlParseEntityValue(ctxt, &orig);
3243 if (value) {
3244 if ((ctxt->sax != NULL) &&
3245 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3246 ctxt->sax->entityDecl(ctxt->userData, name,
3247 XML_INTERNAL_PARAMETER_ENTITY,
3248 NULL, NULL, value);
3249 }
3250 } else {
3251 URI = xmlParseExternalID(ctxt, &literal, 1);
3252 if ((URI == NULL) && (literal == NULL)) {
3253 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3255 ctxt->sax->error(ctxt->userData,
3256 "Entity value required\n");
3257 ctxt->wellFormed = 0;
3258 ctxt->disableSAX = 1;
3259 }
3260 if (URI) {
3261 xmlURIPtr uri;
3262
3263 uri = xmlParseURI((const char *) URI);
3264 if (uri == NULL) {
3265 ctxt->errNo = XML_ERR_INVALID_URI;
3266 if ((ctxt->sax != NULL) &&
3267 (!ctxt->disableSAX) &&
3268 (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData,
3270 "Invalid URI: %s\n", URI);
3271 ctxt->wellFormed = 0;
3272 } else {
3273 if (uri->fragment != NULL) {
3274 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3275 if ((ctxt->sax != NULL) &&
3276 (!ctxt->disableSAX) &&
3277 (ctxt->sax->error != NULL))
3278 ctxt->sax->error(ctxt->userData,
3279 "Fragment not allowed: %s\n", URI);
3280 ctxt->wellFormed = 0;
3281 } else {
3282 if ((ctxt->sax != NULL) &&
3283 (!ctxt->disableSAX) &&
3284 (ctxt->sax->entityDecl != NULL))
3285 ctxt->sax->entityDecl(ctxt->userData, name,
3286 XML_EXTERNAL_PARAMETER_ENTITY,
3287 literal, URI, NULL);
3288 }
3289 xmlFreeURI(uri);
3290 }
3291 }
3292 }
3293 } else {
3294 if ((RAW == '"') || (RAW == '\'')) {
3295 value = xmlParseEntityValue(ctxt, &orig);
3296 if ((ctxt->sax != NULL) &&
3297 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3298 ctxt->sax->entityDecl(ctxt->userData, name,
3299 XML_INTERNAL_GENERAL_ENTITY,
3300 NULL, NULL, value);
3301 } else {
3302 URI = xmlParseExternalID(ctxt, &literal, 1);
3303 if ((URI == NULL) && (literal == NULL)) {
3304 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3306 ctxt->sax->error(ctxt->userData,
3307 "Entity value required\n");
3308 ctxt->wellFormed = 0;
3309 ctxt->disableSAX = 1;
3310 }
3311 if (URI) {
3312 xmlURIPtr uri;
3313
3314 uri = xmlParseURI((const char *)URI);
3315 if (uri == NULL) {
3316 ctxt->errNo = XML_ERR_INVALID_URI;
3317 if ((ctxt->sax != NULL) &&
3318 (!ctxt->disableSAX) &&
3319 (ctxt->sax->error != NULL))
3320 ctxt->sax->error(ctxt->userData,
3321 "Invalid URI: %s\n", URI);
3322 ctxt->wellFormed = 0;
3323 } else {
3324 if (uri->fragment != NULL) {
3325 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3326 if ((ctxt->sax != NULL) &&
3327 (!ctxt->disableSAX) &&
3328 (ctxt->sax->error != NULL))
3329 ctxt->sax->error(ctxt->userData,
3330 "Fragment not allowed: %s\n", URI);
3331 ctxt->wellFormed = 0;
3332 }
3333 xmlFreeURI(uri);
3334 }
3335 }
3336 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3337 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3339 ctxt->sax->error(ctxt->userData,
3340 "Space required before 'NDATA'\n");
3341 ctxt->wellFormed = 0;
3342 ctxt->disableSAX = 1;
3343 }
3344 SKIP_BLANKS;
3345 if ((RAW == 'N') && (NXT(1) == 'D') &&
3346 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3347 (NXT(4) == 'A')) {
3348 SKIP(5);
3349 if (!IS_BLANK(CUR)) {
3350 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "Space required after 'NDATA'\n");
3354 ctxt->wellFormed = 0;
3355 ctxt->disableSAX = 1;
3356 }
3357 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003358 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003359 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3360 (ctxt->sax->unparsedEntityDecl != NULL))
3361 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3362 literal, URI, ndata);
3363 } else {
3364 if ((ctxt->sax != NULL) &&
3365 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3366 ctxt->sax->entityDecl(ctxt->userData, name,
3367 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3368 literal, URI, NULL);
3369 }
3370 }
3371 }
3372 SKIP_BLANKS;
3373 if (RAW != '>') {
3374 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3376 ctxt->sax->error(ctxt->userData,
3377 "xmlParseEntityDecl: entity %s not terminated\n", name);
3378 ctxt->wellFormed = 0;
3379 ctxt->disableSAX = 1;
3380 } else {
3381 if (input != ctxt->input) {
3382 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385"Entity declaration doesn't start and stop in the same entity\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 }
3389 NEXT;
3390 }
3391 if (orig != NULL) {
3392 /*
3393 * Ugly mechanism to save the raw entity value.
3394 */
3395 xmlEntityPtr cur = NULL;
3396
3397 if (isParameter) {
3398 if ((ctxt->sax != NULL) &&
3399 (ctxt->sax->getParameterEntity != NULL))
3400 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3401 } else {
3402 if ((ctxt->sax != NULL) &&
3403 (ctxt->sax->getEntity != NULL))
3404 cur = ctxt->sax->getEntity(ctxt->userData, name);
3405 }
3406 if (cur != NULL) {
3407 if (cur->orig != NULL)
3408 xmlFree(orig);
3409 else
3410 cur->orig = orig;
3411 } else
3412 xmlFree(orig);
3413 }
3414 if (name != NULL) xmlFree(name);
3415 if (value != NULL) xmlFree(value);
3416 if (URI != NULL) xmlFree(URI);
3417 if (literal != NULL) xmlFree(literal);
3418 if (ndata != NULL) xmlFree(ndata);
3419 }
3420}
3421
3422/**
3423 * xmlParseDefaultDecl:
3424 * @ctxt: an XML parser context
3425 * @value: Receive a possible fixed default value for the attribute
3426 *
3427 * Parse an attribute default declaration
3428 *
3429 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3430 *
3431 * [ VC: Required Attribute ]
3432 * if the default declaration is the keyword #REQUIRED, then the
3433 * attribute must be specified for all elements of the type in the
3434 * attribute-list declaration.
3435 *
3436 * [ VC: Attribute Default Legal ]
3437 * The declared default value must meet the lexical constraints of
3438 * the declared attribute type c.f. xmlValidateAttributeDecl()
3439 *
3440 * [ VC: Fixed Attribute Default ]
3441 * if an attribute has a default value declared with the #FIXED
3442 * keyword, instances of that attribute must match the default value.
3443 *
3444 * [ WFC: No < in Attribute Values ]
3445 * handled in xmlParseAttValue()
3446 *
3447 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3448 * or XML_ATTRIBUTE_FIXED.
3449 */
3450
3451int
3452xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3453 int val;
3454 xmlChar *ret;
3455
3456 *value = NULL;
3457 if ((RAW == '#') && (NXT(1) == 'R') &&
3458 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3459 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3460 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3461 (NXT(8) == 'D')) {
3462 SKIP(9);
3463 return(XML_ATTRIBUTE_REQUIRED);
3464 }
3465 if ((RAW == '#') && (NXT(1) == 'I') &&
3466 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3467 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3468 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3469 SKIP(8);
3470 return(XML_ATTRIBUTE_IMPLIED);
3471 }
3472 val = XML_ATTRIBUTE_NONE;
3473 if ((RAW == '#') && (NXT(1) == 'F') &&
3474 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3475 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3476 SKIP(6);
3477 val = XML_ATTRIBUTE_FIXED;
3478 if (!IS_BLANK(CUR)) {
3479 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3480 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Space required after '#FIXED'\n");
3483 ctxt->wellFormed = 0;
3484 ctxt->disableSAX = 1;
3485 }
3486 SKIP_BLANKS;
3487 }
3488 ret = xmlParseAttValue(ctxt);
3489 ctxt->instate = XML_PARSER_DTD;
3490 if (ret == NULL) {
3491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3492 ctxt->sax->error(ctxt->userData,
3493 "Attribute default value declaration error\n");
3494 ctxt->wellFormed = 0;
3495 ctxt->disableSAX = 1;
3496 } else
3497 *value = ret;
3498 return(val);
3499}
3500
3501/**
3502 * xmlParseNotationType:
3503 * @ctxt: an XML parser context
3504 *
3505 * parse an Notation attribute type.
3506 *
3507 * Note: the leading 'NOTATION' S part has already being parsed...
3508 *
3509 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3510 *
3511 * [ VC: Notation Attributes ]
3512 * Values of this type must match one of the notation names included
3513 * in the declaration; all notation names in the declaration must be declared.
3514 *
3515 * Returns: the notation attribute tree built while parsing
3516 */
3517
3518xmlEnumerationPtr
3519xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3520 xmlChar *name;
3521 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3522
3523 if (RAW != '(') {
3524 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3526 ctxt->sax->error(ctxt->userData,
3527 "'(' required to start 'NOTATION'\n");
3528 ctxt->wellFormed = 0;
3529 ctxt->disableSAX = 1;
3530 return(NULL);
3531 }
3532 SHRINK;
3533 do {
3534 NEXT;
3535 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003536 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003537 if (name == NULL) {
3538 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3540 ctxt->sax->error(ctxt->userData,
3541 "Name expected in NOTATION declaration\n");
3542 ctxt->wellFormed = 0;
3543 ctxt->disableSAX = 1;
3544 return(ret);
3545 }
3546 cur = xmlCreateEnumeration(name);
3547 xmlFree(name);
3548 if (cur == NULL) return(ret);
3549 if (last == NULL) ret = last = cur;
3550 else {
3551 last->next = cur;
3552 last = cur;
3553 }
3554 SKIP_BLANKS;
3555 } while (RAW == '|');
3556 if (RAW != ')') {
3557 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3559 ctxt->sax->error(ctxt->userData,
3560 "')' required to finish NOTATION declaration\n");
3561 ctxt->wellFormed = 0;
3562 ctxt->disableSAX = 1;
3563 if ((last != NULL) && (last != ret))
3564 xmlFreeEnumeration(last);
3565 return(ret);
3566 }
3567 NEXT;
3568 return(ret);
3569}
3570
3571/**
3572 * xmlParseEnumerationType:
3573 * @ctxt: an XML parser context
3574 *
3575 * parse an Enumeration attribute type.
3576 *
3577 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3578 *
3579 * [ VC: Enumeration ]
3580 * Values of this type must match one of the Nmtoken tokens in
3581 * the declaration
3582 *
3583 * Returns: the enumeration attribute tree built while parsing
3584 */
3585
3586xmlEnumerationPtr
3587xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3588 xmlChar *name;
3589 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3590
3591 if (RAW != '(') {
3592 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3594 ctxt->sax->error(ctxt->userData,
3595 "'(' required to start ATTLIST enumeration\n");
3596 ctxt->wellFormed = 0;
3597 ctxt->disableSAX = 1;
3598 return(NULL);
3599 }
3600 SHRINK;
3601 do {
3602 NEXT;
3603 SKIP_BLANKS;
3604 name = xmlParseNmtoken(ctxt);
3605 if (name == NULL) {
3606 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3608 ctxt->sax->error(ctxt->userData,
3609 "NmToken expected in ATTLIST enumeration\n");
3610 ctxt->wellFormed = 0;
3611 ctxt->disableSAX = 1;
3612 return(ret);
3613 }
3614 cur = xmlCreateEnumeration(name);
3615 xmlFree(name);
3616 if (cur == NULL) return(ret);
3617 if (last == NULL) ret = last = cur;
3618 else {
3619 last->next = cur;
3620 last = cur;
3621 }
3622 SKIP_BLANKS;
3623 } while (RAW == '|');
3624 if (RAW != ')') {
3625 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData,
3628 "')' required to finish ATTLIST enumeration\n");
3629 ctxt->wellFormed = 0;
3630 ctxt->disableSAX = 1;
3631 return(ret);
3632 }
3633 NEXT;
3634 return(ret);
3635}
3636
3637/**
3638 * xmlParseEnumeratedType:
3639 * @ctxt: an XML parser context
3640 * @tree: the enumeration tree built while parsing
3641 *
3642 * parse an Enumerated attribute type.
3643 *
3644 * [57] EnumeratedType ::= NotationType | Enumeration
3645 *
3646 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3647 *
3648 *
3649 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3650 */
3651
3652int
3653xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3654 if ((RAW == 'N') && (NXT(1) == 'O') &&
3655 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3656 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3657 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3658 SKIP(8);
3659 if (!IS_BLANK(CUR)) {
3660 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3662 ctxt->sax->error(ctxt->userData,
3663 "Space required after 'NOTATION'\n");
3664 ctxt->wellFormed = 0;
3665 ctxt->disableSAX = 1;
3666 return(0);
3667 }
3668 SKIP_BLANKS;
3669 *tree = xmlParseNotationType(ctxt);
3670 if (*tree == NULL) return(0);
3671 return(XML_ATTRIBUTE_NOTATION);
3672 }
3673 *tree = xmlParseEnumerationType(ctxt);
3674 if (*tree == NULL) return(0);
3675 return(XML_ATTRIBUTE_ENUMERATION);
3676}
3677
3678/**
3679 * xmlParseAttributeType:
3680 * @ctxt: an XML parser context
3681 * @tree: the enumeration tree built while parsing
3682 *
3683 * parse the Attribute list def for an element
3684 *
3685 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3686 *
3687 * [55] StringType ::= 'CDATA'
3688 *
3689 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3690 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3691 *
3692 * Validity constraints for attribute values syntax are checked in
3693 * xmlValidateAttributeValue()
3694 *
3695 * [ VC: ID ]
3696 * Values of type ID must match the Name production. A name must not
3697 * appear more than once in an XML document as a value of this type;
3698 * i.e., ID values must uniquely identify the elements which bear them.
3699 *
3700 * [ VC: One ID per Element Type ]
3701 * No element type may have more than one ID attribute specified.
3702 *
3703 * [ VC: ID Attribute Default ]
3704 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3705 *
3706 * [ VC: IDREF ]
3707 * Values of type IDREF must match the Name production, and values
3708 * of type IDREFS must match Names; each IDREF Name must match the value
3709 * of an ID attribute on some element in the XML document; i.e. IDREF
3710 * values must match the value of some ID attribute.
3711 *
3712 * [ VC: Entity Name ]
3713 * Values of type ENTITY must match the Name production, values
3714 * of type ENTITIES must match Names; each Entity Name must match the
3715 * name of an unparsed entity declared in the DTD.
3716 *
3717 * [ VC: Name Token ]
3718 * Values of type NMTOKEN must match the Nmtoken production; values
3719 * of type NMTOKENS must match Nmtokens.
3720 *
3721 * Returns the attribute type
3722 */
3723int
3724xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3725 SHRINK;
3726 if ((RAW == 'C') && (NXT(1) == 'D') &&
3727 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3728 (NXT(4) == 'A')) {
3729 SKIP(5);
3730 return(XML_ATTRIBUTE_CDATA);
3731 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3732 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3733 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3734 SKIP(6);
3735 return(XML_ATTRIBUTE_IDREFS);
3736 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3737 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3738 (NXT(4) == 'F')) {
3739 SKIP(5);
3740 return(XML_ATTRIBUTE_IDREF);
3741 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3742 SKIP(2);
3743 return(XML_ATTRIBUTE_ID);
3744 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3745 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3746 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3747 SKIP(6);
3748 return(XML_ATTRIBUTE_ENTITY);
3749 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3750 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3751 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3752 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3753 SKIP(8);
3754 return(XML_ATTRIBUTE_ENTITIES);
3755 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3756 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3757 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3758 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3759 SKIP(8);
3760 return(XML_ATTRIBUTE_NMTOKENS);
3761 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3762 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3763 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3764 (NXT(6) == 'N')) {
3765 SKIP(7);
3766 return(XML_ATTRIBUTE_NMTOKEN);
3767 }
3768 return(xmlParseEnumeratedType(ctxt, tree));
3769}
3770
3771/**
3772 * xmlParseAttributeListDecl:
3773 * @ctxt: an XML parser context
3774 *
3775 * : parse the Attribute list def for an element
3776 *
3777 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3778 *
3779 * [53] AttDef ::= S Name S AttType S DefaultDecl
3780 *
3781 */
3782void
3783xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3784 xmlChar *elemName;
3785 xmlChar *attrName;
3786 xmlEnumerationPtr tree;
3787
3788 if ((RAW == '<') && (NXT(1) == '!') &&
3789 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3790 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3791 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3792 (NXT(8) == 'T')) {
3793 xmlParserInputPtr input = ctxt->input;
3794
3795 SKIP(9);
3796 if (!IS_BLANK(CUR)) {
3797 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3799 ctxt->sax->error(ctxt->userData,
3800 "Space required after '<!ATTLIST'\n");
3801 ctxt->wellFormed = 0;
3802 ctxt->disableSAX = 1;
3803 }
3804 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003805 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (elemName == NULL) {
3807 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3809 ctxt->sax->error(ctxt->userData,
3810 "ATTLIST: no name for Element\n");
3811 ctxt->wellFormed = 0;
3812 ctxt->disableSAX = 1;
3813 return;
3814 }
3815 SKIP_BLANKS;
3816 GROW;
3817 while (RAW != '>') {
3818 const xmlChar *check = CUR_PTR;
3819 int type;
3820 int def;
3821 xmlChar *defaultValue = NULL;
3822
3823 GROW;
3824 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003825 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003826 if (attrName == NULL) {
3827 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3829 ctxt->sax->error(ctxt->userData,
3830 "ATTLIST: no name for Attribute\n");
3831 ctxt->wellFormed = 0;
3832 ctxt->disableSAX = 1;
3833 break;
3834 }
3835 GROW;
3836 if (!IS_BLANK(CUR)) {
3837 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3839 ctxt->sax->error(ctxt->userData,
3840 "Space required after the attribute name\n");
3841 ctxt->wellFormed = 0;
3842 ctxt->disableSAX = 1;
3843 if (attrName != NULL)
3844 xmlFree(attrName);
3845 if (defaultValue != NULL)
3846 xmlFree(defaultValue);
3847 break;
3848 }
3849 SKIP_BLANKS;
3850
3851 type = xmlParseAttributeType(ctxt, &tree);
3852 if (type <= 0) {
3853 if (attrName != NULL)
3854 xmlFree(attrName);
3855 if (defaultValue != NULL)
3856 xmlFree(defaultValue);
3857 break;
3858 }
3859
3860 GROW;
3861 if (!IS_BLANK(CUR)) {
3862 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3864 ctxt->sax->error(ctxt->userData,
3865 "Space required after the attribute type\n");
3866 ctxt->wellFormed = 0;
3867 ctxt->disableSAX = 1;
3868 if (attrName != NULL)
3869 xmlFree(attrName);
3870 if (defaultValue != NULL)
3871 xmlFree(defaultValue);
3872 if (tree != NULL)
3873 xmlFreeEnumeration(tree);
3874 break;
3875 }
3876 SKIP_BLANKS;
3877
3878 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3879 if (def <= 0) {
3880 if (attrName != NULL)
3881 xmlFree(attrName);
3882 if (defaultValue != NULL)
3883 xmlFree(defaultValue);
3884 if (tree != NULL)
3885 xmlFreeEnumeration(tree);
3886 break;
3887 }
3888
3889 GROW;
3890 if (RAW != '>') {
3891 if (!IS_BLANK(CUR)) {
3892 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3894 ctxt->sax->error(ctxt->userData,
3895 "Space required after the attribute default value\n");
3896 ctxt->wellFormed = 0;
3897 ctxt->disableSAX = 1;
3898 if (attrName != NULL)
3899 xmlFree(attrName);
3900 if (defaultValue != NULL)
3901 xmlFree(defaultValue);
3902 if (tree != NULL)
3903 xmlFreeEnumeration(tree);
3904 break;
3905 }
3906 SKIP_BLANKS;
3907 }
3908 if (check == CUR_PTR) {
3909 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3910 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3911 ctxt->sax->error(ctxt->userData,
3912 "xmlParseAttributeListDecl: detected internal error\n");
3913 if (attrName != NULL)
3914 xmlFree(attrName);
3915 if (defaultValue != NULL)
3916 xmlFree(defaultValue);
3917 if (tree != NULL)
3918 xmlFreeEnumeration(tree);
3919 break;
3920 }
3921 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3922 (ctxt->sax->attributeDecl != NULL))
3923 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3924 type, def, defaultValue, tree);
3925 if (attrName != NULL)
3926 xmlFree(attrName);
3927 if (defaultValue != NULL)
3928 xmlFree(defaultValue);
3929 GROW;
3930 }
3931 if (RAW == '>') {
3932 if (input != ctxt->input) {
3933 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3935 ctxt->sax->error(ctxt->userData,
3936"Attribute list declaration doesn't start and stop in the same entity\n");
3937 ctxt->wellFormed = 0;
3938 ctxt->disableSAX = 1;
3939 }
3940 NEXT;
3941 }
3942
3943 xmlFree(elemName);
3944 }
3945}
3946
3947/**
3948 * xmlParseElementMixedContentDecl:
3949 * @ctxt: an XML parser context
3950 *
3951 * parse the declaration for a Mixed Element content
3952 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3953 *
3954 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3955 * '(' S? '#PCDATA' S? ')'
3956 *
3957 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3958 *
3959 * [ VC: No Duplicate Types ]
3960 * The same name must not appear more than once in a single
3961 * mixed-content declaration.
3962 *
3963 * returns: the list of the xmlElementContentPtr describing the element choices
3964 */
3965xmlElementContentPtr
3966xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3967 xmlElementContentPtr ret = NULL, cur = NULL, n;
3968 xmlChar *elem = NULL;
3969
3970 GROW;
3971 if ((RAW == '#') && (NXT(1) == 'P') &&
3972 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3973 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3974 (NXT(6) == 'A')) {
3975 SKIP(7);
3976 SKIP_BLANKS;
3977 SHRINK;
3978 if (RAW == ')') {
3979 ctxt->entity = ctxt->input;
3980 NEXT;
3981 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3982 if (RAW == '*') {
3983 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3984 NEXT;
3985 }
3986 return(ret);
3987 }
3988 if ((RAW == '(') || (RAW == '|')) {
3989 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3990 if (ret == NULL) return(NULL);
3991 }
3992 while (RAW == '|') {
3993 NEXT;
3994 if (elem == NULL) {
3995 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3996 if (ret == NULL) return(NULL);
3997 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00003998 if (cur != NULL)
3999 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004000 cur = ret;
4001 } else {
4002 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4003 if (n == NULL) return(NULL);
4004 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004005 if (n->c1 != NULL)
4006 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004007 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004008 if (n != NULL)
4009 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004010 cur = n;
4011 xmlFree(elem);
4012 }
4013 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004014 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004015 if (elem == NULL) {
4016 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4018 ctxt->sax->error(ctxt->userData,
4019 "xmlParseElementMixedContentDecl : Name expected\n");
4020 ctxt->wellFormed = 0;
4021 ctxt->disableSAX = 1;
4022 xmlFreeElementContent(cur);
4023 return(NULL);
4024 }
4025 SKIP_BLANKS;
4026 GROW;
4027 }
4028 if ((RAW == ')') && (NXT(1) == '*')) {
4029 if (elem != NULL) {
4030 cur->c2 = xmlNewElementContent(elem,
4031 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004032 if (cur->c2 != NULL)
4033 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004034 xmlFree(elem);
4035 }
4036 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4037 ctxt->entity = ctxt->input;
4038 SKIP(2);
4039 } else {
4040 if (elem != NULL) xmlFree(elem);
4041 xmlFreeElementContent(ret);
4042 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4044 ctxt->sax->error(ctxt->userData,
4045 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4046 ctxt->wellFormed = 0;
4047 ctxt->disableSAX = 1;
4048 return(NULL);
4049 }
4050
4051 } else {
4052 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4054 ctxt->sax->error(ctxt->userData,
4055 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4056 ctxt->wellFormed = 0;
4057 ctxt->disableSAX = 1;
4058 }
4059 return(ret);
4060}
4061
4062/**
4063 * xmlParseElementChildrenContentDecl:
4064 * @ctxt: an XML parser context
4065 *
4066 * parse the declaration for a Mixed Element content
4067 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4068 *
4069 *
4070 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4071 *
4072 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4073 *
4074 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4075 *
4076 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4077 *
4078 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4079 * TODO Parameter-entity replacement text must be properly nested
4080 * with parenthetized groups. That is to say, if either of the
4081 * opening or closing parentheses in a choice, seq, or Mixed
4082 * construct is contained in the replacement text for a parameter
4083 * entity, both must be contained in the same replacement text. For
4084 * interoperability, if a parameter-entity reference appears in a
4085 * choice, seq, or Mixed construct, its replacement text should not
4086 * be empty, and neither the first nor last non-blank character of
4087 * the replacement text should be a connector (| or ,).
4088 *
4089 * returns: the tree of xmlElementContentPtr describing the element
4090 * hierarchy.
4091 */
4092xmlElementContentPtr
4093#ifdef VMS
4094xmlParseElementChildrenContentD
4095#else
4096xmlParseElementChildrenContentDecl
4097#endif
4098(xmlParserCtxtPtr ctxt) {
4099 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4100 xmlChar *elem;
4101 xmlChar type = 0;
4102
4103 SKIP_BLANKS;
4104 GROW;
4105 if (RAW == '(') {
4106 /* Recurse on first child */
4107 NEXT;
4108 SKIP_BLANKS;
4109 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4110 SKIP_BLANKS;
4111 GROW;
4112 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004113 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004114 if (elem == NULL) {
4115 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4117 ctxt->sax->error(ctxt->userData,
4118 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4119 ctxt->wellFormed = 0;
4120 ctxt->disableSAX = 1;
4121 return(NULL);
4122 }
4123 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4124 GROW;
4125 if (RAW == '?') {
4126 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4127 NEXT;
4128 } else if (RAW == '*') {
4129 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4130 NEXT;
4131 } else if (RAW == '+') {
4132 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4133 NEXT;
4134 } else {
4135 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4136 }
4137 xmlFree(elem);
4138 GROW;
4139 }
4140 SKIP_BLANKS;
4141 SHRINK;
4142 while (RAW != ')') {
4143 /*
4144 * Each loop we parse one separator and one element.
4145 */
4146 if (RAW == ',') {
4147 if (type == 0) type = CUR;
4148
4149 /*
4150 * Detect "Name | Name , Name" error
4151 */
4152 else if (type != CUR) {
4153 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4155 ctxt->sax->error(ctxt->userData,
4156 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4157 type);
4158 ctxt->wellFormed = 0;
4159 ctxt->disableSAX = 1;
4160 if ((op != NULL) && (op != ret))
4161 xmlFreeElementContent(op);
4162 if ((last != NULL) && (last != ret) &&
4163 (last != ret->c1) && (last != ret->c2))
4164 xmlFreeElementContent(last);
4165 if (ret != NULL)
4166 xmlFreeElementContent(ret);
4167 return(NULL);
4168 }
4169 NEXT;
4170
4171 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4172 if (op == NULL) {
4173 xmlFreeElementContent(ret);
4174 return(NULL);
4175 }
4176 if (last == NULL) {
4177 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004178 if (ret != NULL)
4179 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004180 ret = cur = op;
4181 } else {
4182 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004183 if (op != NULL)
4184 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004185 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004186 if (last != NULL)
4187 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 cur =op;
4189 last = NULL;
4190 }
4191 } else if (RAW == '|') {
4192 if (type == 0) type = CUR;
4193
4194 /*
4195 * Detect "Name , Name | Name" error
4196 */
4197 else if (type != CUR) {
4198 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4200 ctxt->sax->error(ctxt->userData,
4201 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4202 type);
4203 ctxt->wellFormed = 0;
4204 ctxt->disableSAX = 1;
4205 if ((op != NULL) && (op != ret) && (op != last))
4206 xmlFreeElementContent(op);
4207 if ((last != NULL) && (last != ret) &&
4208 (last != ret->c1) && (last != ret->c2))
4209 xmlFreeElementContent(last);
4210 if (ret != NULL)
4211 xmlFreeElementContent(ret);
4212 return(NULL);
4213 }
4214 NEXT;
4215
4216 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4217 if (op == NULL) {
4218 if ((op != NULL) && (op != ret))
4219 xmlFreeElementContent(op);
4220 if ((last != NULL) && (last != ret) &&
4221 (last != ret->c1) && (last != ret->c2))
4222 xmlFreeElementContent(last);
4223 if (ret != NULL)
4224 xmlFreeElementContent(ret);
4225 return(NULL);
4226 }
4227 if (last == NULL) {
4228 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004229 if (ret != NULL)
4230 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004231 ret = cur = op;
4232 } else {
4233 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004234 if (op != NULL)
4235 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004236 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004237 if (last != NULL)
4238 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004239 cur =op;
4240 last = NULL;
4241 }
4242 } else {
4243 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4245 ctxt->sax->error(ctxt->userData,
4246 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4247 ctxt->wellFormed = 0;
4248 ctxt->disableSAX = 1;
4249 if ((op != NULL) && (op != ret))
4250 xmlFreeElementContent(op);
4251 if ((last != NULL) && (last != ret) &&
4252 (last != ret->c1) && (last != ret->c2))
4253 xmlFreeElementContent(last);
4254 if (ret != NULL)
4255 xmlFreeElementContent(ret);
4256 return(NULL);
4257 }
4258 GROW;
4259 SKIP_BLANKS;
4260 GROW;
4261 if (RAW == '(') {
4262 /* Recurse on second child */
4263 NEXT;
4264 SKIP_BLANKS;
4265 last = xmlParseElementChildrenContentDecl(ctxt);
4266 SKIP_BLANKS;
4267 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004268 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 if (elem == NULL) {
4270 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4272 ctxt->sax->error(ctxt->userData,
4273 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4274 ctxt->wellFormed = 0;
4275 ctxt->disableSAX = 1;
4276 if ((op != NULL) && (op != ret))
4277 xmlFreeElementContent(op);
4278 if ((last != NULL) && (last != ret) &&
4279 (last != ret->c1) && (last != ret->c2))
4280 xmlFreeElementContent(last);
4281 if (ret != NULL)
4282 xmlFreeElementContent(ret);
4283 return(NULL);
4284 }
4285 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4286 xmlFree(elem);
4287 if (RAW == '?') {
4288 last->ocur = XML_ELEMENT_CONTENT_OPT;
4289 NEXT;
4290 } else if (RAW == '*') {
4291 last->ocur = XML_ELEMENT_CONTENT_MULT;
4292 NEXT;
4293 } else if (RAW == '+') {
4294 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4295 NEXT;
4296 } else {
4297 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4298 }
4299 }
4300 SKIP_BLANKS;
4301 GROW;
4302 }
4303 if ((cur != NULL) && (last != NULL)) {
4304 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004305 if (last != NULL)
4306 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004307 }
4308 ctxt->entity = ctxt->input;
4309 NEXT;
4310 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004311 if (ret != NULL)
4312 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004313 NEXT;
4314 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004315 if (ret != NULL)
4316 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004317 NEXT;
4318 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004319 if (ret != NULL)
4320 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004321 NEXT;
4322 }
4323 return(ret);
4324}
4325
4326/**
4327 * xmlParseElementContentDecl:
4328 * @ctxt: an XML parser context
4329 * @name: the name of the element being defined.
4330 * @result: the Element Content pointer will be stored here if any
4331 *
4332 * parse the declaration for an Element content either Mixed or Children,
4333 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4334 *
4335 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4336 *
4337 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4338 */
4339
4340int
4341xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4342 xmlElementContentPtr *result) {
4343
4344 xmlElementContentPtr tree = NULL;
4345 xmlParserInputPtr input = ctxt->input;
4346 int res;
4347
4348 *result = NULL;
4349
4350 if (RAW != '(') {
4351 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4353 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004354 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004355 ctxt->wellFormed = 0;
4356 ctxt->disableSAX = 1;
4357 return(-1);
4358 }
4359 NEXT;
4360 GROW;
4361 SKIP_BLANKS;
4362 if ((RAW == '#') && (NXT(1) == 'P') &&
4363 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4364 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4365 (NXT(6) == 'A')) {
4366 tree = xmlParseElementMixedContentDecl(ctxt);
4367 res = XML_ELEMENT_TYPE_MIXED;
4368 } else {
4369 tree = xmlParseElementChildrenContentDecl(ctxt);
4370 res = XML_ELEMENT_TYPE_ELEMENT;
4371 }
4372 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4373 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4375 ctxt->sax->error(ctxt->userData,
4376"Element content declaration doesn't start and stop in the same entity\n");
4377 ctxt->wellFormed = 0;
4378 ctxt->disableSAX = 1;
4379 }
4380 SKIP_BLANKS;
4381 *result = tree;
4382 return(res);
4383}
4384
4385/**
4386 * xmlParseElementDecl:
4387 * @ctxt: an XML parser context
4388 *
4389 * parse an Element declaration.
4390 *
4391 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4392 *
4393 * [ VC: Unique Element Type Declaration ]
4394 * No element type may be declared more than once
4395 *
4396 * Returns the type of the element, or -1 in case of error
4397 */
4398int
4399xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4400 xmlChar *name;
4401 int ret = -1;
4402 xmlElementContentPtr content = NULL;
4403
4404 GROW;
4405 if ((RAW == '<') && (NXT(1) == '!') &&
4406 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4407 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4408 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4409 (NXT(8) == 'T')) {
4410 xmlParserInputPtr input = ctxt->input;
4411
4412 SKIP(9);
4413 if (!IS_BLANK(CUR)) {
4414 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4416 ctxt->sax->error(ctxt->userData,
4417 "Space required after 'ELEMENT'\n");
4418 ctxt->wellFormed = 0;
4419 ctxt->disableSAX = 1;
4420 }
4421 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004422 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004423 if (name == NULL) {
4424 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4426 ctxt->sax->error(ctxt->userData,
4427 "xmlParseElementDecl: no name for Element\n");
4428 ctxt->wellFormed = 0;
4429 ctxt->disableSAX = 1;
4430 return(-1);
4431 }
4432 while ((RAW == 0) && (ctxt->inputNr > 1))
4433 xmlPopInput(ctxt);
4434 if (!IS_BLANK(CUR)) {
4435 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4437 ctxt->sax->error(ctxt->userData,
4438 "Space required after the element name\n");
4439 ctxt->wellFormed = 0;
4440 ctxt->disableSAX = 1;
4441 }
4442 SKIP_BLANKS;
4443 if ((RAW == 'E') && (NXT(1) == 'M') &&
4444 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4445 (NXT(4) == 'Y')) {
4446 SKIP(5);
4447 /*
4448 * Element must always be empty.
4449 */
4450 ret = XML_ELEMENT_TYPE_EMPTY;
4451 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4452 (NXT(2) == 'Y')) {
4453 SKIP(3);
4454 /*
4455 * Element is a generic container.
4456 */
4457 ret = XML_ELEMENT_TYPE_ANY;
4458 } else if (RAW == '(') {
4459 ret = xmlParseElementContentDecl(ctxt, name, &content);
4460 } else {
4461 /*
4462 * [ WFC: PEs in Internal Subset ] error handling.
4463 */
4464 if ((RAW == '%') && (ctxt->external == 0) &&
4465 (ctxt->inputNr == 1)) {
4466 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4468 ctxt->sax->error(ctxt->userData,
4469 "PEReference: forbidden within markup decl in internal subset\n");
4470 } else {
4471 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473 ctxt->sax->error(ctxt->userData,
4474 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4475 }
4476 ctxt->wellFormed = 0;
4477 ctxt->disableSAX = 1;
4478 if (name != NULL) xmlFree(name);
4479 return(-1);
4480 }
4481
4482 SKIP_BLANKS;
4483 /*
4484 * Pop-up of finished entities.
4485 */
4486 while ((RAW == 0) && (ctxt->inputNr > 1))
4487 xmlPopInput(ctxt);
4488 SKIP_BLANKS;
4489
4490 if (RAW != '>') {
4491 ctxt->errNo = XML_ERR_GT_REQUIRED;
4492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4493 ctxt->sax->error(ctxt->userData,
4494 "xmlParseElementDecl: expected '>' at the end\n");
4495 ctxt->wellFormed = 0;
4496 ctxt->disableSAX = 1;
4497 } else {
4498 if (input != ctxt->input) {
4499 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501 ctxt->sax->error(ctxt->userData,
4502"Element declaration doesn't start and stop in the same entity\n");
4503 ctxt->wellFormed = 0;
4504 ctxt->disableSAX = 1;
4505 }
4506
4507 NEXT;
4508 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4509 (ctxt->sax->elementDecl != NULL))
4510 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4511 content);
4512 }
4513 if (content != NULL) {
4514 xmlFreeElementContent(content);
4515 }
4516 if (name != NULL) {
4517 xmlFree(name);
4518 }
4519 }
4520 return(ret);
4521}
4522
4523/**
4524 * xmlParseMarkupDecl:
4525 * @ctxt: an XML parser context
4526 *
4527 * parse Markup declarations
4528 *
4529 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4530 * NotationDecl | PI | Comment
4531 *
4532 * [ VC: Proper Declaration/PE Nesting ]
4533 * Parameter-entity replacement text must be properly nested with
4534 * markup declarations. That is to say, if either the first character
4535 * or the last character of a markup declaration (markupdecl above) is
4536 * contained in the replacement text for a parameter-entity reference,
4537 * both must be contained in the same replacement text.
4538 *
4539 * [ WFC: PEs in Internal Subset ]
4540 * In the internal DTD subset, parameter-entity references can occur
4541 * only where markup declarations can occur, not within markup declarations.
4542 * (This does not apply to references that occur in external parameter
4543 * entities or to the external subset.)
4544 */
4545void
4546xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4547 GROW;
4548 xmlParseElementDecl(ctxt);
4549 xmlParseAttributeListDecl(ctxt);
4550 xmlParseEntityDecl(ctxt);
4551 xmlParseNotationDecl(ctxt);
4552 xmlParsePI(ctxt);
4553 xmlParseComment(ctxt);
4554 /*
4555 * This is only for internal subset. On external entities,
4556 * the replacement is done before parsing stage
4557 */
4558 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4559 xmlParsePEReference(ctxt);
4560 ctxt->instate = XML_PARSER_DTD;
4561}
4562
4563/**
4564 * xmlParseTextDecl:
4565 * @ctxt: an XML parser context
4566 *
4567 * parse an XML declaration header for external entities
4568 *
4569 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4570 *
4571 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4572 */
4573
4574void
4575xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4576 xmlChar *version;
4577
4578 /*
4579 * We know that '<?xml' is here.
4580 */
4581 if ((RAW == '<') && (NXT(1) == '?') &&
4582 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4583 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4584 SKIP(5);
4585 } else {
4586 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4588 ctxt->sax->error(ctxt->userData,
4589 "Text declaration '<?xml' required\n");
4590 ctxt->wellFormed = 0;
4591 ctxt->disableSAX = 1;
4592
4593 return;
4594 }
4595
4596 if (!IS_BLANK(CUR)) {
4597 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4599 ctxt->sax->error(ctxt->userData,
4600 "Space needed after '<?xml'\n");
4601 ctxt->wellFormed = 0;
4602 ctxt->disableSAX = 1;
4603 }
4604 SKIP_BLANKS;
4605
4606 /*
4607 * We may have the VersionInfo here.
4608 */
4609 version = xmlParseVersionInfo(ctxt);
4610 if (version == NULL)
4611 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4612 ctxt->input->version = version;
4613
4614 /*
4615 * We must have the encoding declaration
4616 */
4617 if (!IS_BLANK(CUR)) {
4618 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4620 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4621 ctxt->wellFormed = 0;
4622 ctxt->disableSAX = 1;
4623 }
4624 xmlParseEncodingDecl(ctxt);
4625 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4626 /*
4627 * The XML REC instructs us to stop parsing right here
4628 */
4629 return;
4630 }
4631
4632 SKIP_BLANKS;
4633 if ((RAW == '?') && (NXT(1) == '>')) {
4634 SKIP(2);
4635 } else if (RAW == '>') {
4636 /* Deprecated old WD ... */
4637 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4639 ctxt->sax->error(ctxt->userData,
4640 "XML declaration must end-up with '?>'\n");
4641 ctxt->wellFormed = 0;
4642 ctxt->disableSAX = 1;
4643 NEXT;
4644 } else {
4645 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647 ctxt->sax->error(ctxt->userData,
4648 "parsing XML declaration: '?>' expected\n");
4649 ctxt->wellFormed = 0;
4650 ctxt->disableSAX = 1;
4651 MOVETO_ENDTAG(CUR_PTR);
4652 NEXT;
4653 }
4654}
4655
4656/*
4657 * xmlParseConditionalSections
4658 * @ctxt: an XML parser context
4659 *
4660 * [61] conditionalSect ::= includeSect | ignoreSect
4661 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4662 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4663 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4664 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4665 */
4666
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004667static void
Owen Taylor3473f882001-02-23 17:55:21 +00004668xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4669 SKIP(3);
4670 SKIP_BLANKS;
4671 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4672 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4673 (NXT(6) == 'E')) {
4674 SKIP(7);
4675 SKIP_BLANKS;
4676 if (RAW != '[') {
4677 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4679 ctxt->sax->error(ctxt->userData,
4680 "XML conditional section '[' expected\n");
4681 ctxt->wellFormed = 0;
4682 ctxt->disableSAX = 1;
4683 } else {
4684 NEXT;
4685 }
4686 if (xmlParserDebugEntities) {
4687 if ((ctxt->input != NULL) && (ctxt->input->filename))
4688 xmlGenericError(xmlGenericErrorContext,
4689 "%s(%d): ", ctxt->input->filename,
4690 ctxt->input->line);
4691 xmlGenericError(xmlGenericErrorContext,
4692 "Entering INCLUDE Conditional Section\n");
4693 }
4694
4695 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4696 (NXT(2) != '>'))) {
4697 const xmlChar *check = CUR_PTR;
4698 int cons = ctxt->input->consumed;
4699 int tok = ctxt->token;
4700
4701 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4702 xmlParseConditionalSections(ctxt);
4703 } else if (IS_BLANK(CUR)) {
4704 NEXT;
4705 } else if (RAW == '%') {
4706 xmlParsePEReference(ctxt);
4707 } else
4708 xmlParseMarkupDecl(ctxt);
4709
4710 /*
4711 * Pop-up of finished entities.
4712 */
4713 while ((RAW == 0) && (ctxt->inputNr > 1))
4714 xmlPopInput(ctxt);
4715
4716 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4717 (tok == ctxt->token)) {
4718 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4720 ctxt->sax->error(ctxt->userData,
4721 "Content error in the external subset\n");
4722 ctxt->wellFormed = 0;
4723 ctxt->disableSAX = 1;
4724 break;
4725 }
4726 }
4727 if (xmlParserDebugEntities) {
4728 if ((ctxt->input != NULL) && (ctxt->input->filename))
4729 xmlGenericError(xmlGenericErrorContext,
4730 "%s(%d): ", ctxt->input->filename,
4731 ctxt->input->line);
4732 xmlGenericError(xmlGenericErrorContext,
4733 "Leaving INCLUDE Conditional Section\n");
4734 }
4735
4736 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4737 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4738 int state;
4739 int instate;
4740 int depth = 0;
4741
4742 SKIP(6);
4743 SKIP_BLANKS;
4744 if (RAW != '[') {
4745 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4747 ctxt->sax->error(ctxt->userData,
4748 "XML conditional section '[' expected\n");
4749 ctxt->wellFormed = 0;
4750 ctxt->disableSAX = 1;
4751 } else {
4752 NEXT;
4753 }
4754 if (xmlParserDebugEntities) {
4755 if ((ctxt->input != NULL) && (ctxt->input->filename))
4756 xmlGenericError(xmlGenericErrorContext,
4757 "%s(%d): ", ctxt->input->filename,
4758 ctxt->input->line);
4759 xmlGenericError(xmlGenericErrorContext,
4760 "Entering IGNORE Conditional Section\n");
4761 }
4762
4763 /*
4764 * Parse up to the end of the conditionnal section
4765 * But disable SAX event generating DTD building in the meantime
4766 */
4767 state = ctxt->disableSAX;
4768 instate = ctxt->instate;
4769 ctxt->disableSAX = 1;
4770 ctxt->instate = XML_PARSER_IGNORE;
4771
4772 while (depth >= 0) {
4773 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4774 depth++;
4775 SKIP(3);
4776 continue;
4777 }
4778 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4779 if (--depth >= 0) SKIP(3);
4780 continue;
4781 }
4782 NEXT;
4783 continue;
4784 }
4785
4786 ctxt->disableSAX = state;
4787 ctxt->instate = instate;
4788
4789 if (xmlParserDebugEntities) {
4790 if ((ctxt->input != NULL) && (ctxt->input->filename))
4791 xmlGenericError(xmlGenericErrorContext,
4792 "%s(%d): ", ctxt->input->filename,
4793 ctxt->input->line);
4794 xmlGenericError(xmlGenericErrorContext,
4795 "Leaving IGNORE Conditional Section\n");
4796 }
4797
4798 } else {
4799 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801 ctxt->sax->error(ctxt->userData,
4802 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4803 ctxt->wellFormed = 0;
4804 ctxt->disableSAX = 1;
4805 }
4806
4807 if (RAW == 0)
4808 SHRINK;
4809
4810 if (RAW == 0) {
4811 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4813 ctxt->sax->error(ctxt->userData,
4814 "XML conditional section not closed\n");
4815 ctxt->wellFormed = 0;
4816 ctxt->disableSAX = 1;
4817 } else {
4818 SKIP(3);
4819 }
4820}
4821
4822/**
4823 * xmlParseExternalSubset:
4824 * @ctxt: an XML parser context
4825 * @ExternalID: the external identifier
4826 * @SystemID: the system identifier (or URL)
4827 *
4828 * parse Markup declarations from an external subset
4829 *
4830 * [30] extSubset ::= textDecl? extSubsetDecl
4831 *
4832 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4833 */
4834void
4835xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4836 const xmlChar *SystemID) {
4837 GROW;
4838 if ((RAW == '<') && (NXT(1) == '?') &&
4839 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4840 (NXT(4) == 'l')) {
4841 xmlParseTextDecl(ctxt);
4842 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4843 /*
4844 * The XML REC instructs us to stop parsing right here
4845 */
4846 ctxt->instate = XML_PARSER_EOF;
4847 return;
4848 }
4849 }
4850 if (ctxt->myDoc == NULL) {
4851 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4852 }
4853 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4854 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4855
4856 ctxt->instate = XML_PARSER_DTD;
4857 ctxt->external = 1;
4858 while (((RAW == '<') && (NXT(1) == '?')) ||
4859 ((RAW == '<') && (NXT(1) == '!')) ||
4860 IS_BLANK(CUR)) {
4861 const xmlChar *check = CUR_PTR;
4862 int cons = ctxt->input->consumed;
4863 int tok = ctxt->token;
4864
4865 GROW;
4866 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4867 xmlParseConditionalSections(ctxt);
4868 } else if (IS_BLANK(CUR)) {
4869 NEXT;
4870 } else if (RAW == '%') {
4871 xmlParsePEReference(ctxt);
4872 } else
4873 xmlParseMarkupDecl(ctxt);
4874
4875 /*
4876 * Pop-up of finished entities.
4877 */
4878 while ((RAW == 0) && (ctxt->inputNr > 1))
4879 xmlPopInput(ctxt);
4880
4881 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4882 (tok == ctxt->token)) {
4883 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4885 ctxt->sax->error(ctxt->userData,
4886 "Content error in the external subset\n");
4887 ctxt->wellFormed = 0;
4888 ctxt->disableSAX = 1;
4889 break;
4890 }
4891 }
4892
4893 if (RAW != 0) {
4894 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4896 ctxt->sax->error(ctxt->userData,
4897 "Extra content at the end of the document\n");
4898 ctxt->wellFormed = 0;
4899 ctxt->disableSAX = 1;
4900 }
4901
4902}
4903
4904/**
4905 * xmlParseReference:
4906 * @ctxt: an XML parser context
4907 *
4908 * parse and handle entity references in content, depending on the SAX
4909 * interface, this may end-up in a call to character() if this is a
4910 * CharRef, a predefined entity, if there is no reference() callback.
4911 * or if the parser was asked to switch to that mode.
4912 *
4913 * [67] Reference ::= EntityRef | CharRef
4914 */
4915void
4916xmlParseReference(xmlParserCtxtPtr ctxt) {
4917 xmlEntityPtr ent;
4918 xmlChar *val;
4919 if (RAW != '&') return;
4920
4921 if (NXT(1) == '#') {
4922 int i = 0;
4923 xmlChar out[10];
4924 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004925 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004926
4927 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4928 /*
4929 * So we are using non-UTF-8 buffers
4930 * Check that the char fit on 8bits, if not
4931 * generate a CharRef.
4932 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004933 if (value <= 0xFF) {
4934 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004935 out[1] = 0;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4937 (!ctxt->disableSAX))
4938 ctxt->sax->characters(ctxt->userData, out, 1);
4939 } else {
4940 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004941 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004942 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004943 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004944 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4945 (!ctxt->disableSAX))
4946 ctxt->sax->reference(ctxt->userData, out);
4947 }
4948 } else {
4949 /*
4950 * Just encode the value in UTF-8
4951 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004952 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004953 out[i] = 0;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4955 (!ctxt->disableSAX))
4956 ctxt->sax->characters(ctxt->userData, out, i);
4957 }
4958 } else {
4959 ent = xmlParseEntityRef(ctxt);
4960 if (ent == NULL) return;
4961 if ((ent->name != NULL) &&
4962 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4963 xmlNodePtr list = NULL;
4964 int ret;
4965
4966
4967 /*
4968 * The first reference to the entity trigger a parsing phase
4969 * where the ent->children is filled with the result from
4970 * the parsing.
4971 */
4972 if (ent->children == NULL) {
4973 xmlChar *value;
4974 value = ent->content;
4975
4976 /*
4977 * Check that this entity is well formed
4978 */
4979 if ((value != NULL) &&
4980 (value[1] == 0) && (value[0] == '<') &&
4981 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4982 /*
4983 * DONE: get definite answer on this !!!
4984 * Lots of entity decls are used to declare a single
4985 * char
4986 * <!ENTITY lt "<">
4987 * Which seems to be valid since
4988 * 2.4: The ampersand character (&) and the left angle
4989 * bracket (<) may appear in their literal form only
4990 * when used ... They are also legal within the literal
4991 * entity value of an internal entity declaration;i
4992 * see "4.3.2 Well-Formed Parsed Entities".
4993 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4994 * Looking at the OASIS test suite and James Clark
4995 * tests, this is broken. However the XML REC uses
4996 * it. Is the XML REC not well-formed ????
4997 * This is a hack to avoid this problem
4998 *
4999 * ANSWER: since lt gt amp .. are already defined,
5000 * this is a redefinition and hence the fact that the
5001 * contentis not well balanced is not a Wf error, this
5002 * is lousy but acceptable.
5003 */
5004 list = xmlNewDocText(ctxt->myDoc, value);
5005 if (list != NULL) {
5006 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5007 (ent->children == NULL)) {
5008 ent->children = list;
5009 ent->last = list;
5010 list->parent = (xmlNodePtr) ent;
5011 } else {
5012 xmlFreeNodeList(list);
5013 }
5014 } else if (list != NULL) {
5015 xmlFreeNodeList(list);
5016 }
5017 } else {
5018 /*
5019 * 4.3.2: An internal general parsed entity is well-formed
5020 * if its replacement text matches the production labeled
5021 * content.
5022 */
5023 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5024 ctxt->depth++;
5025 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5026 ctxt->sax, NULL, ctxt->depth,
5027 value, &list);
5028 ctxt->depth--;
5029 } else if (ent->etype ==
5030 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5031 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005032 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005033 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005034 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005035 ctxt->depth--;
5036 } else {
5037 ret = -1;
5038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5039 ctxt->sax->error(ctxt->userData,
5040 "Internal: invalid entity type\n");
5041 }
5042 if (ret == XML_ERR_ENTITY_LOOP) {
5043 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5045 ctxt->sax->error(ctxt->userData,
5046 "Detected entity reference loop\n");
5047 ctxt->wellFormed = 0;
5048 ctxt->disableSAX = 1;
5049 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005050 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5051 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005052 (ent->children == NULL)) {
5053 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005054 if (ctxt->replaceEntities) {
5055 /*
5056 * Prune it directly in the generated document
5057 * except for single text nodes.
5058 */
5059 if ((list->type == XML_TEXT_NODE) &&
5060 (list->next == NULL)) {
5061 list->parent = (xmlNodePtr) ent;
5062 list = NULL;
5063 } else {
5064 while (list != NULL) {
5065 list->parent = (xmlNodePtr) ctxt->node;
5066 if (list->next == NULL)
5067 ent->last = list;
5068 list = list->next;
5069 }
5070 list = ent->children;
5071 }
5072 } else {
5073 while (list != NULL) {
5074 list->parent = (xmlNodePtr) ent;
5075 if (list->next == NULL)
5076 ent->last = list;
5077 list = list->next;
5078 }
Owen Taylor3473f882001-02-23 17:55:21 +00005079 }
5080 } else {
5081 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005082 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005083 }
5084 } else if (ret > 0) {
5085 ctxt->errNo = ret;
5086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5087 ctxt->sax->error(ctxt->userData,
5088 "Entity value required\n");
5089 ctxt->wellFormed = 0;
5090 ctxt->disableSAX = 1;
5091 } else if (list != NULL) {
5092 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005093 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005094 }
5095 }
5096 }
5097 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5098 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5099 /*
5100 * Create a node.
5101 */
5102 ctxt->sax->reference(ctxt->userData, ent->name);
5103 return;
5104 } else if (ctxt->replaceEntities) {
5105 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5106 /*
5107 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005108 * a simple tree copy for all references except the first
5109 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005110 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005111 if (list == NULL) {
5112 xmlNodePtr new, cur;
5113 cur = ent->children;
5114 while (cur != NULL) {
5115 new = xmlCopyNode(cur, 1);
5116 xmlAddChild(ctxt->node, new);
5117 if (cur == ent->last)
5118 break;
5119 cur = cur->next;
5120 }
5121 } else {
5122 /*
5123 * the name change is to avoid coalescing of the
5124 * node with a prossible previous text one which
5125 * would make ent->children a dandling pointer
5126 */
5127 if (ent->children->type == XML_TEXT_NODE)
5128 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5129 if ((ent->last != ent->children) &&
5130 (ent->last->type == XML_TEXT_NODE))
5131 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5132 xmlAddChildList(ctxt->node, ent->children);
5133 }
5134
Owen Taylor3473f882001-02-23 17:55:21 +00005135 /*
5136 * This is to avoid a nasty side effect, see
5137 * characters() in SAX.c
5138 */
5139 ctxt->nodemem = 0;
5140 ctxt->nodelen = 0;
5141 return;
5142 } else {
5143 /*
5144 * Probably running in SAX mode
5145 */
5146 xmlParserInputPtr input;
5147
5148 input = xmlNewEntityInputStream(ctxt, ent);
5149 xmlPushInput(ctxt, input);
5150 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5151 (RAW == '<') && (NXT(1) == '?') &&
5152 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5153 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5154 xmlParseTextDecl(ctxt);
5155 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5156 /*
5157 * The XML REC instructs us to stop parsing right here
5158 */
5159 ctxt->instate = XML_PARSER_EOF;
5160 return;
5161 }
5162 if (input->standalone == 1) {
5163 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5165 ctxt->sax->error(ctxt->userData,
5166 "external parsed entities cannot be standalone\n");
5167 ctxt->wellFormed = 0;
5168 ctxt->disableSAX = 1;
5169 }
5170 }
5171 return;
5172 }
5173 }
5174 } else {
5175 val = ent->content;
5176 if (val == NULL) return;
5177 /*
5178 * inline the entity.
5179 */
5180 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5181 (!ctxt->disableSAX))
5182 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5183 }
5184 }
5185}
5186
5187/**
5188 * xmlParseEntityRef:
5189 * @ctxt: an XML parser context
5190 *
5191 * parse ENTITY references declarations
5192 *
5193 * [68] EntityRef ::= '&' Name ';'
5194 *
5195 * [ WFC: Entity Declared ]
5196 * In a document without any DTD, a document with only an internal DTD
5197 * subset which contains no parameter entity references, or a document
5198 * with "standalone='yes'", the Name given in the entity reference
5199 * must match that in an entity declaration, except that well-formed
5200 * documents need not declare any of the following entities: amp, lt,
5201 * gt, apos, quot. The declaration of a parameter entity must precede
5202 * any reference to it. Similarly, the declaration of a general entity
5203 * must precede any reference to it which appears in a default value in an
5204 * attribute-list declaration. Note that if entities are declared in the
5205 * external subset or in external parameter entities, a non-validating
5206 * processor is not obligated to read and process their declarations;
5207 * for such documents, the rule that an entity must be declared is a
5208 * well-formedness constraint only if standalone='yes'.
5209 *
5210 * [ WFC: Parsed Entity ]
5211 * An entity reference must not contain the name of an unparsed entity
5212 *
5213 * Returns the xmlEntityPtr if found, or NULL otherwise.
5214 */
5215xmlEntityPtr
5216xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5217 xmlChar *name;
5218 xmlEntityPtr ent = NULL;
5219
5220 GROW;
5221
5222 if (RAW == '&') {
5223 NEXT;
5224 name = xmlParseName(ctxt);
5225 if (name == NULL) {
5226 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5228 ctxt->sax->error(ctxt->userData,
5229 "xmlParseEntityRef: no name\n");
5230 ctxt->wellFormed = 0;
5231 ctxt->disableSAX = 1;
5232 } else {
5233 if (RAW == ';') {
5234 NEXT;
5235 /*
5236 * Ask first SAX for entity resolution, otherwise try the
5237 * predefined set.
5238 */
5239 if (ctxt->sax != NULL) {
5240 if (ctxt->sax->getEntity != NULL)
5241 ent = ctxt->sax->getEntity(ctxt->userData, name);
5242 if (ent == NULL)
5243 ent = xmlGetPredefinedEntity(name);
5244 }
5245 /*
5246 * [ WFC: Entity Declared ]
5247 * In a document without any DTD, a document with only an
5248 * internal DTD subset which contains no parameter entity
5249 * references, or a document with "standalone='yes'", the
5250 * Name given in the entity reference must match that in an
5251 * entity declaration, except that well-formed documents
5252 * need not declare any of the following entities: amp, lt,
5253 * gt, apos, quot.
5254 * The declaration of a parameter entity must precede any
5255 * reference to it.
5256 * Similarly, the declaration of a general entity must
5257 * precede any reference to it which appears in a default
5258 * value in an attribute-list declaration. Note that if
5259 * entities are declared in the external subset or in
5260 * external parameter entities, a non-validating processor
5261 * is not obligated to read and process their declarations;
5262 * for such documents, the rule that an entity must be
5263 * declared is a well-formedness constraint only if
5264 * standalone='yes'.
5265 */
5266 if (ent == NULL) {
5267 if ((ctxt->standalone == 1) ||
5268 ((ctxt->hasExternalSubset == 0) &&
5269 (ctxt->hasPErefs == 0))) {
5270 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "Entity '%s' not defined\n", name);
5274 ctxt->wellFormed = 0;
5275 ctxt->disableSAX = 1;
5276 } else {
5277 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5278 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005279 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005280 "Entity '%s' not defined\n", name);
5281 }
5282 }
5283
5284 /*
5285 * [ WFC: Parsed Entity ]
5286 * An entity reference must not contain the name of an
5287 * unparsed entity
5288 */
5289 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5290 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5292 ctxt->sax->error(ctxt->userData,
5293 "Entity reference to unparsed entity %s\n", name);
5294 ctxt->wellFormed = 0;
5295 ctxt->disableSAX = 1;
5296 }
5297
5298 /*
5299 * [ WFC: No External Entity References ]
5300 * Attribute values cannot contain direct or indirect
5301 * entity references to external entities.
5302 */
5303 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5304 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5305 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5307 ctxt->sax->error(ctxt->userData,
5308 "Attribute references external entity '%s'\n", name);
5309 ctxt->wellFormed = 0;
5310 ctxt->disableSAX = 1;
5311 }
5312 /*
5313 * [ WFC: No < in Attribute Values ]
5314 * The replacement text of any entity referred to directly or
5315 * indirectly in an attribute value (other than "&lt;") must
5316 * not contain a <.
5317 */
5318 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5319 (ent != NULL) &&
5320 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5321 (ent->content != NULL) &&
5322 (xmlStrchr(ent->content, '<'))) {
5323 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5325 ctxt->sax->error(ctxt->userData,
5326 "'<' in entity '%s' is not allowed in attributes values\n", name);
5327 ctxt->wellFormed = 0;
5328 ctxt->disableSAX = 1;
5329 }
5330
5331 /*
5332 * Internal check, no parameter entities here ...
5333 */
5334 else {
5335 switch (ent->etype) {
5336 case XML_INTERNAL_PARAMETER_ENTITY:
5337 case XML_EXTERNAL_PARAMETER_ENTITY:
5338 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5340 ctxt->sax->error(ctxt->userData,
5341 "Attempt to reference the parameter entity '%s'\n", name);
5342 ctxt->wellFormed = 0;
5343 ctxt->disableSAX = 1;
5344 break;
5345 default:
5346 break;
5347 }
5348 }
5349
5350 /*
5351 * [ WFC: No Recursion ]
5352 * A parsed entity must not contain a recursive reference
5353 * to itself, either directly or indirectly.
5354 * Done somewhere else
5355 */
5356
5357 } else {
5358 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5360 ctxt->sax->error(ctxt->userData,
5361 "xmlParseEntityRef: expecting ';'\n");
5362 ctxt->wellFormed = 0;
5363 ctxt->disableSAX = 1;
5364 }
5365 xmlFree(name);
5366 }
5367 }
5368 return(ent);
5369}
5370
5371/**
5372 * xmlParseStringEntityRef:
5373 * @ctxt: an XML parser context
5374 * @str: a pointer to an index in the string
5375 *
5376 * parse ENTITY references declarations, but this version parses it from
5377 * a string value.
5378 *
5379 * [68] EntityRef ::= '&' Name ';'
5380 *
5381 * [ WFC: Entity Declared ]
5382 * In a document without any DTD, a document with only an internal DTD
5383 * subset which contains no parameter entity references, or a document
5384 * with "standalone='yes'", the Name given in the entity reference
5385 * must match that in an entity declaration, except that well-formed
5386 * documents need not declare any of the following entities: amp, lt,
5387 * gt, apos, quot. The declaration of a parameter entity must precede
5388 * any reference to it. Similarly, the declaration of a general entity
5389 * must precede any reference to it which appears in a default value in an
5390 * attribute-list declaration. Note that if entities are declared in the
5391 * external subset or in external parameter entities, a non-validating
5392 * processor is not obligated to read and process their declarations;
5393 * for such documents, the rule that an entity must be declared is a
5394 * well-formedness constraint only if standalone='yes'.
5395 *
5396 * [ WFC: Parsed Entity ]
5397 * An entity reference must not contain the name of an unparsed entity
5398 *
5399 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5400 * is updated to the current location in the string.
5401 */
5402xmlEntityPtr
5403xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5404 xmlChar *name;
5405 const xmlChar *ptr;
5406 xmlChar cur;
5407 xmlEntityPtr ent = NULL;
5408
5409 if ((str == NULL) || (*str == NULL))
5410 return(NULL);
5411 ptr = *str;
5412 cur = *ptr;
5413 if (cur == '&') {
5414 ptr++;
5415 cur = *ptr;
5416 name = xmlParseStringName(ctxt, &ptr);
5417 if (name == NULL) {
5418 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5420 ctxt->sax->error(ctxt->userData,
5421 "xmlParseEntityRef: no name\n");
5422 ctxt->wellFormed = 0;
5423 ctxt->disableSAX = 1;
5424 } else {
5425 if (*ptr == ';') {
5426 ptr++;
5427 /*
5428 * Ask first SAX for entity resolution, otherwise try the
5429 * predefined set.
5430 */
5431 if (ctxt->sax != NULL) {
5432 if (ctxt->sax->getEntity != NULL)
5433 ent = ctxt->sax->getEntity(ctxt->userData, name);
5434 if (ent == NULL)
5435 ent = xmlGetPredefinedEntity(name);
5436 }
5437 /*
5438 * [ WFC: Entity Declared ]
5439 * In a document without any DTD, a document with only an
5440 * internal DTD subset which contains no parameter entity
5441 * references, or a document with "standalone='yes'", the
5442 * Name given in the entity reference must match that in an
5443 * entity declaration, except that well-formed documents
5444 * need not declare any of the following entities: amp, lt,
5445 * gt, apos, quot.
5446 * The declaration of a parameter entity must precede any
5447 * reference to it.
5448 * Similarly, the declaration of a general entity must
5449 * precede any reference to it which appears in a default
5450 * value in an attribute-list declaration. Note that if
5451 * entities are declared in the external subset or in
5452 * external parameter entities, a non-validating processor
5453 * is not obligated to read and process their declarations;
5454 * for such documents, the rule that an entity must be
5455 * declared is a well-formedness constraint only if
5456 * standalone='yes'.
5457 */
5458 if (ent == NULL) {
5459 if ((ctxt->standalone == 1) ||
5460 ((ctxt->hasExternalSubset == 0) &&
5461 (ctxt->hasPErefs == 0))) {
5462 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5464 ctxt->sax->error(ctxt->userData,
5465 "Entity '%s' not defined\n", name);
5466 ctxt->wellFormed = 0;
5467 ctxt->disableSAX = 1;
5468 } else {
5469 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5471 ctxt->sax->warning(ctxt->userData,
5472 "Entity '%s' not defined\n", name);
5473 }
5474 }
5475
5476 /*
5477 * [ WFC: Parsed Entity ]
5478 * An entity reference must not contain the name of an
5479 * unparsed entity
5480 */
5481 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5482 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5484 ctxt->sax->error(ctxt->userData,
5485 "Entity reference to unparsed entity %s\n", name);
5486 ctxt->wellFormed = 0;
5487 ctxt->disableSAX = 1;
5488 }
5489
5490 /*
5491 * [ WFC: No External Entity References ]
5492 * Attribute values cannot contain direct or indirect
5493 * entity references to external entities.
5494 */
5495 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5496 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5497 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5499 ctxt->sax->error(ctxt->userData,
5500 "Attribute references external entity '%s'\n", name);
5501 ctxt->wellFormed = 0;
5502 ctxt->disableSAX = 1;
5503 }
5504 /*
5505 * [ WFC: No < in Attribute Values ]
5506 * The replacement text of any entity referred to directly or
5507 * indirectly in an attribute value (other than "&lt;") must
5508 * not contain a <.
5509 */
5510 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5511 (ent != NULL) &&
5512 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5513 (ent->content != NULL) &&
5514 (xmlStrchr(ent->content, '<'))) {
5515 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "'<' in entity '%s' is not allowed in attributes values\n", name);
5519 ctxt->wellFormed = 0;
5520 ctxt->disableSAX = 1;
5521 }
5522
5523 /*
5524 * Internal check, no parameter entities here ...
5525 */
5526 else {
5527 switch (ent->etype) {
5528 case XML_INTERNAL_PARAMETER_ENTITY:
5529 case XML_EXTERNAL_PARAMETER_ENTITY:
5530 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5532 ctxt->sax->error(ctxt->userData,
5533 "Attempt to reference the parameter entity '%s'\n", name);
5534 ctxt->wellFormed = 0;
5535 ctxt->disableSAX = 1;
5536 break;
5537 default:
5538 break;
5539 }
5540 }
5541
5542 /*
5543 * [ WFC: No Recursion ]
5544 * A parsed entity must not contain a recursive reference
5545 * to itself, either directly or indirectly.
5546 * Done somewhwere else
5547 */
5548
5549 } else {
5550 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5552 ctxt->sax->error(ctxt->userData,
5553 "xmlParseEntityRef: expecting ';'\n");
5554 ctxt->wellFormed = 0;
5555 ctxt->disableSAX = 1;
5556 }
5557 xmlFree(name);
5558 }
5559 }
5560 *str = ptr;
5561 return(ent);
5562}
5563
5564/**
5565 * xmlParsePEReference:
5566 * @ctxt: an XML parser context
5567 *
5568 * parse PEReference declarations
5569 * The entity content is handled directly by pushing it's content as
5570 * a new input stream.
5571 *
5572 * [69] PEReference ::= '%' Name ';'
5573 *
5574 * [ WFC: No Recursion ]
5575 * A parsed entity must not contain a recursive
5576 * reference to itself, either directly or indirectly.
5577 *
5578 * [ WFC: Entity Declared ]
5579 * In a document without any DTD, a document with only an internal DTD
5580 * subset which contains no parameter entity references, or a document
5581 * with "standalone='yes'", ... ... The declaration of a parameter
5582 * entity must precede any reference to it...
5583 *
5584 * [ VC: Entity Declared ]
5585 * In a document with an external subset or external parameter entities
5586 * with "standalone='no'", ... ... The declaration of a parameter entity
5587 * must precede any reference to it...
5588 *
5589 * [ WFC: In DTD ]
5590 * Parameter-entity references may only appear in the DTD.
5591 * NOTE: misleading but this is handled.
5592 */
5593void
5594xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5595 xmlChar *name;
5596 xmlEntityPtr entity = NULL;
5597 xmlParserInputPtr input;
5598
5599 if (RAW == '%') {
5600 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005601 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005602 if (name == NULL) {
5603 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
5606 "xmlParsePEReference: no name\n");
5607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 } else {
5610 if (RAW == ';') {
5611 NEXT;
5612 if ((ctxt->sax != NULL) &&
5613 (ctxt->sax->getParameterEntity != NULL))
5614 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5615 name);
5616 if (entity == NULL) {
5617 /*
5618 * [ WFC: Entity Declared ]
5619 * In a document without any DTD, a document with only an
5620 * internal DTD subset which contains no parameter entity
5621 * references, or a document with "standalone='yes'", ...
5622 * ... The declaration of a parameter entity must precede
5623 * any reference to it...
5624 */
5625 if ((ctxt->standalone == 1) ||
5626 ((ctxt->hasExternalSubset == 0) &&
5627 (ctxt->hasPErefs == 0))) {
5628 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5629 if ((!ctxt->disableSAX) &&
5630 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5631 ctxt->sax->error(ctxt->userData,
5632 "PEReference: %%%s; not found\n", name);
5633 ctxt->wellFormed = 0;
5634 ctxt->disableSAX = 1;
5635 } else {
5636 /*
5637 * [ VC: Entity Declared ]
5638 * In a document with an external subset or external
5639 * parameter entities with "standalone='no'", ...
5640 * ... The declaration of a parameter entity must precede
5641 * any reference to it...
5642 */
5643 if ((!ctxt->disableSAX) &&
5644 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5645 ctxt->sax->warning(ctxt->userData,
5646 "PEReference: %%%s; not found\n", name);
5647 ctxt->valid = 0;
5648 }
5649 } else {
5650 /*
5651 * Internal checking in case the entity quest barfed
5652 */
5653 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5654 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5655 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5656 ctxt->sax->warning(ctxt->userData,
5657 "Internal: %%%s; is not a parameter entity\n", name);
5658 } else {
5659 /*
5660 * TODO !!!
5661 * handle the extra spaces added before and after
5662 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5663 */
5664 input = xmlNewEntityInputStream(ctxt, entity);
5665 xmlPushInput(ctxt, input);
5666 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5667 (RAW == '<') && (NXT(1) == '?') &&
5668 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5669 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5670 xmlParseTextDecl(ctxt);
5671 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5672 /*
5673 * The XML REC instructs us to stop parsing
5674 * right here
5675 */
5676 ctxt->instate = XML_PARSER_EOF;
5677 xmlFree(name);
5678 return;
5679 }
5680 }
5681 if (ctxt->token == 0)
5682 ctxt->token = ' ';
5683 }
5684 }
5685 ctxt->hasPErefs = 1;
5686 } else {
5687 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5689 ctxt->sax->error(ctxt->userData,
5690 "xmlParsePEReference: expecting ';'\n");
5691 ctxt->wellFormed = 0;
5692 ctxt->disableSAX = 1;
5693 }
5694 xmlFree(name);
5695 }
5696 }
5697}
5698
5699/**
5700 * xmlParseStringPEReference:
5701 * @ctxt: an XML parser context
5702 * @str: a pointer to an index in the string
5703 *
5704 * parse PEReference declarations
5705 *
5706 * [69] PEReference ::= '%' Name ';'
5707 *
5708 * [ WFC: No Recursion ]
5709 * A parsed entity must not contain a recursive
5710 * reference to itself, either directly or indirectly.
5711 *
5712 * [ WFC: Entity Declared ]
5713 * In a document without any DTD, a document with only an internal DTD
5714 * subset which contains no parameter entity references, or a document
5715 * with "standalone='yes'", ... ... The declaration of a parameter
5716 * entity must precede any reference to it...
5717 *
5718 * [ VC: Entity Declared ]
5719 * In a document with an external subset or external parameter entities
5720 * with "standalone='no'", ... ... The declaration of a parameter entity
5721 * must precede any reference to it...
5722 *
5723 * [ WFC: In DTD ]
5724 * Parameter-entity references may only appear in the DTD.
5725 * NOTE: misleading but this is handled.
5726 *
5727 * Returns the string of the entity content.
5728 * str is updated to the current value of the index
5729 */
5730xmlEntityPtr
5731xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5732 const xmlChar *ptr;
5733 xmlChar cur;
5734 xmlChar *name;
5735 xmlEntityPtr entity = NULL;
5736
5737 if ((str == NULL) || (*str == NULL)) return(NULL);
5738 ptr = *str;
5739 cur = *ptr;
5740 if (cur == '%') {
5741 ptr++;
5742 cur = *ptr;
5743 name = xmlParseStringName(ctxt, &ptr);
5744 if (name == NULL) {
5745 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5747 ctxt->sax->error(ctxt->userData,
5748 "xmlParseStringPEReference: no name\n");
5749 ctxt->wellFormed = 0;
5750 ctxt->disableSAX = 1;
5751 } else {
5752 cur = *ptr;
5753 if (cur == ';') {
5754 ptr++;
5755 cur = *ptr;
5756 if ((ctxt->sax != NULL) &&
5757 (ctxt->sax->getParameterEntity != NULL))
5758 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5759 name);
5760 if (entity == NULL) {
5761 /*
5762 * [ WFC: Entity Declared ]
5763 * In a document without any DTD, a document with only an
5764 * internal DTD subset which contains no parameter entity
5765 * references, or a document with "standalone='yes'", ...
5766 * ... The declaration of a parameter entity must precede
5767 * any reference to it...
5768 */
5769 if ((ctxt->standalone == 1) ||
5770 ((ctxt->hasExternalSubset == 0) &&
5771 (ctxt->hasPErefs == 0))) {
5772 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5774 ctxt->sax->error(ctxt->userData,
5775 "PEReference: %%%s; not found\n", name);
5776 ctxt->wellFormed = 0;
5777 ctxt->disableSAX = 1;
5778 } else {
5779 /*
5780 * [ VC: Entity Declared ]
5781 * In a document with an external subset or external
5782 * parameter entities with "standalone='no'", ...
5783 * ... The declaration of a parameter entity must
5784 * precede any reference to it...
5785 */
5786 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5787 ctxt->sax->warning(ctxt->userData,
5788 "PEReference: %%%s; not found\n", name);
5789 ctxt->valid = 0;
5790 }
5791 } else {
5792 /*
5793 * Internal checking in case the entity quest barfed
5794 */
5795 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5796 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5797 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5798 ctxt->sax->warning(ctxt->userData,
5799 "Internal: %%%s; is not a parameter entity\n", name);
5800 }
5801 }
5802 ctxt->hasPErefs = 1;
5803 } else {
5804 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5806 ctxt->sax->error(ctxt->userData,
5807 "xmlParseStringPEReference: expecting ';'\n");
5808 ctxt->wellFormed = 0;
5809 ctxt->disableSAX = 1;
5810 }
5811 xmlFree(name);
5812 }
5813 }
5814 *str = ptr;
5815 return(entity);
5816}
5817
5818/**
5819 * xmlParseDocTypeDecl:
5820 * @ctxt: an XML parser context
5821 *
5822 * parse a DOCTYPE declaration
5823 *
5824 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5825 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5826 *
5827 * [ VC: Root Element Type ]
5828 * The Name in the document type declaration must match the element
5829 * type of the root element.
5830 */
5831
5832void
5833xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5834 xmlChar *name = NULL;
5835 xmlChar *ExternalID = NULL;
5836 xmlChar *URI = NULL;
5837
5838 /*
5839 * We know that '<!DOCTYPE' has been detected.
5840 */
5841 SKIP(9);
5842
5843 SKIP_BLANKS;
5844
5845 /*
5846 * Parse the DOCTYPE name.
5847 */
5848 name = xmlParseName(ctxt);
5849 if (name == NULL) {
5850 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852 ctxt->sax->error(ctxt->userData,
5853 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5854 ctxt->wellFormed = 0;
5855 ctxt->disableSAX = 1;
5856 }
5857 ctxt->intSubName = name;
5858
5859 SKIP_BLANKS;
5860
5861 /*
5862 * Check for SystemID and ExternalID
5863 */
5864 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5865
5866 if ((URI != NULL) || (ExternalID != NULL)) {
5867 ctxt->hasExternalSubset = 1;
5868 }
5869 ctxt->extSubURI = URI;
5870 ctxt->extSubSystem = ExternalID;
5871
5872 SKIP_BLANKS;
5873
5874 /*
5875 * Create and update the internal subset.
5876 */
5877 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5878 (!ctxt->disableSAX))
5879 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5880
5881 /*
5882 * Is there any internal subset declarations ?
5883 * they are handled separately in xmlParseInternalSubset()
5884 */
5885 if (RAW == '[')
5886 return;
5887
5888 /*
5889 * We should be at the end of the DOCTYPE declaration.
5890 */
5891 if (RAW != '>') {
5892 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5893 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5894 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5895 ctxt->wellFormed = 0;
5896 ctxt->disableSAX = 1;
5897 }
5898 NEXT;
5899}
5900
5901/**
5902 * xmlParseInternalsubset:
5903 * @ctxt: an XML parser context
5904 *
5905 * parse the internal subset declaration
5906 *
5907 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5908 */
5909
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005910static void
Owen Taylor3473f882001-02-23 17:55:21 +00005911xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5912 /*
5913 * Is there any DTD definition ?
5914 */
5915 if (RAW == '[') {
5916 ctxt->instate = XML_PARSER_DTD;
5917 NEXT;
5918 /*
5919 * Parse the succession of Markup declarations and
5920 * PEReferences.
5921 * Subsequence (markupdecl | PEReference | S)*
5922 */
5923 while (RAW != ']') {
5924 const xmlChar *check = CUR_PTR;
5925 int cons = ctxt->input->consumed;
5926
5927 SKIP_BLANKS;
5928 xmlParseMarkupDecl(ctxt);
5929 xmlParsePEReference(ctxt);
5930
5931 /*
5932 * Pop-up of finished entities.
5933 */
5934 while ((RAW == 0) && (ctxt->inputNr > 1))
5935 xmlPopInput(ctxt);
5936
5937 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5938 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5940 ctxt->sax->error(ctxt->userData,
5941 "xmlParseInternalSubset: error detected in Markup declaration\n");
5942 ctxt->wellFormed = 0;
5943 ctxt->disableSAX = 1;
5944 break;
5945 }
5946 }
5947 if (RAW == ']') {
5948 NEXT;
5949 SKIP_BLANKS;
5950 }
5951 }
5952
5953 /*
5954 * We should be at the end of the DOCTYPE declaration.
5955 */
5956 if (RAW != '>') {
5957 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5960 ctxt->wellFormed = 0;
5961 ctxt->disableSAX = 1;
5962 }
5963 NEXT;
5964}
5965
5966/**
5967 * xmlParseAttribute:
5968 * @ctxt: an XML parser context
5969 * @value: a xmlChar ** used to store the value of the attribute
5970 *
5971 * parse an attribute
5972 *
5973 * [41] Attribute ::= Name Eq AttValue
5974 *
5975 * [ WFC: No External Entity References ]
5976 * Attribute values cannot contain direct or indirect entity references
5977 * to external entities.
5978 *
5979 * [ WFC: No < in Attribute Values ]
5980 * The replacement text of any entity referred to directly or indirectly in
5981 * an attribute value (other than "&lt;") must not contain a <.
5982 *
5983 * [ VC: Attribute Value Type ]
5984 * The attribute must have been declared; the value must be of the type
5985 * declared for it.
5986 *
5987 * [25] Eq ::= S? '=' S?
5988 *
5989 * With namespace:
5990 *
5991 * [NS 11] Attribute ::= QName Eq AttValue
5992 *
5993 * Also the case QName == xmlns:??? is handled independently as a namespace
5994 * definition.
5995 *
5996 * Returns the attribute name, and the value in *value.
5997 */
5998
5999xmlChar *
6000xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6001 xmlChar *name, *val;
6002
6003 *value = NULL;
6004 name = xmlParseName(ctxt);
6005 if (name == NULL) {
6006 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6008 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6009 ctxt->wellFormed = 0;
6010 ctxt->disableSAX = 1;
6011 return(NULL);
6012 }
6013
6014 /*
6015 * read the value
6016 */
6017 SKIP_BLANKS;
6018 if (RAW == '=') {
6019 NEXT;
6020 SKIP_BLANKS;
6021 val = xmlParseAttValue(ctxt);
6022 ctxt->instate = XML_PARSER_CONTENT;
6023 } else {
6024 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6026 ctxt->sax->error(ctxt->userData,
6027 "Specification mandate value for attribute %s\n", name);
6028 ctxt->wellFormed = 0;
6029 ctxt->disableSAX = 1;
6030 xmlFree(name);
6031 return(NULL);
6032 }
6033
6034 /*
6035 * Check that xml:lang conforms to the specification
6036 * No more registered as an error, just generate a warning now
6037 * since this was deprecated in XML second edition
6038 */
6039 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6040 if (!xmlCheckLanguageID(val)) {
6041 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6042 ctxt->sax->warning(ctxt->userData,
6043 "Malformed value for xml:lang : %s\n", val);
6044 }
6045 }
6046
6047 /*
6048 * Check that xml:space conforms to the specification
6049 */
6050 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6051 if (xmlStrEqual(val, BAD_CAST "default"))
6052 *(ctxt->space) = 0;
6053 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6054 *(ctxt->space) = 1;
6055 else {
6056 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6058 ctxt->sax->error(ctxt->userData,
6059"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6060 val);
6061 ctxt->wellFormed = 0;
6062 ctxt->disableSAX = 1;
6063 }
6064 }
6065
6066 *value = val;
6067 return(name);
6068}
6069
6070/**
6071 * xmlParseStartTag:
6072 * @ctxt: an XML parser context
6073 *
6074 * parse a start of tag either for rule element or
6075 * EmptyElement. In both case we don't parse the tag closing chars.
6076 *
6077 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6078 *
6079 * [ WFC: Unique Att Spec ]
6080 * No attribute name may appear more than once in the same start-tag or
6081 * empty-element tag.
6082 *
6083 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6084 *
6085 * [ WFC: Unique Att Spec ]
6086 * No attribute name may appear more than once in the same start-tag or
6087 * empty-element tag.
6088 *
6089 * With namespace:
6090 *
6091 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6092 *
6093 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6094 *
6095 * Returns the element name parsed
6096 */
6097
6098xmlChar *
6099xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6100 xmlChar *name;
6101 xmlChar *attname;
6102 xmlChar *attvalue;
6103 const xmlChar **atts = NULL;
6104 int nbatts = 0;
6105 int maxatts = 0;
6106 int i;
6107
6108 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006109 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006110
6111 name = xmlParseName(ctxt);
6112 if (name == NULL) {
6113 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6115 ctxt->sax->error(ctxt->userData,
6116 "xmlParseStartTag: invalid element name\n");
6117 ctxt->wellFormed = 0;
6118 ctxt->disableSAX = 1;
6119 return(NULL);
6120 }
6121
6122 /*
6123 * Now parse the attributes, it ends up with the ending
6124 *
6125 * (S Attribute)* S?
6126 */
6127 SKIP_BLANKS;
6128 GROW;
6129
Daniel Veillard21a0f912001-02-25 19:54:14 +00006130 while ((RAW != '>') &&
6131 ((RAW != '/') || (NXT(1) != '>')) &&
6132 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006133 const xmlChar *q = CUR_PTR;
6134 int cons = ctxt->input->consumed;
6135
6136 attname = xmlParseAttribute(ctxt, &attvalue);
6137 if ((attname != NULL) && (attvalue != NULL)) {
6138 /*
6139 * [ WFC: Unique Att Spec ]
6140 * No attribute name may appear more than once in the same
6141 * start-tag or empty-element tag.
6142 */
6143 for (i = 0; i < nbatts;i += 2) {
6144 if (xmlStrEqual(atts[i], attname)) {
6145 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6146 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6147 ctxt->sax->error(ctxt->userData,
6148 "Attribute %s redefined\n",
6149 attname);
6150 ctxt->wellFormed = 0;
6151 ctxt->disableSAX = 1;
6152 xmlFree(attname);
6153 xmlFree(attvalue);
6154 goto failed;
6155 }
6156 }
6157
6158 /*
6159 * Add the pair to atts
6160 */
6161 if (atts == NULL) {
6162 maxatts = 10;
6163 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6164 if (atts == NULL) {
6165 xmlGenericError(xmlGenericErrorContext,
6166 "malloc of %ld byte failed\n",
6167 maxatts * (long)sizeof(xmlChar *));
6168 return(NULL);
6169 }
6170 } else if (nbatts + 4 > maxatts) {
6171 maxatts *= 2;
6172 atts = (const xmlChar **) xmlRealloc((void *) atts,
6173 maxatts * sizeof(xmlChar *));
6174 if (atts == NULL) {
6175 xmlGenericError(xmlGenericErrorContext,
6176 "realloc of %ld byte failed\n",
6177 maxatts * (long)sizeof(xmlChar *));
6178 return(NULL);
6179 }
6180 }
6181 atts[nbatts++] = attname;
6182 atts[nbatts++] = attvalue;
6183 atts[nbatts] = NULL;
6184 atts[nbatts + 1] = NULL;
6185 } else {
6186 if (attname != NULL)
6187 xmlFree(attname);
6188 if (attvalue != NULL)
6189 xmlFree(attvalue);
6190 }
6191
6192failed:
6193
6194 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6195 break;
6196 if (!IS_BLANK(RAW)) {
6197 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6199 ctxt->sax->error(ctxt->userData,
6200 "attributes construct error\n");
6201 ctxt->wellFormed = 0;
6202 ctxt->disableSAX = 1;
6203 }
6204 SKIP_BLANKS;
6205 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6206 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6208 ctxt->sax->error(ctxt->userData,
6209 "xmlParseStartTag: problem parsing attributes\n");
6210 ctxt->wellFormed = 0;
6211 ctxt->disableSAX = 1;
6212 break;
6213 }
6214 GROW;
6215 }
6216
6217 /*
6218 * SAX: Start of Element !
6219 */
6220 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6221 (!ctxt->disableSAX))
6222 ctxt->sax->startElement(ctxt->userData, name, atts);
6223
6224 if (atts != NULL) {
6225 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6226 xmlFree((void *) atts);
6227 }
6228 return(name);
6229}
6230
6231/**
6232 * xmlParseEndTag:
6233 * @ctxt: an XML parser context
6234 *
6235 * parse an end of tag
6236 *
6237 * [42] ETag ::= '</' Name S? '>'
6238 *
6239 * With namespace
6240 *
6241 * [NS 9] ETag ::= '</' QName S? '>'
6242 */
6243
6244void
6245xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6246 xmlChar *name;
6247 xmlChar *oldname;
6248
6249 GROW;
6250 if ((RAW != '<') || (NXT(1) != '/')) {
6251 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6253 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6254 ctxt->wellFormed = 0;
6255 ctxt->disableSAX = 1;
6256 return;
6257 }
6258 SKIP(2);
6259
6260 name = xmlParseName(ctxt);
6261
6262 /*
6263 * We should definitely be at the ending "S? '>'" part
6264 */
6265 GROW;
6266 SKIP_BLANKS;
6267 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6268 ctxt->errNo = XML_ERR_GT_REQUIRED;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6271 ctxt->wellFormed = 0;
6272 ctxt->disableSAX = 1;
6273 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006274 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006275
6276 /*
6277 * [ WFC: Element Type Match ]
6278 * The Name in an element's end-tag must match the element type in the
6279 * start-tag.
6280 *
6281 */
6282 if ((name == NULL) || (ctxt->name == NULL) ||
6283 (!xmlStrEqual(name, ctxt->name))) {
6284 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6286 if ((name != NULL) && (ctxt->name != NULL)) {
6287 ctxt->sax->error(ctxt->userData,
6288 "Opening and ending tag mismatch: %s and %s\n",
6289 ctxt->name, name);
6290 } else if (ctxt->name != NULL) {
6291 ctxt->sax->error(ctxt->userData,
6292 "Ending tag eror for: %s\n", ctxt->name);
6293 } else {
6294 ctxt->sax->error(ctxt->userData,
6295 "Ending tag error: internal error ???\n");
6296 }
6297
6298 }
6299 ctxt->wellFormed = 0;
6300 ctxt->disableSAX = 1;
6301 }
6302
6303 /*
6304 * SAX: End of Tag
6305 */
6306 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6307 (!ctxt->disableSAX))
6308 ctxt->sax->endElement(ctxt->userData, name);
6309
6310 if (name != NULL)
6311 xmlFree(name);
6312 oldname = namePop(ctxt);
6313 spacePop(ctxt);
6314 if (oldname != NULL) {
6315#ifdef DEBUG_STACK
6316 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6317#endif
6318 xmlFree(oldname);
6319 }
6320 return;
6321}
6322
6323/**
6324 * xmlParseCDSect:
6325 * @ctxt: an XML parser context
6326 *
6327 * Parse escaped pure raw content.
6328 *
6329 * [18] CDSect ::= CDStart CData CDEnd
6330 *
6331 * [19] CDStart ::= '<![CDATA['
6332 *
6333 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6334 *
6335 * [21] CDEnd ::= ']]>'
6336 */
6337void
6338xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6339 xmlChar *buf = NULL;
6340 int len = 0;
6341 int size = XML_PARSER_BUFFER_SIZE;
6342 int r, rl;
6343 int s, sl;
6344 int cur, l;
6345 int count = 0;
6346
6347 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6348 (NXT(2) == '[') && (NXT(3) == 'C') &&
6349 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6350 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6351 (NXT(8) == '[')) {
6352 SKIP(9);
6353 } else
6354 return;
6355
6356 ctxt->instate = XML_PARSER_CDATA_SECTION;
6357 r = CUR_CHAR(rl);
6358 if (!IS_CHAR(r)) {
6359 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361 ctxt->sax->error(ctxt->userData,
6362 "CData section not finished\n");
6363 ctxt->wellFormed = 0;
6364 ctxt->disableSAX = 1;
6365 ctxt->instate = XML_PARSER_CONTENT;
6366 return;
6367 }
6368 NEXTL(rl);
6369 s = CUR_CHAR(sl);
6370 if (!IS_CHAR(s)) {
6371 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6373 ctxt->sax->error(ctxt->userData,
6374 "CData section not finished\n");
6375 ctxt->wellFormed = 0;
6376 ctxt->disableSAX = 1;
6377 ctxt->instate = XML_PARSER_CONTENT;
6378 return;
6379 }
6380 NEXTL(sl);
6381 cur = CUR_CHAR(l);
6382 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6383 if (buf == NULL) {
6384 xmlGenericError(xmlGenericErrorContext,
6385 "malloc of %d byte failed\n", size);
6386 return;
6387 }
6388 while (IS_CHAR(cur) &&
6389 ((r != ']') || (s != ']') || (cur != '>'))) {
6390 if (len + 5 >= size) {
6391 size *= 2;
6392 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6393 if (buf == NULL) {
6394 xmlGenericError(xmlGenericErrorContext,
6395 "realloc of %d byte failed\n", size);
6396 return;
6397 }
6398 }
6399 COPY_BUF(rl,buf,len,r);
6400 r = s;
6401 rl = sl;
6402 s = cur;
6403 sl = l;
6404 count++;
6405 if (count > 50) {
6406 GROW;
6407 count = 0;
6408 }
6409 NEXTL(l);
6410 cur = CUR_CHAR(l);
6411 }
6412 buf[len] = 0;
6413 ctxt->instate = XML_PARSER_CONTENT;
6414 if (cur != '>') {
6415 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6417 ctxt->sax->error(ctxt->userData,
6418 "CData section not finished\n%.50s\n", buf);
6419 ctxt->wellFormed = 0;
6420 ctxt->disableSAX = 1;
6421 xmlFree(buf);
6422 return;
6423 }
6424 NEXTL(l);
6425
6426 /*
6427 * Ok the buffer is to be consumed as cdata.
6428 */
6429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6430 if (ctxt->sax->cdataBlock != NULL)
6431 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006432 else if (ctxt->sax->characters != NULL)
6433 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006434 }
6435 xmlFree(buf);
6436}
6437
6438/**
6439 * xmlParseContent:
6440 * @ctxt: an XML parser context
6441 *
6442 * Parse a content:
6443 *
6444 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6445 */
6446
6447void
6448xmlParseContent(xmlParserCtxtPtr ctxt) {
6449 GROW;
6450 while (((RAW != 0) || (ctxt->token != 0)) &&
6451 ((RAW != '<') || (NXT(1) != '/'))) {
6452 const xmlChar *test = CUR_PTR;
6453 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006454 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006455 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006456
6457 /*
6458 * Handle possible processed charrefs.
6459 */
6460 if (ctxt->token != 0) {
6461 xmlParseCharData(ctxt, 0);
6462 }
6463 /*
6464 * First case : a Processing Instruction.
6465 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006466 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006467 xmlParsePI(ctxt);
6468 }
6469
6470 /*
6471 * Second case : a CDSection
6472 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006473 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006474 (NXT(2) == '[') && (NXT(3) == 'C') &&
6475 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6476 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6477 (NXT(8) == '[')) {
6478 xmlParseCDSect(ctxt);
6479 }
6480
6481 /*
6482 * Third case : a comment
6483 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006484 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006485 (NXT(2) == '-') && (NXT(3) == '-')) {
6486 xmlParseComment(ctxt);
6487 ctxt->instate = XML_PARSER_CONTENT;
6488 }
6489
6490 /*
6491 * Fourth case : a sub-element.
6492 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006493 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006494 xmlParseElement(ctxt);
6495 }
6496
6497 /*
6498 * Fifth case : a reference. If if has not been resolved,
6499 * parsing returns it's Name, create the node
6500 */
6501
Daniel Veillard21a0f912001-02-25 19:54:14 +00006502 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006503 xmlParseReference(ctxt);
6504 }
6505
6506 /*
6507 * Last case, text. Note that References are handled directly.
6508 */
6509 else {
6510 xmlParseCharData(ctxt, 0);
6511 }
6512
6513 GROW;
6514 /*
6515 * Pop-up of finished entities.
6516 */
6517 while ((RAW == 0) && (ctxt->inputNr > 1))
6518 xmlPopInput(ctxt);
6519 SHRINK;
6520
6521 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6522 (tok == ctxt->token)) {
6523 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6525 ctxt->sax->error(ctxt->userData,
6526 "detected an error in element content\n");
6527 ctxt->wellFormed = 0;
6528 ctxt->disableSAX = 1;
6529 ctxt->instate = XML_PARSER_EOF;
6530 break;
6531 }
6532 }
6533}
6534
6535/**
6536 * xmlParseElement:
6537 * @ctxt: an XML parser context
6538 *
6539 * parse an XML element, this is highly recursive
6540 *
6541 * [39] element ::= EmptyElemTag | STag content ETag
6542 *
6543 * [ WFC: Element Type Match ]
6544 * The Name in an element's end-tag must match the element type in the
6545 * start-tag.
6546 *
6547 * [ VC: Element Valid ]
6548 * An element is valid if there is a declaration matching elementdecl
6549 * where the Name matches the element type and one of the following holds:
6550 * - The declaration matches EMPTY and the element has no content.
6551 * - The declaration matches children and the sequence of child elements
6552 * belongs to the language generated by the regular expression in the
6553 * content model, with optional white space (characters matching the
6554 * nonterminal S) between each pair of child elements.
6555 * - The declaration matches Mixed and the content consists of character
6556 * data and child elements whose types match names in the content model.
6557 * - The declaration matches ANY, and the types of any child elements have
6558 * been declared.
6559 */
6560
6561void
6562xmlParseElement(xmlParserCtxtPtr ctxt) {
6563 const xmlChar *openTag = CUR_PTR;
6564 xmlChar *name;
6565 xmlChar *oldname;
6566 xmlParserNodeInfo node_info;
6567 xmlNodePtr ret;
6568
6569 /* Capture start position */
6570 if (ctxt->record_info) {
6571 node_info.begin_pos = ctxt->input->consumed +
6572 (CUR_PTR - ctxt->input->base);
6573 node_info.begin_line = ctxt->input->line;
6574 }
6575
6576 if (ctxt->spaceNr == 0)
6577 spacePush(ctxt, -1);
6578 else
6579 spacePush(ctxt, *ctxt->space);
6580
6581 name = xmlParseStartTag(ctxt);
6582 if (name == NULL) {
6583 spacePop(ctxt);
6584 return;
6585 }
6586 namePush(ctxt, name);
6587 ret = ctxt->node;
6588
6589 /*
6590 * [ VC: Root Element Type ]
6591 * The Name in the document type declaration must match the element
6592 * type of the root element.
6593 */
6594 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6595 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6596 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6597
6598 /*
6599 * Check for an Empty Element.
6600 */
6601 if ((RAW == '/') && (NXT(1) == '>')) {
6602 SKIP(2);
6603 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6604 (!ctxt->disableSAX))
6605 ctxt->sax->endElement(ctxt->userData, name);
6606 oldname = namePop(ctxt);
6607 spacePop(ctxt);
6608 if (oldname != NULL) {
6609#ifdef DEBUG_STACK
6610 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6611#endif
6612 xmlFree(oldname);
6613 }
6614 if ( ret != NULL && ctxt->record_info ) {
6615 node_info.end_pos = ctxt->input->consumed +
6616 (CUR_PTR - ctxt->input->base);
6617 node_info.end_line = ctxt->input->line;
6618 node_info.node = ret;
6619 xmlParserAddNodeInfo(ctxt, &node_info);
6620 }
6621 return;
6622 }
6623 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006624 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006625 } else {
6626 ctxt->errNo = XML_ERR_GT_REQUIRED;
6627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6628 ctxt->sax->error(ctxt->userData,
6629 "Couldn't find end of Start Tag\n%.30s\n",
6630 openTag);
6631 ctxt->wellFormed = 0;
6632 ctxt->disableSAX = 1;
6633
6634 /*
6635 * end of parsing of this node.
6636 */
6637 nodePop(ctxt);
6638 oldname = namePop(ctxt);
6639 spacePop(ctxt);
6640 if (oldname != NULL) {
6641#ifdef DEBUG_STACK
6642 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6643#endif
6644 xmlFree(oldname);
6645 }
6646
6647 /*
6648 * Capture end position and add node
6649 */
6650 if ( ret != NULL && ctxt->record_info ) {
6651 node_info.end_pos = ctxt->input->consumed +
6652 (CUR_PTR - ctxt->input->base);
6653 node_info.end_line = ctxt->input->line;
6654 node_info.node = ret;
6655 xmlParserAddNodeInfo(ctxt, &node_info);
6656 }
6657 return;
6658 }
6659
6660 /*
6661 * Parse the content of the element:
6662 */
6663 xmlParseContent(ctxt);
6664 if (!IS_CHAR(RAW)) {
6665 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6667 ctxt->sax->error(ctxt->userData,
6668 "Premature end of data in tag %.30s\n", openTag);
6669 ctxt->wellFormed = 0;
6670 ctxt->disableSAX = 1;
6671
6672 /*
6673 * end of parsing of this node.
6674 */
6675 nodePop(ctxt);
6676 oldname = namePop(ctxt);
6677 spacePop(ctxt);
6678 if (oldname != NULL) {
6679#ifdef DEBUG_STACK
6680 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6681#endif
6682 xmlFree(oldname);
6683 }
6684 return;
6685 }
6686
6687 /*
6688 * parse the end of tag: '</' should be here.
6689 */
6690 xmlParseEndTag(ctxt);
6691
6692 /*
6693 * Capture end position and add node
6694 */
6695 if ( ret != NULL && ctxt->record_info ) {
6696 node_info.end_pos = ctxt->input->consumed +
6697 (CUR_PTR - ctxt->input->base);
6698 node_info.end_line = ctxt->input->line;
6699 node_info.node = ret;
6700 xmlParserAddNodeInfo(ctxt, &node_info);
6701 }
6702}
6703
6704/**
6705 * xmlParseVersionNum:
6706 * @ctxt: an XML parser context
6707 *
6708 * parse the XML version value.
6709 *
6710 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6711 *
6712 * Returns the string giving the XML version number, or NULL
6713 */
6714xmlChar *
6715xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6716 xmlChar *buf = NULL;
6717 int len = 0;
6718 int size = 10;
6719 xmlChar cur;
6720
6721 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6722 if (buf == NULL) {
6723 xmlGenericError(xmlGenericErrorContext,
6724 "malloc of %d byte failed\n", size);
6725 return(NULL);
6726 }
6727 cur = CUR;
6728 while (((cur >= 'a') && (cur <= 'z')) ||
6729 ((cur >= 'A') && (cur <= 'Z')) ||
6730 ((cur >= '0') && (cur <= '9')) ||
6731 (cur == '_') || (cur == '.') ||
6732 (cur == ':') || (cur == '-')) {
6733 if (len + 1 >= size) {
6734 size *= 2;
6735 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6736 if (buf == NULL) {
6737 xmlGenericError(xmlGenericErrorContext,
6738 "realloc of %d byte failed\n", size);
6739 return(NULL);
6740 }
6741 }
6742 buf[len++] = cur;
6743 NEXT;
6744 cur=CUR;
6745 }
6746 buf[len] = 0;
6747 return(buf);
6748}
6749
6750/**
6751 * xmlParseVersionInfo:
6752 * @ctxt: an XML parser context
6753 *
6754 * parse the XML version.
6755 *
6756 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6757 *
6758 * [25] Eq ::= S? '=' S?
6759 *
6760 * Returns the version string, e.g. "1.0"
6761 */
6762
6763xmlChar *
6764xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6765 xmlChar *version = NULL;
6766 const xmlChar *q;
6767
6768 if ((RAW == 'v') && (NXT(1) == 'e') &&
6769 (NXT(2) == 'r') && (NXT(3) == 's') &&
6770 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6771 (NXT(6) == 'n')) {
6772 SKIP(7);
6773 SKIP_BLANKS;
6774 if (RAW != '=') {
6775 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6777 ctxt->sax->error(ctxt->userData,
6778 "xmlParseVersionInfo : expected '='\n");
6779 ctxt->wellFormed = 0;
6780 ctxt->disableSAX = 1;
6781 return(NULL);
6782 }
6783 NEXT;
6784 SKIP_BLANKS;
6785 if (RAW == '"') {
6786 NEXT;
6787 q = CUR_PTR;
6788 version = xmlParseVersionNum(ctxt);
6789 if (RAW != '"') {
6790 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6792 ctxt->sax->error(ctxt->userData,
6793 "String not closed\n%.50s\n", q);
6794 ctxt->wellFormed = 0;
6795 ctxt->disableSAX = 1;
6796 } else
6797 NEXT;
6798 } else if (RAW == '\''){
6799 NEXT;
6800 q = CUR_PTR;
6801 version = xmlParseVersionNum(ctxt);
6802 if (RAW != '\'') {
6803 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6805 ctxt->sax->error(ctxt->userData,
6806 "String not closed\n%.50s\n", q);
6807 ctxt->wellFormed = 0;
6808 ctxt->disableSAX = 1;
6809 } else
6810 NEXT;
6811 } else {
6812 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6814 ctxt->sax->error(ctxt->userData,
6815 "xmlParseVersionInfo : expected ' or \"\n");
6816 ctxt->wellFormed = 0;
6817 ctxt->disableSAX = 1;
6818 }
6819 }
6820 return(version);
6821}
6822
6823/**
6824 * xmlParseEncName:
6825 * @ctxt: an XML parser context
6826 *
6827 * parse the XML encoding name
6828 *
6829 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6830 *
6831 * Returns the encoding name value or NULL
6832 */
6833xmlChar *
6834xmlParseEncName(xmlParserCtxtPtr ctxt) {
6835 xmlChar *buf = NULL;
6836 int len = 0;
6837 int size = 10;
6838 xmlChar cur;
6839
6840 cur = CUR;
6841 if (((cur >= 'a') && (cur <= 'z')) ||
6842 ((cur >= 'A') && (cur <= 'Z'))) {
6843 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6844 if (buf == NULL) {
6845 xmlGenericError(xmlGenericErrorContext,
6846 "malloc of %d byte failed\n", size);
6847 return(NULL);
6848 }
6849
6850 buf[len++] = cur;
6851 NEXT;
6852 cur = CUR;
6853 while (((cur >= 'a') && (cur <= 'z')) ||
6854 ((cur >= 'A') && (cur <= 'Z')) ||
6855 ((cur >= '0') && (cur <= '9')) ||
6856 (cur == '.') || (cur == '_') ||
6857 (cur == '-')) {
6858 if (len + 1 >= size) {
6859 size *= 2;
6860 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6861 if (buf == NULL) {
6862 xmlGenericError(xmlGenericErrorContext,
6863 "realloc of %d byte failed\n", size);
6864 return(NULL);
6865 }
6866 }
6867 buf[len++] = cur;
6868 NEXT;
6869 cur = CUR;
6870 if (cur == 0) {
6871 SHRINK;
6872 GROW;
6873 cur = CUR;
6874 }
6875 }
6876 buf[len] = 0;
6877 } else {
6878 ctxt->errNo = XML_ERR_ENCODING_NAME;
6879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6880 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6881 ctxt->wellFormed = 0;
6882 ctxt->disableSAX = 1;
6883 }
6884 return(buf);
6885}
6886
6887/**
6888 * xmlParseEncodingDecl:
6889 * @ctxt: an XML parser context
6890 *
6891 * parse the XML encoding declaration
6892 *
6893 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6894 *
6895 * this setups the conversion filters.
6896 *
6897 * Returns the encoding value or NULL
6898 */
6899
6900xmlChar *
6901xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6902 xmlChar *encoding = NULL;
6903 const xmlChar *q;
6904
6905 SKIP_BLANKS;
6906 if ((RAW == 'e') && (NXT(1) == 'n') &&
6907 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6908 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6909 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6910 SKIP(8);
6911 SKIP_BLANKS;
6912 if (RAW != '=') {
6913 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6915 ctxt->sax->error(ctxt->userData,
6916 "xmlParseEncodingDecl : expected '='\n");
6917 ctxt->wellFormed = 0;
6918 ctxt->disableSAX = 1;
6919 return(NULL);
6920 }
6921 NEXT;
6922 SKIP_BLANKS;
6923 if (RAW == '"') {
6924 NEXT;
6925 q = CUR_PTR;
6926 encoding = xmlParseEncName(ctxt);
6927 if (RAW != '"') {
6928 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6930 ctxt->sax->error(ctxt->userData,
6931 "String not closed\n%.50s\n", q);
6932 ctxt->wellFormed = 0;
6933 ctxt->disableSAX = 1;
6934 } else
6935 NEXT;
6936 } else if (RAW == '\''){
6937 NEXT;
6938 q = CUR_PTR;
6939 encoding = xmlParseEncName(ctxt);
6940 if (RAW != '\'') {
6941 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6943 ctxt->sax->error(ctxt->userData,
6944 "String not closed\n%.50s\n", q);
6945 ctxt->wellFormed = 0;
6946 ctxt->disableSAX = 1;
6947 } else
6948 NEXT;
6949 } else if (RAW == '"'){
6950 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6952 ctxt->sax->error(ctxt->userData,
6953 "xmlParseEncodingDecl : expected ' or \"\n");
6954 ctxt->wellFormed = 0;
6955 ctxt->disableSAX = 1;
6956 }
6957 if (encoding != NULL) {
6958 xmlCharEncoding enc;
6959 xmlCharEncodingHandlerPtr handler;
6960
6961 if (ctxt->input->encoding != NULL)
6962 xmlFree((xmlChar *) ctxt->input->encoding);
6963 ctxt->input->encoding = encoding;
6964
6965 enc = xmlParseCharEncoding((const char *) encoding);
6966 /*
6967 * registered set of known encodings
6968 */
6969 if (enc != XML_CHAR_ENCODING_ERROR) {
6970 xmlSwitchEncoding(ctxt, enc);
6971 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6972 xmlFree(encoding);
6973 return(NULL);
6974 }
6975 } else {
6976 /*
6977 * fallback for unknown encodings
6978 */
6979 handler = xmlFindCharEncodingHandler((const char *) encoding);
6980 if (handler != NULL) {
6981 xmlSwitchToEncoding(ctxt, handler);
6982 } else {
6983 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6985 ctxt->sax->error(ctxt->userData,
6986 "Unsupported encoding %s\n", encoding);
6987 return(NULL);
6988 }
6989 }
6990 }
6991 }
6992 return(encoding);
6993}
6994
6995/**
6996 * xmlParseSDDecl:
6997 * @ctxt: an XML parser context
6998 *
6999 * parse the XML standalone declaration
7000 *
7001 * [32] SDDecl ::= S 'standalone' Eq
7002 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7003 *
7004 * [ VC: Standalone Document Declaration ]
7005 * TODO The standalone document declaration must have the value "no"
7006 * if any external markup declarations contain declarations of:
7007 * - attributes with default values, if elements to which these
7008 * attributes apply appear in the document without specifications
7009 * of values for these attributes, or
7010 * - entities (other than amp, lt, gt, apos, quot), if references
7011 * to those entities appear in the document, or
7012 * - attributes with values subject to normalization, where the
7013 * attribute appears in the document with a value which will change
7014 * as a result of normalization, or
7015 * - element types with element content, if white space occurs directly
7016 * within any instance of those types.
7017 *
7018 * Returns 1 if standalone, 0 otherwise
7019 */
7020
7021int
7022xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7023 int standalone = -1;
7024
7025 SKIP_BLANKS;
7026 if ((RAW == 's') && (NXT(1) == 't') &&
7027 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7028 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7029 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7030 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7031 SKIP(10);
7032 SKIP_BLANKS;
7033 if (RAW != '=') {
7034 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7036 ctxt->sax->error(ctxt->userData,
7037 "XML standalone declaration : expected '='\n");
7038 ctxt->wellFormed = 0;
7039 ctxt->disableSAX = 1;
7040 return(standalone);
7041 }
7042 NEXT;
7043 SKIP_BLANKS;
7044 if (RAW == '\''){
7045 NEXT;
7046 if ((RAW == 'n') && (NXT(1) == 'o')) {
7047 standalone = 0;
7048 SKIP(2);
7049 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7050 (NXT(2) == 's')) {
7051 standalone = 1;
7052 SKIP(3);
7053 } else {
7054 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7056 ctxt->sax->error(ctxt->userData,
7057 "standalone accepts only 'yes' or 'no'\n");
7058 ctxt->wellFormed = 0;
7059 ctxt->disableSAX = 1;
7060 }
7061 if (RAW != '\'') {
7062 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7064 ctxt->sax->error(ctxt->userData, "String not closed\n");
7065 ctxt->wellFormed = 0;
7066 ctxt->disableSAX = 1;
7067 } else
7068 NEXT;
7069 } else if (RAW == '"'){
7070 NEXT;
7071 if ((RAW == 'n') && (NXT(1) == 'o')) {
7072 standalone = 0;
7073 SKIP(2);
7074 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7075 (NXT(2) == 's')) {
7076 standalone = 1;
7077 SKIP(3);
7078 } else {
7079 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7081 ctxt->sax->error(ctxt->userData,
7082 "standalone accepts only 'yes' or 'no'\n");
7083 ctxt->wellFormed = 0;
7084 ctxt->disableSAX = 1;
7085 }
7086 if (RAW != '"') {
7087 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7089 ctxt->sax->error(ctxt->userData, "String not closed\n");
7090 ctxt->wellFormed = 0;
7091 ctxt->disableSAX = 1;
7092 } else
7093 NEXT;
7094 } else {
7095 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7097 ctxt->sax->error(ctxt->userData,
7098 "Standalone value not found\n");
7099 ctxt->wellFormed = 0;
7100 ctxt->disableSAX = 1;
7101 }
7102 }
7103 return(standalone);
7104}
7105
7106/**
7107 * xmlParseXMLDecl:
7108 * @ctxt: an XML parser context
7109 *
7110 * parse an XML declaration header
7111 *
7112 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7113 */
7114
7115void
7116xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7117 xmlChar *version;
7118
7119 /*
7120 * We know that '<?xml' is here.
7121 */
7122 SKIP(5);
7123
7124 if (!IS_BLANK(RAW)) {
7125 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7128 ctxt->wellFormed = 0;
7129 ctxt->disableSAX = 1;
7130 }
7131 SKIP_BLANKS;
7132
7133 /*
7134 * We should have the VersionInfo here.
7135 */
7136 version = xmlParseVersionInfo(ctxt);
7137 if (version == NULL)
7138 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7139 ctxt->version = xmlStrdup(version);
7140 xmlFree(version);
7141
7142 /*
7143 * We may have the encoding declaration
7144 */
7145 if (!IS_BLANK(RAW)) {
7146 if ((RAW == '?') && (NXT(1) == '>')) {
7147 SKIP(2);
7148 return;
7149 }
7150 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7152 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7153 ctxt->wellFormed = 0;
7154 ctxt->disableSAX = 1;
7155 }
7156 xmlParseEncodingDecl(ctxt);
7157 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7158 /*
7159 * The XML REC instructs us to stop parsing right here
7160 */
7161 return;
7162 }
7163
7164 /*
7165 * We may have the standalone status.
7166 */
7167 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7168 if ((RAW == '?') && (NXT(1) == '>')) {
7169 SKIP(2);
7170 return;
7171 }
7172 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7174 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7175 ctxt->wellFormed = 0;
7176 ctxt->disableSAX = 1;
7177 }
7178 SKIP_BLANKS;
7179 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7180
7181 SKIP_BLANKS;
7182 if ((RAW == '?') && (NXT(1) == '>')) {
7183 SKIP(2);
7184 } else if (RAW == '>') {
7185 /* Deprecated old WD ... */
7186 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7188 ctxt->sax->error(ctxt->userData,
7189 "XML declaration must end-up with '?>'\n");
7190 ctxt->wellFormed = 0;
7191 ctxt->disableSAX = 1;
7192 NEXT;
7193 } else {
7194 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7196 ctxt->sax->error(ctxt->userData,
7197 "parsing XML declaration: '?>' expected\n");
7198 ctxt->wellFormed = 0;
7199 ctxt->disableSAX = 1;
7200 MOVETO_ENDTAG(CUR_PTR);
7201 NEXT;
7202 }
7203}
7204
7205/**
7206 * xmlParseMisc:
7207 * @ctxt: an XML parser context
7208 *
7209 * parse an XML Misc* optionnal field.
7210 *
7211 * [27] Misc ::= Comment | PI | S
7212 */
7213
7214void
7215xmlParseMisc(xmlParserCtxtPtr ctxt) {
7216 while (((RAW == '<') && (NXT(1) == '?')) ||
7217 ((RAW == '<') && (NXT(1) == '!') &&
7218 (NXT(2) == '-') && (NXT(3) == '-')) ||
7219 IS_BLANK(CUR)) {
7220 if ((RAW == '<') && (NXT(1) == '?')) {
7221 xmlParsePI(ctxt);
7222 } else if (IS_BLANK(CUR)) {
7223 NEXT;
7224 } else
7225 xmlParseComment(ctxt);
7226 }
7227}
7228
7229/**
7230 * xmlParseDocument:
7231 * @ctxt: an XML parser context
7232 *
7233 * parse an XML document (and build a tree if using the standard SAX
7234 * interface).
7235 *
7236 * [1] document ::= prolog element Misc*
7237 *
7238 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7239 *
7240 * Returns 0, -1 in case of error. the parser context is augmented
7241 * as a result of the parsing.
7242 */
7243
7244int
7245xmlParseDocument(xmlParserCtxtPtr ctxt) {
7246 xmlChar start[4];
7247 xmlCharEncoding enc;
7248
7249 xmlInitParser();
7250
7251 GROW;
7252
7253 /*
7254 * SAX: beginning of the document processing.
7255 */
7256 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7257 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7258
7259 /*
7260 * Get the 4 first bytes and decode the charset
7261 * if enc != XML_CHAR_ENCODING_NONE
7262 * plug some encoding conversion routines.
7263 */
7264 start[0] = RAW;
7265 start[1] = NXT(1);
7266 start[2] = NXT(2);
7267 start[3] = NXT(3);
7268 enc = xmlDetectCharEncoding(start, 4);
7269 if (enc != XML_CHAR_ENCODING_NONE) {
7270 xmlSwitchEncoding(ctxt, enc);
7271 }
7272
7273
7274 if (CUR == 0) {
7275 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7277 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7278 ctxt->wellFormed = 0;
7279 ctxt->disableSAX = 1;
7280 }
7281
7282 /*
7283 * Check for the XMLDecl in the Prolog.
7284 */
7285 GROW;
7286 if ((RAW == '<') && (NXT(1) == '?') &&
7287 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7288 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7289
7290 /*
7291 * Note that we will switch encoding on the fly.
7292 */
7293 xmlParseXMLDecl(ctxt);
7294 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7295 /*
7296 * The XML REC instructs us to stop parsing right here
7297 */
7298 return(-1);
7299 }
7300 ctxt->standalone = ctxt->input->standalone;
7301 SKIP_BLANKS;
7302 } else {
7303 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7304 }
7305 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7306 ctxt->sax->startDocument(ctxt->userData);
7307
7308 /*
7309 * The Misc part of the Prolog
7310 */
7311 GROW;
7312 xmlParseMisc(ctxt);
7313
7314 /*
7315 * Then possibly doc type declaration(s) and more Misc
7316 * (doctypedecl Misc*)?
7317 */
7318 GROW;
7319 if ((RAW == '<') && (NXT(1) == '!') &&
7320 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7321 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7322 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7323 (NXT(8) == 'E')) {
7324
7325 ctxt->inSubset = 1;
7326 xmlParseDocTypeDecl(ctxt);
7327 if (RAW == '[') {
7328 ctxt->instate = XML_PARSER_DTD;
7329 xmlParseInternalSubset(ctxt);
7330 }
7331
7332 /*
7333 * Create and update the external subset.
7334 */
7335 ctxt->inSubset = 2;
7336 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7337 (!ctxt->disableSAX))
7338 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7339 ctxt->extSubSystem, ctxt->extSubURI);
7340 ctxt->inSubset = 0;
7341
7342
7343 ctxt->instate = XML_PARSER_PROLOG;
7344 xmlParseMisc(ctxt);
7345 }
7346
7347 /*
7348 * Time to start parsing the tree itself
7349 */
7350 GROW;
7351 if (RAW != '<') {
7352 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7353 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7354 ctxt->sax->error(ctxt->userData,
7355 "Start tag expected, '<' not found\n");
7356 ctxt->wellFormed = 0;
7357 ctxt->disableSAX = 1;
7358 ctxt->instate = XML_PARSER_EOF;
7359 } else {
7360 ctxt->instate = XML_PARSER_CONTENT;
7361 xmlParseElement(ctxt);
7362 ctxt->instate = XML_PARSER_EPILOG;
7363
7364
7365 /*
7366 * The Misc part at the end
7367 */
7368 xmlParseMisc(ctxt);
7369
7370 if (RAW != 0) {
7371 ctxt->errNo = XML_ERR_DOCUMENT_END;
7372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7373 ctxt->sax->error(ctxt->userData,
7374 "Extra content at the end of the document\n");
7375 ctxt->wellFormed = 0;
7376 ctxt->disableSAX = 1;
7377 }
7378 ctxt->instate = XML_PARSER_EOF;
7379 }
7380
7381 /*
7382 * SAX: end of the document processing.
7383 */
7384 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7385 (!ctxt->disableSAX))
7386 ctxt->sax->endDocument(ctxt->userData);
7387
7388 if (! ctxt->wellFormed) return(-1);
7389 return(0);
7390}
7391
7392/**
7393 * xmlParseExtParsedEnt:
7394 * @ctxt: an XML parser context
7395 *
7396 * parse a genreral parsed entity
7397 * An external general parsed entity is well-formed if it matches the
7398 * production labeled extParsedEnt.
7399 *
7400 * [78] extParsedEnt ::= TextDecl? content
7401 *
7402 * Returns 0, -1 in case of error. the parser context is augmented
7403 * as a result of the parsing.
7404 */
7405
7406int
7407xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7408 xmlChar start[4];
7409 xmlCharEncoding enc;
7410
7411 xmlDefaultSAXHandlerInit();
7412
7413 GROW;
7414
7415 /*
7416 * SAX: beginning of the document processing.
7417 */
7418 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7419 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7420
7421 /*
7422 * Get the 4 first bytes and decode the charset
7423 * if enc != XML_CHAR_ENCODING_NONE
7424 * plug some encoding conversion routines.
7425 */
7426 start[0] = RAW;
7427 start[1] = NXT(1);
7428 start[2] = NXT(2);
7429 start[3] = NXT(3);
7430 enc = xmlDetectCharEncoding(start, 4);
7431 if (enc != XML_CHAR_ENCODING_NONE) {
7432 xmlSwitchEncoding(ctxt, enc);
7433 }
7434
7435
7436 if (CUR == 0) {
7437 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7439 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7440 ctxt->wellFormed = 0;
7441 ctxt->disableSAX = 1;
7442 }
7443
7444 /*
7445 * Check for the XMLDecl in the Prolog.
7446 */
7447 GROW;
7448 if ((RAW == '<') && (NXT(1) == '?') &&
7449 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7450 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7451
7452 /*
7453 * Note that we will switch encoding on the fly.
7454 */
7455 xmlParseXMLDecl(ctxt);
7456 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7457 /*
7458 * The XML REC instructs us to stop parsing right here
7459 */
7460 return(-1);
7461 }
7462 SKIP_BLANKS;
7463 } else {
7464 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7465 }
7466 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7467 ctxt->sax->startDocument(ctxt->userData);
7468
7469 /*
7470 * Doing validity checking on chunk doesn't make sense
7471 */
7472 ctxt->instate = XML_PARSER_CONTENT;
7473 ctxt->validate = 0;
7474 ctxt->loadsubset = 0;
7475 ctxt->depth = 0;
7476
7477 xmlParseContent(ctxt);
7478
7479 if ((RAW == '<') && (NXT(1) == '/')) {
7480 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7482 ctxt->sax->error(ctxt->userData,
7483 "chunk is not well balanced\n");
7484 ctxt->wellFormed = 0;
7485 ctxt->disableSAX = 1;
7486 } else if (RAW != 0) {
7487 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7489 ctxt->sax->error(ctxt->userData,
7490 "extra content at the end of well balanced chunk\n");
7491 ctxt->wellFormed = 0;
7492 ctxt->disableSAX = 1;
7493 }
7494
7495 /*
7496 * SAX: end of the document processing.
7497 */
7498 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7499 (!ctxt->disableSAX))
7500 ctxt->sax->endDocument(ctxt->userData);
7501
7502 if (! ctxt->wellFormed) return(-1);
7503 return(0);
7504}
7505
7506/************************************************************************
7507 * *
7508 * Progressive parsing interfaces *
7509 * *
7510 ************************************************************************/
7511
7512/**
7513 * xmlParseLookupSequence:
7514 * @ctxt: an XML parser context
7515 * @first: the first char to lookup
7516 * @next: the next char to lookup or zero
7517 * @third: the next char to lookup or zero
7518 *
7519 * Try to find if a sequence (first, next, third) or just (first next) or
7520 * (first) is available in the input stream.
7521 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7522 * to avoid rescanning sequences of bytes, it DOES change the state of the
7523 * parser, do not use liberally.
7524 *
7525 * Returns the index to the current parsing point if the full sequence
7526 * is available, -1 otherwise.
7527 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007528static int
Owen Taylor3473f882001-02-23 17:55:21 +00007529xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7530 xmlChar next, xmlChar third) {
7531 int base, len;
7532 xmlParserInputPtr in;
7533 const xmlChar *buf;
7534
7535 in = ctxt->input;
7536 if (in == NULL) return(-1);
7537 base = in->cur - in->base;
7538 if (base < 0) return(-1);
7539 if (ctxt->checkIndex > base)
7540 base = ctxt->checkIndex;
7541 if (in->buf == NULL) {
7542 buf = in->base;
7543 len = in->length;
7544 } else {
7545 buf = in->buf->buffer->content;
7546 len = in->buf->buffer->use;
7547 }
7548 /* take into account the sequence length */
7549 if (third) len -= 2;
7550 else if (next) len --;
7551 for (;base < len;base++) {
7552 if (buf[base] == first) {
7553 if (third != 0) {
7554 if ((buf[base + 1] != next) ||
7555 (buf[base + 2] != third)) continue;
7556 } else if (next != 0) {
7557 if (buf[base + 1] != next) continue;
7558 }
7559 ctxt->checkIndex = 0;
7560#ifdef DEBUG_PUSH
7561 if (next == 0)
7562 xmlGenericError(xmlGenericErrorContext,
7563 "PP: lookup '%c' found at %d\n",
7564 first, base);
7565 else if (third == 0)
7566 xmlGenericError(xmlGenericErrorContext,
7567 "PP: lookup '%c%c' found at %d\n",
7568 first, next, base);
7569 else
7570 xmlGenericError(xmlGenericErrorContext,
7571 "PP: lookup '%c%c%c' found at %d\n",
7572 first, next, third, base);
7573#endif
7574 return(base - (in->cur - in->base));
7575 }
7576 }
7577 ctxt->checkIndex = base;
7578#ifdef DEBUG_PUSH
7579 if (next == 0)
7580 xmlGenericError(xmlGenericErrorContext,
7581 "PP: lookup '%c' failed\n", first);
7582 else if (third == 0)
7583 xmlGenericError(xmlGenericErrorContext,
7584 "PP: lookup '%c%c' failed\n", first, next);
7585 else
7586 xmlGenericError(xmlGenericErrorContext,
7587 "PP: lookup '%c%c%c' failed\n", first, next, third);
7588#endif
7589 return(-1);
7590}
7591
7592/**
7593 * xmlParseTryOrFinish:
7594 * @ctxt: an XML parser context
7595 * @terminate: last chunk indicator
7596 *
7597 * Try to progress on parsing
7598 *
7599 * Returns zero if no parsing was possible
7600 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007601static int
Owen Taylor3473f882001-02-23 17:55:21 +00007602xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7603 int ret = 0;
7604 int avail;
7605 xmlChar cur, next;
7606
7607#ifdef DEBUG_PUSH
7608 switch (ctxt->instate) {
7609 case XML_PARSER_EOF:
7610 xmlGenericError(xmlGenericErrorContext,
7611 "PP: try EOF\n"); break;
7612 case XML_PARSER_START:
7613 xmlGenericError(xmlGenericErrorContext,
7614 "PP: try START\n"); break;
7615 case XML_PARSER_MISC:
7616 xmlGenericError(xmlGenericErrorContext,
7617 "PP: try MISC\n");break;
7618 case XML_PARSER_COMMENT:
7619 xmlGenericError(xmlGenericErrorContext,
7620 "PP: try COMMENT\n");break;
7621 case XML_PARSER_PROLOG:
7622 xmlGenericError(xmlGenericErrorContext,
7623 "PP: try PROLOG\n");break;
7624 case XML_PARSER_START_TAG:
7625 xmlGenericError(xmlGenericErrorContext,
7626 "PP: try START_TAG\n");break;
7627 case XML_PARSER_CONTENT:
7628 xmlGenericError(xmlGenericErrorContext,
7629 "PP: try CONTENT\n");break;
7630 case XML_PARSER_CDATA_SECTION:
7631 xmlGenericError(xmlGenericErrorContext,
7632 "PP: try CDATA_SECTION\n");break;
7633 case XML_PARSER_END_TAG:
7634 xmlGenericError(xmlGenericErrorContext,
7635 "PP: try END_TAG\n");break;
7636 case XML_PARSER_ENTITY_DECL:
7637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: try ENTITY_DECL\n");break;
7639 case XML_PARSER_ENTITY_VALUE:
7640 xmlGenericError(xmlGenericErrorContext,
7641 "PP: try ENTITY_VALUE\n");break;
7642 case XML_PARSER_ATTRIBUTE_VALUE:
7643 xmlGenericError(xmlGenericErrorContext,
7644 "PP: try ATTRIBUTE_VALUE\n");break;
7645 case XML_PARSER_DTD:
7646 xmlGenericError(xmlGenericErrorContext,
7647 "PP: try DTD\n");break;
7648 case XML_PARSER_EPILOG:
7649 xmlGenericError(xmlGenericErrorContext,
7650 "PP: try EPILOG\n");break;
7651 case XML_PARSER_PI:
7652 xmlGenericError(xmlGenericErrorContext,
7653 "PP: try PI\n");break;
7654 case XML_PARSER_IGNORE:
7655 xmlGenericError(xmlGenericErrorContext,
7656 "PP: try IGNORE\n");break;
7657 }
7658#endif
7659
7660 while (1) {
7661 /*
7662 * Pop-up of finished entities.
7663 */
7664 while ((RAW == 0) && (ctxt->inputNr > 1))
7665 xmlPopInput(ctxt);
7666
7667 if (ctxt->input ==NULL) break;
7668 if (ctxt->input->buf == NULL)
7669 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7670 else
7671 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7672 if (avail < 1)
7673 goto done;
7674 switch (ctxt->instate) {
7675 case XML_PARSER_EOF:
7676 /*
7677 * Document parsing is done !
7678 */
7679 goto done;
7680 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007681 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7682 xmlChar start[4];
7683 xmlCharEncoding enc;
7684
7685 /*
7686 * Very first chars read from the document flow.
7687 */
7688 if (avail < 4)
7689 goto done;
7690
7691 /*
7692 * Get the 4 first bytes and decode the charset
7693 * if enc != XML_CHAR_ENCODING_NONE
7694 * plug some encoding conversion routines.
7695 */
7696 start[0] = RAW;
7697 start[1] = NXT(1);
7698 start[2] = NXT(2);
7699 start[3] = NXT(3);
7700 enc = xmlDetectCharEncoding(start, 4);
7701 if (enc != XML_CHAR_ENCODING_NONE) {
7702 xmlSwitchEncoding(ctxt, enc);
7703 }
7704 break;
7705 }
Owen Taylor3473f882001-02-23 17:55:21 +00007706
7707 cur = ctxt->input->cur[0];
7708 next = ctxt->input->cur[1];
7709 if (cur == 0) {
7710 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7711 ctxt->sax->setDocumentLocator(ctxt->userData,
7712 &xmlDefaultSAXLocator);
7713 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7715 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7716 ctxt->wellFormed = 0;
7717 ctxt->disableSAX = 1;
7718 ctxt->instate = XML_PARSER_EOF;
7719#ifdef DEBUG_PUSH
7720 xmlGenericError(xmlGenericErrorContext,
7721 "PP: entering EOF\n");
7722#endif
7723 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7724 ctxt->sax->endDocument(ctxt->userData);
7725 goto done;
7726 }
7727 if ((cur == '<') && (next == '?')) {
7728 /* PI or XML decl */
7729 if (avail < 5) return(ret);
7730 if ((!terminate) &&
7731 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7732 return(ret);
7733 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7734 ctxt->sax->setDocumentLocator(ctxt->userData,
7735 &xmlDefaultSAXLocator);
7736 if ((ctxt->input->cur[2] == 'x') &&
7737 (ctxt->input->cur[3] == 'm') &&
7738 (ctxt->input->cur[4] == 'l') &&
7739 (IS_BLANK(ctxt->input->cur[5]))) {
7740 ret += 5;
7741#ifdef DEBUG_PUSH
7742 xmlGenericError(xmlGenericErrorContext,
7743 "PP: Parsing XML Decl\n");
7744#endif
7745 xmlParseXMLDecl(ctxt);
7746 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7747 /*
7748 * The XML REC instructs us to stop parsing right
7749 * here
7750 */
7751 ctxt->instate = XML_PARSER_EOF;
7752 return(0);
7753 }
7754 ctxt->standalone = ctxt->input->standalone;
7755 if ((ctxt->encoding == NULL) &&
7756 (ctxt->input->encoding != NULL))
7757 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7758 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7759 (!ctxt->disableSAX))
7760 ctxt->sax->startDocument(ctxt->userData);
7761 ctxt->instate = XML_PARSER_MISC;
7762#ifdef DEBUG_PUSH
7763 xmlGenericError(xmlGenericErrorContext,
7764 "PP: entering MISC\n");
7765#endif
7766 } else {
7767 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7768 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7769 (!ctxt->disableSAX))
7770 ctxt->sax->startDocument(ctxt->userData);
7771 ctxt->instate = XML_PARSER_MISC;
7772#ifdef DEBUG_PUSH
7773 xmlGenericError(xmlGenericErrorContext,
7774 "PP: entering MISC\n");
7775#endif
7776 }
7777 } else {
7778 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7779 ctxt->sax->setDocumentLocator(ctxt->userData,
7780 &xmlDefaultSAXLocator);
7781 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7782 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7783 (!ctxt->disableSAX))
7784 ctxt->sax->startDocument(ctxt->userData);
7785 ctxt->instate = XML_PARSER_MISC;
7786#ifdef DEBUG_PUSH
7787 xmlGenericError(xmlGenericErrorContext,
7788 "PP: entering MISC\n");
7789#endif
7790 }
7791 break;
7792 case XML_PARSER_MISC:
7793 SKIP_BLANKS;
7794 if (ctxt->input->buf == NULL)
7795 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7796 else
7797 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7798 if (avail < 2)
7799 goto done;
7800 cur = ctxt->input->cur[0];
7801 next = ctxt->input->cur[1];
7802 if ((cur == '<') && (next == '?')) {
7803 if ((!terminate) &&
7804 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7805 goto done;
7806#ifdef DEBUG_PUSH
7807 xmlGenericError(xmlGenericErrorContext,
7808 "PP: Parsing PI\n");
7809#endif
7810 xmlParsePI(ctxt);
7811 } else if ((cur == '<') && (next == '!') &&
7812 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7813 if ((!terminate) &&
7814 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7815 goto done;
7816#ifdef DEBUG_PUSH
7817 xmlGenericError(xmlGenericErrorContext,
7818 "PP: Parsing Comment\n");
7819#endif
7820 xmlParseComment(ctxt);
7821 ctxt->instate = XML_PARSER_MISC;
7822 } else if ((cur == '<') && (next == '!') &&
7823 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7824 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7825 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7826 (ctxt->input->cur[8] == 'E')) {
7827 if ((!terminate) &&
7828 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7829 goto done;
7830#ifdef DEBUG_PUSH
7831 xmlGenericError(xmlGenericErrorContext,
7832 "PP: Parsing internal subset\n");
7833#endif
7834 ctxt->inSubset = 1;
7835 xmlParseDocTypeDecl(ctxt);
7836 if (RAW == '[') {
7837 ctxt->instate = XML_PARSER_DTD;
7838#ifdef DEBUG_PUSH
7839 xmlGenericError(xmlGenericErrorContext,
7840 "PP: entering DTD\n");
7841#endif
7842 } else {
7843 /*
7844 * Create and update the external subset.
7845 */
7846 ctxt->inSubset = 2;
7847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7848 (ctxt->sax->externalSubset != NULL))
7849 ctxt->sax->externalSubset(ctxt->userData,
7850 ctxt->intSubName, ctxt->extSubSystem,
7851 ctxt->extSubURI);
7852 ctxt->inSubset = 0;
7853 ctxt->instate = XML_PARSER_PROLOG;
7854#ifdef DEBUG_PUSH
7855 xmlGenericError(xmlGenericErrorContext,
7856 "PP: entering PROLOG\n");
7857#endif
7858 }
7859 } else if ((cur == '<') && (next == '!') &&
7860 (avail < 9)) {
7861 goto done;
7862 } else {
7863 ctxt->instate = XML_PARSER_START_TAG;
7864#ifdef DEBUG_PUSH
7865 xmlGenericError(xmlGenericErrorContext,
7866 "PP: entering START_TAG\n");
7867#endif
7868 }
7869 break;
7870 case XML_PARSER_IGNORE:
7871 xmlGenericError(xmlGenericErrorContext,
7872 "PP: internal error, state == IGNORE");
7873 ctxt->instate = XML_PARSER_DTD;
7874#ifdef DEBUG_PUSH
7875 xmlGenericError(xmlGenericErrorContext,
7876 "PP: entering DTD\n");
7877#endif
7878 break;
7879 case XML_PARSER_PROLOG:
7880 SKIP_BLANKS;
7881 if (ctxt->input->buf == NULL)
7882 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7883 else
7884 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7885 if (avail < 2)
7886 goto done;
7887 cur = ctxt->input->cur[0];
7888 next = ctxt->input->cur[1];
7889 if ((cur == '<') && (next == '?')) {
7890 if ((!terminate) &&
7891 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7892 goto done;
7893#ifdef DEBUG_PUSH
7894 xmlGenericError(xmlGenericErrorContext,
7895 "PP: Parsing PI\n");
7896#endif
7897 xmlParsePI(ctxt);
7898 } else if ((cur == '<') && (next == '!') &&
7899 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7900 if ((!terminate) &&
7901 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7902 goto done;
7903#ifdef DEBUG_PUSH
7904 xmlGenericError(xmlGenericErrorContext,
7905 "PP: Parsing Comment\n");
7906#endif
7907 xmlParseComment(ctxt);
7908 ctxt->instate = XML_PARSER_PROLOG;
7909 } else if ((cur == '<') && (next == '!') &&
7910 (avail < 4)) {
7911 goto done;
7912 } else {
7913 ctxt->instate = XML_PARSER_START_TAG;
7914#ifdef DEBUG_PUSH
7915 xmlGenericError(xmlGenericErrorContext,
7916 "PP: entering START_TAG\n");
7917#endif
7918 }
7919 break;
7920 case XML_PARSER_EPILOG:
7921 SKIP_BLANKS;
7922 if (ctxt->input->buf == NULL)
7923 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7924 else
7925 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7926 if (avail < 2)
7927 goto done;
7928 cur = ctxt->input->cur[0];
7929 next = ctxt->input->cur[1];
7930 if ((cur == '<') && (next == '?')) {
7931 if ((!terminate) &&
7932 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7933 goto done;
7934#ifdef DEBUG_PUSH
7935 xmlGenericError(xmlGenericErrorContext,
7936 "PP: Parsing PI\n");
7937#endif
7938 xmlParsePI(ctxt);
7939 ctxt->instate = XML_PARSER_EPILOG;
7940 } else if ((cur == '<') && (next == '!') &&
7941 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7942 if ((!terminate) &&
7943 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7944 goto done;
7945#ifdef DEBUG_PUSH
7946 xmlGenericError(xmlGenericErrorContext,
7947 "PP: Parsing Comment\n");
7948#endif
7949 xmlParseComment(ctxt);
7950 ctxt->instate = XML_PARSER_EPILOG;
7951 } else if ((cur == '<') && (next == '!') &&
7952 (avail < 4)) {
7953 goto done;
7954 } else {
7955 ctxt->errNo = XML_ERR_DOCUMENT_END;
7956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7957 ctxt->sax->error(ctxt->userData,
7958 "Extra content at the end of the document\n");
7959 ctxt->wellFormed = 0;
7960 ctxt->disableSAX = 1;
7961 ctxt->instate = XML_PARSER_EOF;
7962#ifdef DEBUG_PUSH
7963 xmlGenericError(xmlGenericErrorContext,
7964 "PP: entering EOF\n");
7965#endif
7966 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7967 (!ctxt->disableSAX))
7968 ctxt->sax->endDocument(ctxt->userData);
7969 goto done;
7970 }
7971 break;
7972 case XML_PARSER_START_TAG: {
7973 xmlChar *name, *oldname;
7974
7975 if ((avail < 2) && (ctxt->inputNr == 1))
7976 goto done;
7977 cur = ctxt->input->cur[0];
7978 if (cur != '<') {
7979 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7981 ctxt->sax->error(ctxt->userData,
7982 "Start tag expect, '<' not found\n");
7983 ctxt->wellFormed = 0;
7984 ctxt->disableSAX = 1;
7985 ctxt->instate = XML_PARSER_EOF;
7986#ifdef DEBUG_PUSH
7987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: entering EOF\n");
7989#endif
7990 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7991 (!ctxt->disableSAX))
7992 ctxt->sax->endDocument(ctxt->userData);
7993 goto done;
7994 }
7995 if ((!terminate) &&
7996 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7997 goto done;
7998 if (ctxt->spaceNr == 0)
7999 spacePush(ctxt, -1);
8000 else
8001 spacePush(ctxt, *ctxt->space);
8002 name = xmlParseStartTag(ctxt);
8003 if (name == NULL) {
8004 spacePop(ctxt);
8005 ctxt->instate = XML_PARSER_EOF;
8006#ifdef DEBUG_PUSH
8007 xmlGenericError(xmlGenericErrorContext,
8008 "PP: entering EOF\n");
8009#endif
8010 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8011 (!ctxt->disableSAX))
8012 ctxt->sax->endDocument(ctxt->userData);
8013 goto done;
8014 }
8015 namePush(ctxt, xmlStrdup(name));
8016
8017 /*
8018 * [ VC: Root Element Type ]
8019 * The Name in the document type declaration must match
8020 * the element type of the root element.
8021 */
8022 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8023 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8024 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8025
8026 /*
8027 * Check for an Empty Element.
8028 */
8029 if ((RAW == '/') && (NXT(1) == '>')) {
8030 SKIP(2);
8031 if ((ctxt->sax != NULL) &&
8032 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8033 ctxt->sax->endElement(ctxt->userData, name);
8034 xmlFree(name);
8035 oldname = namePop(ctxt);
8036 spacePop(ctxt);
8037 if (oldname != NULL) {
8038#ifdef DEBUG_STACK
8039 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8040#endif
8041 xmlFree(oldname);
8042 }
8043 if (ctxt->name == NULL) {
8044 ctxt->instate = XML_PARSER_EPILOG;
8045#ifdef DEBUG_PUSH
8046 xmlGenericError(xmlGenericErrorContext,
8047 "PP: entering EPILOG\n");
8048#endif
8049 } else {
8050 ctxt->instate = XML_PARSER_CONTENT;
8051#ifdef DEBUG_PUSH
8052 xmlGenericError(xmlGenericErrorContext,
8053 "PP: entering CONTENT\n");
8054#endif
8055 }
8056 break;
8057 }
8058 if (RAW == '>') {
8059 NEXT;
8060 } else {
8061 ctxt->errNo = XML_ERR_GT_REQUIRED;
8062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8063 ctxt->sax->error(ctxt->userData,
8064 "Couldn't find end of Start Tag %s\n",
8065 name);
8066 ctxt->wellFormed = 0;
8067 ctxt->disableSAX = 1;
8068
8069 /*
8070 * end of parsing of this node.
8071 */
8072 nodePop(ctxt);
8073 oldname = namePop(ctxt);
8074 spacePop(ctxt);
8075 if (oldname != NULL) {
8076#ifdef DEBUG_STACK
8077 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8078#endif
8079 xmlFree(oldname);
8080 }
8081 }
8082 xmlFree(name);
8083 ctxt->instate = XML_PARSER_CONTENT;
8084#ifdef DEBUG_PUSH
8085 xmlGenericError(xmlGenericErrorContext,
8086 "PP: entering CONTENT\n");
8087#endif
8088 break;
8089 }
8090 case XML_PARSER_CONTENT: {
8091 const xmlChar *test;
8092 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008093 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008094
8095 /*
8096 * Handle preparsed entities and charRef
8097 */
8098 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008099 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008100
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008101 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008102 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8103 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008104 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008105 ctxt->token = 0;
8106 }
8107 if ((avail < 2) && (ctxt->inputNr == 1))
8108 goto done;
8109 cur = ctxt->input->cur[0];
8110 next = ctxt->input->cur[1];
8111
8112 test = CUR_PTR;
8113 cons = ctxt->input->consumed;
8114 tok = ctxt->token;
8115 if ((cur == '<') && (next == '?')) {
8116 if ((!terminate) &&
8117 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8118 goto done;
8119#ifdef DEBUG_PUSH
8120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: Parsing PI\n");
8122#endif
8123 xmlParsePI(ctxt);
8124 } else if ((cur == '<') && (next == '!') &&
8125 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8126 if ((!terminate) &&
8127 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8128 goto done;
8129#ifdef DEBUG_PUSH
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: Parsing Comment\n");
8132#endif
8133 xmlParseComment(ctxt);
8134 ctxt->instate = XML_PARSER_CONTENT;
8135 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8136 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8137 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8138 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8139 (ctxt->input->cur[8] == '[')) {
8140 SKIP(9);
8141 ctxt->instate = XML_PARSER_CDATA_SECTION;
8142#ifdef DEBUG_PUSH
8143 xmlGenericError(xmlGenericErrorContext,
8144 "PP: entering CDATA_SECTION\n");
8145#endif
8146 break;
8147 } else if ((cur == '<') && (next == '!') &&
8148 (avail < 9)) {
8149 goto done;
8150 } else if ((cur == '<') && (next == '/')) {
8151 ctxt->instate = XML_PARSER_END_TAG;
8152#ifdef DEBUG_PUSH
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: entering END_TAG\n");
8155#endif
8156 break;
8157 } else if (cur == '<') {
8158 ctxt->instate = XML_PARSER_START_TAG;
8159#ifdef DEBUG_PUSH
8160 xmlGenericError(xmlGenericErrorContext,
8161 "PP: entering START_TAG\n");
8162#endif
8163 break;
8164 } else if (cur == '&') {
8165 if ((!terminate) &&
8166 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8167 goto done;
8168#ifdef DEBUG_PUSH
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: Parsing Reference\n");
8171#endif
8172 xmlParseReference(ctxt);
8173 } else {
8174 /* TODO Avoid the extra copy, handle directly !!! */
8175 /*
8176 * Goal of the following test is:
8177 * - minimize calls to the SAX 'character' callback
8178 * when they are mergeable
8179 * - handle an problem for isBlank when we only parse
8180 * a sequence of blank chars and the next one is
8181 * not available to check against '<' presence.
8182 * - tries to homogenize the differences in SAX
8183 * callbacks beween the push and pull versions
8184 * of the parser.
8185 */
8186 if ((ctxt->inputNr == 1) &&
8187 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8188 if ((!terminate) &&
8189 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8190 goto done;
8191 }
8192 ctxt->checkIndex = 0;
8193#ifdef DEBUG_PUSH
8194 xmlGenericError(xmlGenericErrorContext,
8195 "PP: Parsing char data\n");
8196#endif
8197 xmlParseCharData(ctxt, 0);
8198 }
8199 /*
8200 * Pop-up of finished entities.
8201 */
8202 while ((RAW == 0) && (ctxt->inputNr > 1))
8203 xmlPopInput(ctxt);
8204 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8205 (tok == ctxt->token)) {
8206 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8208 ctxt->sax->error(ctxt->userData,
8209 "detected an error in element content\n");
8210 ctxt->wellFormed = 0;
8211 ctxt->disableSAX = 1;
8212 ctxt->instate = XML_PARSER_EOF;
8213 break;
8214 }
8215 break;
8216 }
8217 case XML_PARSER_CDATA_SECTION: {
8218 /*
8219 * The Push mode need to have the SAX callback for
8220 * cdataBlock merge back contiguous callbacks.
8221 */
8222 int base;
8223
8224 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8225 if (base < 0) {
8226 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8227 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8228 if (ctxt->sax->cdataBlock != NULL)
8229 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8230 XML_PARSER_BIG_BUFFER_SIZE);
8231 }
8232 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8233 ctxt->checkIndex = 0;
8234 }
8235 goto done;
8236 } else {
8237 if ((ctxt->sax != NULL) && (base > 0) &&
8238 (!ctxt->disableSAX)) {
8239 if (ctxt->sax->cdataBlock != NULL)
8240 ctxt->sax->cdataBlock(ctxt->userData,
8241 ctxt->input->cur, base);
8242 }
8243 SKIP(base + 3);
8244 ctxt->checkIndex = 0;
8245 ctxt->instate = XML_PARSER_CONTENT;
8246#ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: entering CONTENT\n");
8249#endif
8250 }
8251 break;
8252 }
8253 case XML_PARSER_END_TAG:
8254 if (avail < 2)
8255 goto done;
8256 if ((!terminate) &&
8257 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8258 goto done;
8259 xmlParseEndTag(ctxt);
8260 if (ctxt->name == NULL) {
8261 ctxt->instate = XML_PARSER_EPILOG;
8262#ifdef DEBUG_PUSH
8263 xmlGenericError(xmlGenericErrorContext,
8264 "PP: entering EPILOG\n");
8265#endif
8266 } else {
8267 ctxt->instate = XML_PARSER_CONTENT;
8268#ifdef DEBUG_PUSH
8269 xmlGenericError(xmlGenericErrorContext,
8270 "PP: entering CONTENT\n");
8271#endif
8272 }
8273 break;
8274 case XML_PARSER_DTD: {
8275 /*
8276 * Sorry but progressive parsing of the internal subset
8277 * is not expected to be supported. We first check that
8278 * the full content of the internal subset is available and
8279 * the parsing is launched only at that point.
8280 * Internal subset ends up with "']' S? '>'" in an unescaped
8281 * section and not in a ']]>' sequence which are conditional
8282 * sections (whoever argued to keep that crap in XML deserve
8283 * a place in hell !).
8284 */
8285 int base, i;
8286 xmlChar *buf;
8287 xmlChar quote = 0;
8288
8289 base = ctxt->input->cur - ctxt->input->base;
8290 if (base < 0) return(0);
8291 if (ctxt->checkIndex > base)
8292 base = ctxt->checkIndex;
8293 buf = ctxt->input->buf->buffer->content;
8294 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8295 base++) {
8296 if (quote != 0) {
8297 if (buf[base] == quote)
8298 quote = 0;
8299 continue;
8300 }
8301 if (buf[base] == '"') {
8302 quote = '"';
8303 continue;
8304 }
8305 if (buf[base] == '\'') {
8306 quote = '\'';
8307 continue;
8308 }
8309 if (buf[base] == ']') {
8310 if ((unsigned int) base +1 >=
8311 ctxt->input->buf->buffer->use)
8312 break;
8313 if (buf[base + 1] == ']') {
8314 /* conditional crap, skip both ']' ! */
8315 base++;
8316 continue;
8317 }
8318 for (i = 0;
8319 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8320 i++) {
8321 if (buf[base + i] == '>')
8322 goto found_end_int_subset;
8323 }
8324 break;
8325 }
8326 }
8327 /*
8328 * We didn't found the end of the Internal subset
8329 */
8330 if (quote == 0)
8331 ctxt->checkIndex = base;
8332#ifdef DEBUG_PUSH
8333 if (next == 0)
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: lookup of int subset end filed\n");
8336#endif
8337 goto done;
8338
8339found_end_int_subset:
8340 xmlParseInternalSubset(ctxt);
8341 ctxt->inSubset = 2;
8342 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8343 (ctxt->sax->externalSubset != NULL))
8344 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8345 ctxt->extSubSystem, ctxt->extSubURI);
8346 ctxt->inSubset = 0;
8347 ctxt->instate = XML_PARSER_PROLOG;
8348 ctxt->checkIndex = 0;
8349#ifdef DEBUG_PUSH
8350 xmlGenericError(xmlGenericErrorContext,
8351 "PP: entering PROLOG\n");
8352#endif
8353 break;
8354 }
8355 case XML_PARSER_COMMENT:
8356 xmlGenericError(xmlGenericErrorContext,
8357 "PP: internal error, state == COMMENT\n");
8358 ctxt->instate = XML_PARSER_CONTENT;
8359#ifdef DEBUG_PUSH
8360 xmlGenericError(xmlGenericErrorContext,
8361 "PP: entering CONTENT\n");
8362#endif
8363 break;
8364 case XML_PARSER_PI:
8365 xmlGenericError(xmlGenericErrorContext,
8366 "PP: internal error, state == PI\n");
8367 ctxt->instate = XML_PARSER_CONTENT;
8368#ifdef DEBUG_PUSH
8369 xmlGenericError(xmlGenericErrorContext,
8370 "PP: entering CONTENT\n");
8371#endif
8372 break;
8373 case XML_PARSER_ENTITY_DECL:
8374 xmlGenericError(xmlGenericErrorContext,
8375 "PP: internal error, state == ENTITY_DECL\n");
8376 ctxt->instate = XML_PARSER_DTD;
8377#ifdef DEBUG_PUSH
8378 xmlGenericError(xmlGenericErrorContext,
8379 "PP: entering DTD\n");
8380#endif
8381 break;
8382 case XML_PARSER_ENTITY_VALUE:
8383 xmlGenericError(xmlGenericErrorContext,
8384 "PP: internal error, state == ENTITY_VALUE\n");
8385 ctxt->instate = XML_PARSER_CONTENT;
8386#ifdef DEBUG_PUSH
8387 xmlGenericError(xmlGenericErrorContext,
8388 "PP: entering DTD\n");
8389#endif
8390 break;
8391 case XML_PARSER_ATTRIBUTE_VALUE:
8392 xmlGenericError(xmlGenericErrorContext,
8393 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8394 ctxt->instate = XML_PARSER_START_TAG;
8395#ifdef DEBUG_PUSH
8396 xmlGenericError(xmlGenericErrorContext,
8397 "PP: entering START_TAG\n");
8398#endif
8399 break;
8400 case XML_PARSER_SYSTEM_LITERAL:
8401 xmlGenericError(xmlGenericErrorContext,
8402 "PP: internal error, state == SYSTEM_LITERAL\n");
8403 ctxt->instate = XML_PARSER_START_TAG;
8404#ifdef DEBUG_PUSH
8405 xmlGenericError(xmlGenericErrorContext,
8406 "PP: entering START_TAG\n");
8407#endif
8408 break;
8409 }
8410 }
8411done:
8412#ifdef DEBUG_PUSH
8413 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8414#endif
8415 return(ret);
8416}
8417
8418/**
Owen Taylor3473f882001-02-23 17:55:21 +00008419 * xmlParseChunk:
8420 * @ctxt: an XML parser context
8421 * @chunk: an char array
8422 * @size: the size in byte of the chunk
8423 * @terminate: last chunk indicator
8424 *
8425 * Parse a Chunk of memory
8426 *
8427 * Returns zero if no error, the xmlParserErrors otherwise.
8428 */
8429int
8430xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8431 int terminate) {
8432 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8433 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8434 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8435 int cur = ctxt->input->cur - ctxt->input->base;
8436
8437 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8438 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8439 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008440 ctxt->input->end =
8441 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008442#ifdef DEBUG_PUSH
8443 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8444#endif
8445
8446 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8447 xmlParseTryOrFinish(ctxt, terminate);
8448 } else if (ctxt->instate != XML_PARSER_EOF) {
8449 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8450 xmlParserInputBufferPtr in = ctxt->input->buf;
8451 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8452 (in->raw != NULL)) {
8453 int nbchars;
8454
8455 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8456 if (nbchars < 0) {
8457 xmlGenericError(xmlGenericErrorContext,
8458 "xmlParseChunk: encoder error\n");
8459 return(XML_ERR_INVALID_ENCODING);
8460 }
8461 }
8462 }
8463 }
8464 xmlParseTryOrFinish(ctxt, terminate);
8465 if (terminate) {
8466 /*
8467 * Check for termination
8468 */
8469 if ((ctxt->instate != XML_PARSER_EOF) &&
8470 (ctxt->instate != XML_PARSER_EPILOG)) {
8471 ctxt->errNo = XML_ERR_DOCUMENT_END;
8472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8473 ctxt->sax->error(ctxt->userData,
8474 "Extra content at the end of the document\n");
8475 ctxt->wellFormed = 0;
8476 ctxt->disableSAX = 1;
8477 }
8478 if (ctxt->instate != XML_PARSER_EOF) {
8479 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8480 (!ctxt->disableSAX))
8481 ctxt->sax->endDocument(ctxt->userData);
8482 }
8483 ctxt->instate = XML_PARSER_EOF;
8484 }
8485 return((xmlParserErrors) ctxt->errNo);
8486}
8487
8488/************************************************************************
8489 * *
8490 * I/O front end functions to the parser *
8491 * *
8492 ************************************************************************/
8493
8494/**
8495 * xmlStopParser:
8496 * @ctxt: an XML parser context
8497 *
8498 * Blocks further parser processing
8499 */
8500void
8501xmlStopParser(xmlParserCtxtPtr ctxt) {
8502 ctxt->instate = XML_PARSER_EOF;
8503 if (ctxt->input != NULL)
8504 ctxt->input->cur = BAD_CAST"";
8505}
8506
8507/**
8508 * xmlCreatePushParserCtxt:
8509 * @sax: a SAX handler
8510 * @user_data: The user data returned on SAX callbacks
8511 * @chunk: a pointer to an array of chars
8512 * @size: number of chars in the array
8513 * @filename: an optional file name or URI
8514 *
8515 * Create a parser context for using the XML parser in push mode
8516 * To allow content encoding detection, @size should be >= 4
8517 * The value of @filename is used for fetching external entities
8518 * and error/warning reports.
8519 *
8520 * Returns the new parser context or NULL
8521 */
8522xmlParserCtxtPtr
8523xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8524 const char *chunk, int size, const char *filename) {
8525 xmlParserCtxtPtr ctxt;
8526 xmlParserInputPtr inputStream;
8527 xmlParserInputBufferPtr buf;
8528 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8529
8530 /*
8531 * plug some encoding conversion routines
8532 */
8533 if ((chunk != NULL) && (size >= 4))
8534 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8535
8536 buf = xmlAllocParserInputBuffer(enc);
8537 if (buf == NULL) return(NULL);
8538
8539 ctxt = xmlNewParserCtxt();
8540 if (ctxt == NULL) {
8541 xmlFree(buf);
8542 return(NULL);
8543 }
8544 if (sax != NULL) {
8545 if (ctxt->sax != &xmlDefaultSAXHandler)
8546 xmlFree(ctxt->sax);
8547 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8548 if (ctxt->sax == NULL) {
8549 xmlFree(buf);
8550 xmlFree(ctxt);
8551 return(NULL);
8552 }
8553 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8554 if (user_data != NULL)
8555 ctxt->userData = user_data;
8556 }
8557 if (filename == NULL) {
8558 ctxt->directory = NULL;
8559 } else {
8560 ctxt->directory = xmlParserGetDirectory(filename);
8561 }
8562
8563 inputStream = xmlNewInputStream(ctxt);
8564 if (inputStream == NULL) {
8565 xmlFreeParserCtxt(ctxt);
8566 return(NULL);
8567 }
8568
8569 if (filename == NULL)
8570 inputStream->filename = NULL;
8571 else
8572 inputStream->filename = xmlMemStrdup(filename);
8573 inputStream->buf = buf;
8574 inputStream->base = inputStream->buf->buffer->content;
8575 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008576 inputStream->end =
8577 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008578
8579 inputPush(ctxt, inputStream);
8580
8581 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8582 (ctxt->input->buf != NULL)) {
8583 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8584#ifdef DEBUG_PUSH
8585 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8586#endif
8587 }
8588
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008589 if (enc != XML_CHAR_ENCODING_NONE) {
8590 xmlSwitchEncoding(ctxt, enc);
8591 }
8592
Owen Taylor3473f882001-02-23 17:55:21 +00008593 return(ctxt);
8594}
8595
8596/**
8597 * xmlCreateIOParserCtxt:
8598 * @sax: a SAX handler
8599 * @user_data: The user data returned on SAX callbacks
8600 * @ioread: an I/O read function
8601 * @ioclose: an I/O close function
8602 * @ioctx: an I/O handler
8603 * @enc: the charset encoding if known
8604 *
8605 * Create a parser context for using the XML parser with an existing
8606 * I/O stream
8607 *
8608 * Returns the new parser context or NULL
8609 */
8610xmlParserCtxtPtr
8611xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8612 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8613 void *ioctx, xmlCharEncoding enc) {
8614 xmlParserCtxtPtr ctxt;
8615 xmlParserInputPtr inputStream;
8616 xmlParserInputBufferPtr buf;
8617
8618 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8619 if (buf == NULL) return(NULL);
8620
8621 ctxt = xmlNewParserCtxt();
8622 if (ctxt == NULL) {
8623 xmlFree(buf);
8624 return(NULL);
8625 }
8626 if (sax != NULL) {
8627 if (ctxt->sax != &xmlDefaultSAXHandler)
8628 xmlFree(ctxt->sax);
8629 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8630 if (ctxt->sax == NULL) {
8631 xmlFree(buf);
8632 xmlFree(ctxt);
8633 return(NULL);
8634 }
8635 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8636 if (user_data != NULL)
8637 ctxt->userData = user_data;
8638 }
8639
8640 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8641 if (inputStream == NULL) {
8642 xmlFreeParserCtxt(ctxt);
8643 return(NULL);
8644 }
8645 inputPush(ctxt, inputStream);
8646
8647 return(ctxt);
8648}
8649
8650/************************************************************************
8651 * *
8652 * Front ends when parsing a Dtd *
8653 * *
8654 ************************************************************************/
8655
8656/**
8657 * xmlIOParseDTD:
8658 * @sax: the SAX handler block or NULL
8659 * @input: an Input Buffer
8660 * @enc: the charset encoding if known
8661 *
8662 * Load and parse a DTD
8663 *
8664 * Returns the resulting xmlDtdPtr or NULL in case of error.
8665 * @input will be freed at parsing end.
8666 */
8667
8668xmlDtdPtr
8669xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8670 xmlCharEncoding enc) {
8671 xmlDtdPtr ret = NULL;
8672 xmlParserCtxtPtr ctxt;
8673 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008674 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008675
8676 if (input == NULL)
8677 return(NULL);
8678
8679 ctxt = xmlNewParserCtxt();
8680 if (ctxt == NULL) {
8681 return(NULL);
8682 }
8683
8684 /*
8685 * Set-up the SAX context
8686 */
8687 if (sax != NULL) {
8688 if (ctxt->sax != NULL)
8689 xmlFree(ctxt->sax);
8690 ctxt->sax = sax;
8691 ctxt->userData = NULL;
8692 }
8693
8694 /*
8695 * generate a parser input from the I/O handler
8696 */
8697
8698 pinput = xmlNewIOInputStream(ctxt, input, enc);
8699 if (pinput == NULL) {
8700 if (sax != NULL) ctxt->sax = NULL;
8701 xmlFreeParserCtxt(ctxt);
8702 return(NULL);
8703 }
8704
8705 /*
8706 * plug some encoding conversion routines here.
8707 */
8708 xmlPushInput(ctxt, pinput);
8709
8710 pinput->filename = NULL;
8711 pinput->line = 1;
8712 pinput->col = 1;
8713 pinput->base = ctxt->input->cur;
8714 pinput->cur = ctxt->input->cur;
8715 pinput->free = NULL;
8716
8717 /*
8718 * let's parse that entity knowing it's an external subset.
8719 */
8720 ctxt->inSubset = 2;
8721 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8722 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8723 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008724
8725 if (enc == XML_CHAR_ENCODING_NONE) {
8726 /*
8727 * Get the 4 first bytes and decode the charset
8728 * if enc != XML_CHAR_ENCODING_NONE
8729 * plug some encoding conversion routines.
8730 */
8731 start[0] = RAW;
8732 start[1] = NXT(1);
8733 start[2] = NXT(2);
8734 start[3] = NXT(3);
8735 enc = xmlDetectCharEncoding(start, 4);
8736 if (enc != XML_CHAR_ENCODING_NONE) {
8737 xmlSwitchEncoding(ctxt, enc);
8738 }
8739 }
8740
Owen Taylor3473f882001-02-23 17:55:21 +00008741 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8742
8743 if (ctxt->myDoc != NULL) {
8744 if (ctxt->wellFormed) {
8745 ret = ctxt->myDoc->extSubset;
8746 ctxt->myDoc->extSubset = NULL;
8747 } else {
8748 ret = NULL;
8749 }
8750 xmlFreeDoc(ctxt->myDoc);
8751 ctxt->myDoc = NULL;
8752 }
8753 if (sax != NULL) ctxt->sax = NULL;
8754 xmlFreeParserCtxt(ctxt);
8755
8756 return(ret);
8757}
8758
8759/**
8760 * xmlSAXParseDTD:
8761 * @sax: the SAX handler block
8762 * @ExternalID: a NAME* containing the External ID of the DTD
8763 * @SystemID: a NAME* containing the URL to the DTD
8764 *
8765 * Load and parse an external subset.
8766 *
8767 * Returns the resulting xmlDtdPtr or NULL in case of error.
8768 */
8769
8770xmlDtdPtr
8771xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8772 const xmlChar *SystemID) {
8773 xmlDtdPtr ret = NULL;
8774 xmlParserCtxtPtr ctxt;
8775 xmlParserInputPtr input = NULL;
8776 xmlCharEncoding enc;
8777
8778 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8779
8780 ctxt = xmlNewParserCtxt();
8781 if (ctxt == NULL) {
8782 return(NULL);
8783 }
8784
8785 /*
8786 * Set-up the SAX context
8787 */
8788 if (sax != NULL) {
8789 if (ctxt->sax != NULL)
8790 xmlFree(ctxt->sax);
8791 ctxt->sax = sax;
8792 ctxt->userData = NULL;
8793 }
8794
8795 /*
8796 * Ask the Entity resolver to load the damn thing
8797 */
8798
8799 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8800 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8801 if (input == NULL) {
8802 if (sax != NULL) ctxt->sax = NULL;
8803 xmlFreeParserCtxt(ctxt);
8804 return(NULL);
8805 }
8806
8807 /*
8808 * plug some encoding conversion routines here.
8809 */
8810 xmlPushInput(ctxt, input);
8811 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8812 xmlSwitchEncoding(ctxt, enc);
8813
8814 if (input->filename == NULL)
8815 input->filename = (char *) xmlStrdup(SystemID);
8816 input->line = 1;
8817 input->col = 1;
8818 input->base = ctxt->input->cur;
8819 input->cur = ctxt->input->cur;
8820 input->free = NULL;
8821
8822 /*
8823 * let's parse that entity knowing it's an external subset.
8824 */
8825 ctxt->inSubset = 2;
8826 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8827 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8828 ExternalID, SystemID);
8829 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8830
8831 if (ctxt->myDoc != NULL) {
8832 if (ctxt->wellFormed) {
8833 ret = ctxt->myDoc->extSubset;
8834 ctxt->myDoc->extSubset = NULL;
8835 } else {
8836 ret = NULL;
8837 }
8838 xmlFreeDoc(ctxt->myDoc);
8839 ctxt->myDoc = NULL;
8840 }
8841 if (sax != NULL) ctxt->sax = NULL;
8842 xmlFreeParserCtxt(ctxt);
8843
8844 return(ret);
8845}
8846
8847/**
8848 * xmlParseDTD:
8849 * @ExternalID: a NAME* containing the External ID of the DTD
8850 * @SystemID: a NAME* containing the URL to the DTD
8851 *
8852 * Load and parse an external subset.
8853 *
8854 * Returns the resulting xmlDtdPtr or NULL in case of error.
8855 */
8856
8857xmlDtdPtr
8858xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8859 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8860}
8861
8862/************************************************************************
8863 * *
8864 * Front ends when parsing an Entity *
8865 * *
8866 ************************************************************************/
8867
8868/**
Owen Taylor3473f882001-02-23 17:55:21 +00008869 * xmlParseCtxtExternalEntity:
8870 * @ctx: the existing parsing context
8871 * @URL: the URL for the entity to load
8872 * @ID: the System ID for the entity to load
8873 * @list: the return value for the set of parsed nodes
8874 *
8875 * Parse an external general entity within an existing parsing context
8876 * An external general parsed entity is well-formed if it matches the
8877 * production labeled extParsedEnt.
8878 *
8879 * [78] extParsedEnt ::= TextDecl? content
8880 *
8881 * Returns 0 if the entity is well formed, -1 in case of args problem and
8882 * the parser error code otherwise
8883 */
8884
8885int
8886xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8887 const xmlChar *ID, xmlNodePtr *list) {
8888 xmlParserCtxtPtr ctxt;
8889 xmlDocPtr newDoc;
8890 xmlSAXHandlerPtr oldsax = NULL;
8891 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008892 xmlChar start[4];
8893 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008894
8895 if (ctx->depth > 40) {
8896 return(XML_ERR_ENTITY_LOOP);
8897 }
8898
8899 if (list != NULL)
8900 *list = NULL;
8901 if ((URL == NULL) && (ID == NULL))
8902 return(-1);
8903 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8904 return(-1);
8905
8906
8907 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8908 if (ctxt == NULL) return(-1);
8909 ctxt->userData = ctxt;
8910 oldsax = ctxt->sax;
8911 ctxt->sax = ctx->sax;
8912 newDoc = xmlNewDoc(BAD_CAST "1.0");
8913 if (newDoc == NULL) {
8914 xmlFreeParserCtxt(ctxt);
8915 return(-1);
8916 }
8917 if (ctx->myDoc != NULL) {
8918 newDoc->intSubset = ctx->myDoc->intSubset;
8919 newDoc->extSubset = ctx->myDoc->extSubset;
8920 }
8921 if (ctx->myDoc->URL != NULL) {
8922 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8923 }
8924 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8925 if (newDoc->children == NULL) {
8926 ctxt->sax = oldsax;
8927 xmlFreeParserCtxt(ctxt);
8928 newDoc->intSubset = NULL;
8929 newDoc->extSubset = NULL;
8930 xmlFreeDoc(newDoc);
8931 return(-1);
8932 }
8933 nodePush(ctxt, newDoc->children);
8934 if (ctx->myDoc == NULL) {
8935 ctxt->myDoc = newDoc;
8936 } else {
8937 ctxt->myDoc = ctx->myDoc;
8938 newDoc->children->doc = ctx->myDoc;
8939 }
8940
Daniel Veillard87a764e2001-06-20 17:41:10 +00008941 /*
8942 * Get the 4 first bytes and decode the charset
8943 * if enc != XML_CHAR_ENCODING_NONE
8944 * plug some encoding conversion routines.
8945 */
8946 GROW
8947 start[0] = RAW;
8948 start[1] = NXT(1);
8949 start[2] = NXT(2);
8950 start[3] = NXT(3);
8951 enc = xmlDetectCharEncoding(start, 4);
8952 if (enc != XML_CHAR_ENCODING_NONE) {
8953 xmlSwitchEncoding(ctxt, enc);
8954 }
8955
Owen Taylor3473f882001-02-23 17:55:21 +00008956 /*
8957 * Parse a possible text declaration first
8958 */
Owen Taylor3473f882001-02-23 17:55:21 +00008959 if ((RAW == '<') && (NXT(1) == '?') &&
8960 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8961 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8962 xmlParseTextDecl(ctxt);
8963 }
8964
8965 /*
8966 * Doing validity checking on chunk doesn't make sense
8967 */
8968 ctxt->instate = XML_PARSER_CONTENT;
8969 ctxt->validate = ctx->validate;
8970 ctxt->loadsubset = ctx->loadsubset;
8971 ctxt->depth = ctx->depth + 1;
8972 ctxt->replaceEntities = ctx->replaceEntities;
8973 if (ctxt->validate) {
8974 ctxt->vctxt.error = ctx->vctxt.error;
8975 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00008976 } else {
8977 ctxt->vctxt.error = NULL;
8978 ctxt->vctxt.warning = NULL;
8979 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00008980 ctxt->vctxt.nodeTab = NULL;
8981 ctxt->vctxt.nodeNr = 0;
8982 ctxt->vctxt.nodeMax = 0;
8983 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008984
8985 xmlParseContent(ctxt);
8986
8987 if ((RAW == '<') && (NXT(1) == '/')) {
8988 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8990 ctxt->sax->error(ctxt->userData,
8991 "chunk is not well balanced\n");
8992 ctxt->wellFormed = 0;
8993 ctxt->disableSAX = 1;
8994 } else if (RAW != 0) {
8995 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8997 ctxt->sax->error(ctxt->userData,
8998 "extra content at the end of well balanced chunk\n");
8999 ctxt->wellFormed = 0;
9000 ctxt->disableSAX = 1;
9001 }
9002 if (ctxt->node != newDoc->children) {
9003 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9005 ctxt->sax->error(ctxt->userData,
9006 "chunk is not well balanced\n");
9007 ctxt->wellFormed = 0;
9008 ctxt->disableSAX = 1;
9009 }
9010
9011 if (!ctxt->wellFormed) {
9012 if (ctxt->errNo == 0)
9013 ret = 1;
9014 else
9015 ret = ctxt->errNo;
9016 } else {
9017 if (list != NULL) {
9018 xmlNodePtr cur;
9019
9020 /*
9021 * Return the newly created nodeset after unlinking it from
9022 * they pseudo parent.
9023 */
9024 cur = newDoc->children->children;
9025 *list = cur;
9026 while (cur != NULL) {
9027 cur->parent = NULL;
9028 cur = cur->next;
9029 }
9030 newDoc->children->children = NULL;
9031 }
9032 ret = 0;
9033 }
9034 ctxt->sax = oldsax;
9035 xmlFreeParserCtxt(ctxt);
9036 newDoc->intSubset = NULL;
9037 newDoc->extSubset = NULL;
9038 xmlFreeDoc(newDoc);
9039
9040 return(ret);
9041}
9042
9043/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009044 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009045 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009046 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009047 * @sax: the SAX handler bloc (possibly NULL)
9048 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9049 * @depth: Used for loop detection, use 0
9050 * @URL: the URL for the entity to load
9051 * @ID: the System ID for the entity to load
9052 * @list: the return value for the set of parsed nodes
9053 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009054 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009055 *
9056 * Returns 0 if the entity is well formed, -1 in case of args problem and
9057 * the parser error code otherwise
9058 */
9059
Daniel Veillard257d9102001-05-08 10:41:44 +00009060static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009061xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9062 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009063 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009064 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009065 xmlParserCtxtPtr ctxt;
9066 xmlDocPtr newDoc;
9067 xmlSAXHandlerPtr oldsax = NULL;
9068 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009069 xmlChar start[4];
9070 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009071
9072 if (depth > 40) {
9073 return(XML_ERR_ENTITY_LOOP);
9074 }
9075
9076
9077
9078 if (list != NULL)
9079 *list = NULL;
9080 if ((URL == NULL) && (ID == NULL))
9081 return(-1);
9082 if (doc == NULL) /* @@ relax but check for dereferences */
9083 return(-1);
9084
9085
9086 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9087 if (ctxt == NULL) return(-1);
9088 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009089 if (oldctxt != NULL) {
9090 ctxt->_private = oldctxt->_private;
9091 ctxt->loadsubset = oldctxt->loadsubset;
9092 ctxt->validate = oldctxt->validate;
9093 ctxt->external = oldctxt->external;
9094 } else {
9095 /*
9096 * Doing validity checking on chunk without context
9097 * doesn't make sense
9098 */
9099 ctxt->_private = NULL;
9100 ctxt->validate = 0;
9101 ctxt->external = 2;
9102 ctxt->loadsubset = 0;
9103 }
Owen Taylor3473f882001-02-23 17:55:21 +00009104 if (sax != NULL) {
9105 oldsax = ctxt->sax;
9106 ctxt->sax = sax;
9107 if (user_data != NULL)
9108 ctxt->userData = user_data;
9109 }
9110 newDoc = xmlNewDoc(BAD_CAST "1.0");
9111 if (newDoc == NULL) {
9112 xmlFreeParserCtxt(ctxt);
9113 return(-1);
9114 }
9115 if (doc != NULL) {
9116 newDoc->intSubset = doc->intSubset;
9117 newDoc->extSubset = doc->extSubset;
9118 }
9119 if (doc->URL != NULL) {
9120 newDoc->URL = xmlStrdup(doc->URL);
9121 }
9122 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9123 if (newDoc->children == NULL) {
9124 if (sax != NULL)
9125 ctxt->sax = oldsax;
9126 xmlFreeParserCtxt(ctxt);
9127 newDoc->intSubset = NULL;
9128 newDoc->extSubset = NULL;
9129 xmlFreeDoc(newDoc);
9130 return(-1);
9131 }
9132 nodePush(ctxt, newDoc->children);
9133 if (doc == NULL) {
9134 ctxt->myDoc = newDoc;
9135 } else {
9136 ctxt->myDoc = doc;
9137 newDoc->children->doc = doc;
9138 }
9139
Daniel Veillard87a764e2001-06-20 17:41:10 +00009140 /*
9141 * Get the 4 first bytes and decode the charset
9142 * if enc != XML_CHAR_ENCODING_NONE
9143 * plug some encoding conversion routines.
9144 */
9145 GROW;
9146 start[0] = RAW;
9147 start[1] = NXT(1);
9148 start[2] = NXT(2);
9149 start[3] = NXT(3);
9150 enc = xmlDetectCharEncoding(start, 4);
9151 if (enc != XML_CHAR_ENCODING_NONE) {
9152 xmlSwitchEncoding(ctxt, enc);
9153 }
9154
Owen Taylor3473f882001-02-23 17:55:21 +00009155 /*
9156 * Parse a possible text declaration first
9157 */
Owen Taylor3473f882001-02-23 17:55:21 +00009158 if ((RAW == '<') && (NXT(1) == '?') &&
9159 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9160 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9161 xmlParseTextDecl(ctxt);
9162 }
9163
Owen Taylor3473f882001-02-23 17:55:21 +00009164 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009165 ctxt->depth = depth;
9166
9167 xmlParseContent(ctxt);
9168
9169 if ((RAW == '<') && (NXT(1) == '/')) {
9170 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9172 ctxt->sax->error(ctxt->userData,
9173 "chunk is not well balanced\n");
9174 ctxt->wellFormed = 0;
9175 ctxt->disableSAX = 1;
9176 } else if (RAW != 0) {
9177 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9179 ctxt->sax->error(ctxt->userData,
9180 "extra content at the end of well balanced chunk\n");
9181 ctxt->wellFormed = 0;
9182 ctxt->disableSAX = 1;
9183 }
9184 if (ctxt->node != newDoc->children) {
9185 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9187 ctxt->sax->error(ctxt->userData,
9188 "chunk is not well balanced\n");
9189 ctxt->wellFormed = 0;
9190 ctxt->disableSAX = 1;
9191 }
9192
9193 if (!ctxt->wellFormed) {
9194 if (ctxt->errNo == 0)
9195 ret = 1;
9196 else
9197 ret = ctxt->errNo;
9198 } else {
9199 if (list != NULL) {
9200 xmlNodePtr cur;
9201
9202 /*
9203 * Return the newly created nodeset after unlinking it from
9204 * they pseudo parent.
9205 */
9206 cur = newDoc->children->children;
9207 *list = cur;
9208 while (cur != NULL) {
9209 cur->parent = NULL;
9210 cur = cur->next;
9211 }
9212 newDoc->children->children = NULL;
9213 }
9214 ret = 0;
9215 }
9216 if (sax != NULL)
9217 ctxt->sax = oldsax;
9218 xmlFreeParserCtxt(ctxt);
9219 newDoc->intSubset = NULL;
9220 newDoc->extSubset = NULL;
9221 xmlFreeDoc(newDoc);
9222
9223 return(ret);
9224}
9225
9226/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009227 * xmlParseExternalEntity:
9228 * @doc: the document the chunk pertains to
9229 * @sax: the SAX handler bloc (possibly NULL)
9230 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9231 * @depth: Used for loop detection, use 0
9232 * @URL: the URL for the entity to load
9233 * @ID: the System ID for the entity to load
9234 * @list: the return value for the set of parsed nodes
9235 *
9236 * Parse an external general entity
9237 * An external general parsed entity is well-formed if it matches the
9238 * production labeled extParsedEnt.
9239 *
9240 * [78] extParsedEnt ::= TextDecl? content
9241 *
9242 * Returns 0 if the entity is well formed, -1 in case of args problem and
9243 * the parser error code otherwise
9244 */
9245
9246int
9247xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9248 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009249 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9250 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009251}
9252
9253/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009254 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009255 * @doc: the document the chunk pertains to
9256 * @sax: the SAX handler bloc (possibly NULL)
9257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9258 * @depth: Used for loop detection, use 0
9259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9260 * @list: the return value for the set of parsed nodes
9261 *
9262 * Parse a well-balanced chunk of an XML document
9263 * called by the parser
9264 * The allowed sequence for the Well Balanced Chunk is the one defined by
9265 * the content production in the XML grammar:
9266 *
9267 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9268 *
9269 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9270 * the parser error code otherwise
9271 */
9272
9273int
9274xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9275 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9276 xmlParserCtxtPtr ctxt;
9277 xmlDocPtr newDoc;
9278 xmlSAXHandlerPtr oldsax = NULL;
9279 int size;
9280 int ret = 0;
9281
9282 if (depth > 40) {
9283 return(XML_ERR_ENTITY_LOOP);
9284 }
9285
9286
9287 if (list != NULL)
9288 *list = NULL;
9289 if (string == NULL)
9290 return(-1);
9291
9292 size = xmlStrlen(string);
9293
9294 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9295 if (ctxt == NULL) return(-1);
9296 ctxt->userData = ctxt;
9297 if (sax != NULL) {
9298 oldsax = ctxt->sax;
9299 ctxt->sax = sax;
9300 if (user_data != NULL)
9301 ctxt->userData = user_data;
9302 }
9303 newDoc = xmlNewDoc(BAD_CAST "1.0");
9304 if (newDoc == NULL) {
9305 xmlFreeParserCtxt(ctxt);
9306 return(-1);
9307 }
9308 if (doc != NULL) {
9309 newDoc->intSubset = doc->intSubset;
9310 newDoc->extSubset = doc->extSubset;
9311 }
9312 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9313 if (newDoc->children == NULL) {
9314 if (sax != NULL)
9315 ctxt->sax = oldsax;
9316 xmlFreeParserCtxt(ctxt);
9317 newDoc->intSubset = NULL;
9318 newDoc->extSubset = NULL;
9319 xmlFreeDoc(newDoc);
9320 return(-1);
9321 }
9322 nodePush(ctxt, newDoc->children);
9323 if (doc == NULL) {
9324 ctxt->myDoc = newDoc;
9325 } else {
9326 ctxt->myDoc = doc;
9327 newDoc->children->doc = doc;
9328 }
9329 ctxt->instate = XML_PARSER_CONTENT;
9330 ctxt->depth = depth;
9331
9332 /*
9333 * Doing validity checking on chunk doesn't make sense
9334 */
9335 ctxt->validate = 0;
9336 ctxt->loadsubset = 0;
9337
9338 xmlParseContent(ctxt);
9339
9340 if ((RAW == '<') && (NXT(1) == '/')) {
9341 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9343 ctxt->sax->error(ctxt->userData,
9344 "chunk is not well balanced\n");
9345 ctxt->wellFormed = 0;
9346 ctxt->disableSAX = 1;
9347 } else if (RAW != 0) {
9348 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9350 ctxt->sax->error(ctxt->userData,
9351 "extra content at the end of well balanced chunk\n");
9352 ctxt->wellFormed = 0;
9353 ctxt->disableSAX = 1;
9354 }
9355 if (ctxt->node != newDoc->children) {
9356 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9358 ctxt->sax->error(ctxt->userData,
9359 "chunk is not well balanced\n");
9360 ctxt->wellFormed = 0;
9361 ctxt->disableSAX = 1;
9362 }
9363
9364 if (!ctxt->wellFormed) {
9365 if (ctxt->errNo == 0)
9366 ret = 1;
9367 else
9368 ret = ctxt->errNo;
9369 } else {
9370 if (list != NULL) {
9371 xmlNodePtr cur;
9372
9373 /*
9374 * Return the newly created nodeset after unlinking it from
9375 * they pseudo parent.
9376 */
9377 cur = newDoc->children->children;
9378 *list = cur;
9379 while (cur != NULL) {
9380 cur->parent = NULL;
9381 cur = cur->next;
9382 }
9383 newDoc->children->children = NULL;
9384 }
9385 ret = 0;
9386 }
9387 if (sax != NULL)
9388 ctxt->sax = oldsax;
9389 xmlFreeParserCtxt(ctxt);
9390 newDoc->intSubset = NULL;
9391 newDoc->extSubset = NULL;
9392 xmlFreeDoc(newDoc);
9393
9394 return(ret);
9395}
9396
9397/**
9398 * xmlSAXParseEntity:
9399 * @sax: the SAX handler block
9400 * @filename: the filename
9401 *
9402 * parse an XML external entity out of context and build a tree.
9403 * It use the given SAX function block to handle the parsing callback.
9404 * If sax is NULL, fallback to the default DOM tree building routines.
9405 *
9406 * [78] extParsedEnt ::= TextDecl? content
9407 *
9408 * This correspond to a "Well Balanced" chunk
9409 *
9410 * Returns the resulting document tree
9411 */
9412
9413xmlDocPtr
9414xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9415 xmlDocPtr ret;
9416 xmlParserCtxtPtr ctxt;
9417 char *directory = NULL;
9418
9419 ctxt = xmlCreateFileParserCtxt(filename);
9420 if (ctxt == NULL) {
9421 return(NULL);
9422 }
9423 if (sax != NULL) {
9424 if (ctxt->sax != NULL)
9425 xmlFree(ctxt->sax);
9426 ctxt->sax = sax;
9427 ctxt->userData = NULL;
9428 }
9429
9430 if ((ctxt->directory == NULL) && (directory == NULL))
9431 directory = xmlParserGetDirectory(filename);
9432
9433 xmlParseExtParsedEnt(ctxt);
9434
9435 if (ctxt->wellFormed)
9436 ret = ctxt->myDoc;
9437 else {
9438 ret = NULL;
9439 xmlFreeDoc(ctxt->myDoc);
9440 ctxt->myDoc = NULL;
9441 }
9442 if (sax != NULL)
9443 ctxt->sax = NULL;
9444 xmlFreeParserCtxt(ctxt);
9445
9446 return(ret);
9447}
9448
9449/**
9450 * xmlParseEntity:
9451 * @filename: the filename
9452 *
9453 * parse an XML external entity out of context and build a tree.
9454 *
9455 * [78] extParsedEnt ::= TextDecl? content
9456 *
9457 * This correspond to a "Well Balanced" chunk
9458 *
9459 * Returns the resulting document tree
9460 */
9461
9462xmlDocPtr
9463xmlParseEntity(const char *filename) {
9464 return(xmlSAXParseEntity(NULL, filename));
9465}
9466
9467/**
9468 * xmlCreateEntityParserCtxt:
9469 * @URL: the entity URL
9470 * @ID: the entity PUBLIC ID
9471 * @base: a posible base for the target URI
9472 *
9473 * Create a parser context for an external entity
9474 * Automatic support for ZLIB/Compress compressed document is provided
9475 * by default if found at compile-time.
9476 *
9477 * Returns the new parser context or NULL
9478 */
9479xmlParserCtxtPtr
9480xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9481 const xmlChar *base) {
9482 xmlParserCtxtPtr ctxt;
9483 xmlParserInputPtr inputStream;
9484 char *directory = NULL;
9485 xmlChar *uri;
9486
9487 ctxt = xmlNewParserCtxt();
9488 if (ctxt == NULL) {
9489 return(NULL);
9490 }
9491
9492 uri = xmlBuildURI(URL, base);
9493
9494 if (uri == NULL) {
9495 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9496 if (inputStream == NULL) {
9497 xmlFreeParserCtxt(ctxt);
9498 return(NULL);
9499 }
9500
9501 inputPush(ctxt, inputStream);
9502
9503 if ((ctxt->directory == NULL) && (directory == NULL))
9504 directory = xmlParserGetDirectory((char *)URL);
9505 if ((ctxt->directory == NULL) && (directory != NULL))
9506 ctxt->directory = directory;
9507 } else {
9508 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9509 if (inputStream == NULL) {
9510 xmlFree(uri);
9511 xmlFreeParserCtxt(ctxt);
9512 return(NULL);
9513 }
9514
9515 inputPush(ctxt, inputStream);
9516
9517 if ((ctxt->directory == NULL) && (directory == NULL))
9518 directory = xmlParserGetDirectory((char *)uri);
9519 if ((ctxt->directory == NULL) && (directory != NULL))
9520 ctxt->directory = directory;
9521 xmlFree(uri);
9522 }
9523
9524 return(ctxt);
9525}
9526
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
9532
9533/**
9534 * xmlCreateFileParserCtxt:
9535 * @filename: the filename
9536 *
9537 * Create a parser context for a file content.
9538 * Automatic support for ZLIB/Compress compressed document is provided
9539 * by default if found at compile-time.
9540 *
9541 * Returns the new parser context or NULL
9542 */
9543xmlParserCtxtPtr
9544xmlCreateFileParserCtxt(const char *filename)
9545{
9546 xmlParserCtxtPtr ctxt;
9547 xmlParserInputPtr inputStream;
9548 xmlParserInputBufferPtr buf;
9549 char *directory = NULL;
9550
9551 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9552 if (buf == NULL) {
9553 return(NULL);
9554 }
9555
9556 ctxt = xmlNewParserCtxt();
9557 if (ctxt == NULL) {
9558 if (xmlDefaultSAXHandler.error != NULL) {
9559 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9560 }
9561 return(NULL);
9562 }
9563
9564 inputStream = xmlNewInputStream(ctxt);
9565 if (inputStream == NULL) {
9566 xmlFreeParserCtxt(ctxt);
9567 return(NULL);
9568 }
9569
9570 inputStream->filename = xmlMemStrdup(filename);
9571 inputStream->buf = buf;
9572 inputStream->base = inputStream->buf->buffer->content;
9573 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009574 inputStream->end =
9575 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009576
9577 inputPush(ctxt, inputStream);
9578 if ((ctxt->directory == NULL) && (directory == NULL))
9579 directory = xmlParserGetDirectory(filename);
9580 if ((ctxt->directory == NULL) && (directory != NULL))
9581 ctxt->directory = directory;
9582
9583 return(ctxt);
9584}
9585
9586/**
9587 * xmlSAXParseFile:
9588 * @sax: the SAX handler block
9589 * @filename: the filename
9590 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9591 * documents
9592 *
9593 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9594 * compressed document is provided by default if found at compile-time.
9595 * It use the given SAX function block to handle the parsing callback.
9596 * If sax is NULL, fallback to the default DOM tree building routines.
9597 *
9598 * Returns the resulting document tree
9599 */
9600
9601xmlDocPtr
9602xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9603 int recovery) {
9604 xmlDocPtr ret;
9605 xmlParserCtxtPtr ctxt;
9606 char *directory = NULL;
9607
9608 ctxt = xmlCreateFileParserCtxt(filename);
9609 if (ctxt == NULL) {
9610 return(NULL);
9611 }
9612 if (sax != NULL) {
9613 if (ctxt->sax != NULL)
9614 xmlFree(ctxt->sax);
9615 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009616 }
9617
9618 if ((ctxt->directory == NULL) && (directory == NULL))
9619 directory = xmlParserGetDirectory(filename);
9620 if ((ctxt->directory == NULL) && (directory != NULL))
9621 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9622
9623 xmlParseDocument(ctxt);
9624
9625 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9626 else {
9627 ret = NULL;
9628 xmlFreeDoc(ctxt->myDoc);
9629 ctxt->myDoc = NULL;
9630 }
9631 if (sax != NULL)
9632 ctxt->sax = NULL;
9633 xmlFreeParserCtxt(ctxt);
9634
9635 return(ret);
9636}
9637
9638/**
9639 * xmlRecoverDoc:
9640 * @cur: a pointer to an array of xmlChar
9641 *
9642 * parse an XML in-memory document and build a tree.
9643 * In the case the document is not Well Formed, a tree is built anyway
9644 *
9645 * Returns the resulting document tree
9646 */
9647
9648xmlDocPtr
9649xmlRecoverDoc(xmlChar *cur) {
9650 return(xmlSAXParseDoc(NULL, cur, 1));
9651}
9652
9653/**
9654 * xmlParseFile:
9655 * @filename: the filename
9656 *
9657 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9658 * compressed document is provided by default if found at compile-time.
9659 *
9660 * Returns the resulting document tree
9661 */
9662
9663xmlDocPtr
9664xmlParseFile(const char *filename) {
9665 return(xmlSAXParseFile(NULL, filename, 0));
9666}
9667
9668/**
9669 * xmlRecoverFile:
9670 * @filename: the filename
9671 *
9672 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9673 * compressed document is provided by default if found at compile-time.
9674 * In the case the document is not Well Formed, a tree is built anyway
9675 *
9676 * Returns the resulting document tree
9677 */
9678
9679xmlDocPtr
9680xmlRecoverFile(const char *filename) {
9681 return(xmlSAXParseFile(NULL, filename, 1));
9682}
9683
9684
9685/**
9686 * xmlSetupParserForBuffer:
9687 * @ctxt: an XML parser context
9688 * @buffer: a xmlChar * buffer
9689 * @filename: a file name
9690 *
9691 * Setup the parser context to parse a new buffer; Clears any prior
9692 * contents from the parser context. The buffer parameter must not be
9693 * NULL, but the filename parameter can be
9694 */
9695void
9696xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9697 const char* filename)
9698{
9699 xmlParserInputPtr input;
9700
9701 input = xmlNewInputStream(ctxt);
9702 if (input == NULL) {
9703 perror("malloc");
9704 xmlFree(ctxt);
9705 return;
9706 }
9707
9708 xmlClearParserCtxt(ctxt);
9709 if (filename != NULL)
9710 input->filename = xmlMemStrdup(filename);
9711 input->base = buffer;
9712 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009713 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009714 inputPush(ctxt, input);
9715}
9716
9717/**
9718 * xmlSAXUserParseFile:
9719 * @sax: a SAX handler
9720 * @user_data: The user data returned on SAX callbacks
9721 * @filename: a file name
9722 *
9723 * parse an XML file and call the given SAX handler routines.
9724 * Automatic support for ZLIB/Compress compressed document is provided
9725 *
9726 * Returns 0 in case of success or a error number otherwise
9727 */
9728int
9729xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9730 const char *filename) {
9731 int ret = 0;
9732 xmlParserCtxtPtr ctxt;
9733
9734 ctxt = xmlCreateFileParserCtxt(filename);
9735 if (ctxt == NULL) return -1;
9736 if (ctxt->sax != &xmlDefaultSAXHandler)
9737 xmlFree(ctxt->sax);
9738 ctxt->sax = sax;
9739 if (user_data != NULL)
9740 ctxt->userData = user_data;
9741
9742 xmlParseDocument(ctxt);
9743
9744 if (ctxt->wellFormed)
9745 ret = 0;
9746 else {
9747 if (ctxt->errNo != 0)
9748 ret = ctxt->errNo;
9749 else
9750 ret = -1;
9751 }
9752 if (sax != NULL)
9753 ctxt->sax = NULL;
9754 xmlFreeParserCtxt(ctxt);
9755
9756 return ret;
9757}
9758
9759/************************************************************************
9760 * *
9761 * Front ends when parsing from memory *
9762 * *
9763 ************************************************************************/
9764
9765/**
9766 * xmlCreateMemoryParserCtxt:
9767 * @buffer: a pointer to a char array
9768 * @size: the size of the array
9769 *
9770 * Create a parser context for an XML in-memory document.
9771 *
9772 * Returns the new parser context or NULL
9773 */
9774xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009775xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009776 xmlParserCtxtPtr ctxt;
9777 xmlParserInputPtr input;
9778 xmlParserInputBufferPtr buf;
9779
9780 if (buffer == NULL)
9781 return(NULL);
9782 if (size <= 0)
9783 return(NULL);
9784
9785 ctxt = xmlNewParserCtxt();
9786 if (ctxt == NULL)
9787 return(NULL);
9788
9789 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9790 if (buf == NULL) return(NULL);
9791
9792 input = xmlNewInputStream(ctxt);
9793 if (input == NULL) {
9794 xmlFreeParserCtxt(ctxt);
9795 return(NULL);
9796 }
9797
9798 input->filename = NULL;
9799 input->buf = buf;
9800 input->base = input->buf->buffer->content;
9801 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009802 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009803
9804 inputPush(ctxt, input);
9805 return(ctxt);
9806}
9807
9808/**
9809 * xmlSAXParseMemory:
9810 * @sax: the SAX handler block
9811 * @buffer: an pointer to a char array
9812 * @size: the size of the array
9813 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9814 * documents
9815 *
9816 * parse an XML in-memory block and use the given SAX function block
9817 * to handle the parsing callback. If sax is NULL, fallback to the default
9818 * DOM tree building routines.
9819 *
9820 * Returns the resulting document tree
9821 */
9822xmlDocPtr
9823xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9824 xmlDocPtr ret;
9825 xmlParserCtxtPtr ctxt;
9826
9827 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9828 if (ctxt == NULL) return(NULL);
9829 if (sax != NULL) {
9830 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009831 }
9832
9833 xmlParseDocument(ctxt);
9834
9835 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9836 else {
9837 ret = NULL;
9838 xmlFreeDoc(ctxt->myDoc);
9839 ctxt->myDoc = NULL;
9840 }
9841 if (sax != NULL)
9842 ctxt->sax = NULL;
9843 xmlFreeParserCtxt(ctxt);
9844
9845 return(ret);
9846}
9847
9848/**
9849 * xmlParseMemory:
9850 * @buffer: an pointer to a char array
9851 * @size: the size of the array
9852 *
9853 * parse an XML in-memory block and build a tree.
9854 *
9855 * Returns the resulting document tree
9856 */
9857
9858xmlDocPtr xmlParseMemory(char *buffer, int size) {
9859 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9860}
9861
9862/**
9863 * xmlRecoverMemory:
9864 * @buffer: an pointer to a char array
9865 * @size: the size of the array
9866 *
9867 * parse an XML in-memory block and build a tree.
9868 * In the case the document is not Well Formed, a tree is built anyway
9869 *
9870 * Returns the resulting document tree
9871 */
9872
9873xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9874 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9875}
9876
9877/**
9878 * xmlSAXUserParseMemory:
9879 * @sax: a SAX handler
9880 * @user_data: The user data returned on SAX callbacks
9881 * @buffer: an in-memory XML document input
9882 * @size: the length of the XML document in bytes
9883 *
9884 * A better SAX parsing routine.
9885 * parse an XML in-memory buffer and call the given SAX handler routines.
9886 *
9887 * Returns 0 in case of success or a error number otherwise
9888 */
9889int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009890 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009891 int ret = 0;
9892 xmlParserCtxtPtr ctxt;
9893 xmlSAXHandlerPtr oldsax = NULL;
9894
9895 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9896 if (ctxt == NULL) return -1;
9897 if (sax != NULL) {
9898 oldsax = ctxt->sax;
9899 ctxt->sax = sax;
9900 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009901 if (user_data != NULL)
9902 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009903
9904 xmlParseDocument(ctxt);
9905
9906 if (ctxt->wellFormed)
9907 ret = 0;
9908 else {
9909 if (ctxt->errNo != 0)
9910 ret = ctxt->errNo;
9911 else
9912 ret = -1;
9913 }
9914 if (sax != NULL) {
9915 ctxt->sax = oldsax;
9916 }
9917 xmlFreeParserCtxt(ctxt);
9918
9919 return ret;
9920}
9921
9922/**
9923 * xmlCreateDocParserCtxt:
9924 * @cur: a pointer to an array of xmlChar
9925 *
9926 * Creates a parser context for an XML in-memory document.
9927 *
9928 * Returns the new parser context or NULL
9929 */
9930xmlParserCtxtPtr
9931xmlCreateDocParserCtxt(xmlChar *cur) {
9932 int len;
9933
9934 if (cur == NULL)
9935 return(NULL);
9936 len = xmlStrlen(cur);
9937 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9938}
9939
9940/**
9941 * xmlSAXParseDoc:
9942 * @sax: the SAX handler block
9943 * @cur: a pointer to an array of xmlChar
9944 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9945 * documents
9946 *
9947 * parse an XML in-memory document and build a tree.
9948 * It use the given SAX function block to handle the parsing callback.
9949 * If sax is NULL, fallback to the default DOM tree building routines.
9950 *
9951 * Returns the resulting document tree
9952 */
9953
9954xmlDocPtr
9955xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9956 xmlDocPtr ret;
9957 xmlParserCtxtPtr ctxt;
9958
9959 if (cur == NULL) return(NULL);
9960
9961
9962 ctxt = xmlCreateDocParserCtxt(cur);
9963 if (ctxt == NULL) return(NULL);
9964 if (sax != NULL) {
9965 ctxt->sax = sax;
9966 ctxt->userData = NULL;
9967 }
9968
9969 xmlParseDocument(ctxt);
9970 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9971 else {
9972 ret = NULL;
9973 xmlFreeDoc(ctxt->myDoc);
9974 ctxt->myDoc = NULL;
9975 }
9976 if (sax != NULL)
9977 ctxt->sax = NULL;
9978 xmlFreeParserCtxt(ctxt);
9979
9980 return(ret);
9981}
9982
9983/**
9984 * xmlParseDoc:
9985 * @cur: a pointer to an array of xmlChar
9986 *
9987 * parse an XML in-memory document and build a tree.
9988 *
9989 * Returns the resulting document tree
9990 */
9991
9992xmlDocPtr
9993xmlParseDoc(xmlChar *cur) {
9994 return(xmlSAXParseDoc(NULL, cur, 0));
9995}
9996
9997
9998/************************************************************************
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
10004#ifdef LIBXML_XPATH_ENABLED
10005#include <libxml/xpath.h>
10006#endif
10007
/* Set once xmlInitParser() has run, reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.  Only the first call does
 * the work; later calls return immediately until xmlCleanupParser() is
 * called.
 * This is not reentrant.  Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
10036
10037/**
10038 * xmlCleanupParser:
10039 *
10040 * Cleanup function for the XML parser. It tries to reclaim all
10041 * parsing related global memory allocated for the parser processing.
10042 * It doesn't deallocate any document related memory. Calling this
10043 * function should not prevent reusing the parser.
10044 */
10045
10046void
10047xmlCleanupParser(void) {
10048 xmlParserInitialized = 0;
10049 xmlCleanupCharEncodingHandlers();
10050 xmlCleanupPredefinedEntities();
10051}
10052
10053/**
10054 * xmlPedanticParserDefault:
10055 * @val: int 0 or 1
10056 *
10057 * Set and return the previous value for enabling pedantic warnings.
10058 *
10059 * Returns the last value for 0 for no substitution, 1 for substitution.
10060 */
10061
10062int
10063xmlPedanticParserDefault(int val) {
10064 int old = xmlPedanticParserDefaultValue;
10065
10066 xmlPedanticParserDefaultValue = val;
10067 return(old);
10068}
10069
10070/**
10071 * xmlSubstituteEntitiesDefault:
10072 * @val: int 0 or 1
10073 *
10074 * Set and return the previous value for default entity support.
10075 * Initially the parser always keep entity references instead of substituting
10076 * entity values in the output. This function has to be used to change the
10077 * default parser behaviour
10078 * SAX::subtituteEntities() has to be used for changing that on a file by
10079 * file basis.
10080 *
10081 * Returns the last value for 0 for no substitution, 1 for substitution.
10082 */
10083
10084int
10085xmlSubstituteEntitiesDefault(int val) {
10086 int old = xmlSubstituteEntitiesDefaultValue;
10087
10088 xmlSubstituteEntitiesDefaultValue = val;
10089 return(old);
10090}
10091
10092/**
10093 * xmlKeepBlanksDefault:
10094 * @val: int 0 or 1
10095 *
10096 * Set and return the previous value for default blanks text nodes support.
10097 * The 1.x version of the parser used an heuristic to try to detect
10098 * ignorable white spaces. As a result the SAX callback was generating
10099 * ignorableWhitespace() callbacks instead of characters() one, and when
10100 * using the DOM output text nodes containing those blanks were not generated.
10101 * The 2.x and later version will switch to the XML standard way and
10102 * ignorableWhitespace() are only generated when running the parser in
10103 * validating mode and when the current element doesn't allow CDATA or
10104 * mixed content.
10105 * This function is provided as a way to force the standard behaviour
10106 * on 1.X libs and to switch back to the old mode for compatibility when
10107 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10108 * by using xmlIsBlankNode() commodity function to detect the "empty"
10109 * nodes generated.
10110 * This value also affect autogeneration of indentation when saving code
10111 * if blanks sections are kept, indentation is not generated.
10112 *
10113 * Returns the last value for 0 for no substitution, 1 for substitution.
10114 */
10115
10116int
10117xmlKeepBlanksDefault(int val) {
10118 int old = xmlKeepBlanksDefaultValue;
10119
10120 xmlKeepBlanksDefaultValue = val;
10121 xmlIndentTreeOutput = !val;
10122 return(old);
10123}
10124