blob: 3c8e8beb12368807424461d1c18ee6c91f202287 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
82/*
Owen Taylor3473f882001-02-23 17:55:21 +000083 * List of XML prefixed PI allowed by W3C specs
84 */
85
Daniel Veillardb44025c2001-10-11 22:55:55 +000086static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000087 "xml-stylesheet",
88 NULL
89};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
Daniel Veillard8107a222002-01-13 14:10:10 +0000102static void
103xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
104 xmlNodePtr lastNode);
105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/************************************************************************
107 * *
108 * Parser stacks related functions and macros *
109 * *
110 ************************************************************************/
111
112xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
113 const xmlChar ** str);
114
115/*
116 * Generic function for accessing stacks in the Parser Context
117 */
118
119#define PUSH_AND_POP(scope, type, name) \
120scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
121 if (ctxt->name##Nr >= ctxt->name##Max) { \
122 ctxt->name##Max *= 2; \
123 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
124 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
125 if (ctxt->name##Tab == NULL) { \
126 xmlGenericError(xmlGenericErrorContext, \
127 "realloc failed !\n"); \
128 return(0); \
129 } \
130 } \
131 ctxt->name##Tab[ctxt->name##Nr] = value; \
132 ctxt->name = value; \
133 return(ctxt->name##Nr++); \
134} \
135scope type name##Pop(xmlParserCtxtPtr ctxt) { \
136 type ret; \
137 if (ctxt->name##Nr <= 0) return(0); \
138 ctxt->name##Nr--; \
139 if (ctxt->name##Nr > 0) \
140 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
141 else \
142 ctxt->name = NULL; \
143 ret = ctxt->name##Tab[ctxt->name##Nr]; \
144 ctxt->name##Tab[ctxt->name##Nr] = 0; \
145 return(ret); \
146} \
147
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000148/**
149 * inputPop:
150 * @ctxt: an XML parser context
151 *
152 * Pops the top parser input from the input stack
153 *
154 * Returns the input just removed
155 */
156/**
157 * inputPush:
158 * @ctxt: an XML parser context
159 * @input: the parser input
160 *
161 * Pushes a new parser input on top of the input stack
162 */
163/**
164 * namePop:
165 * @ctxt: an XML parser context
166 *
167 * Pops the top element name from the name stack
168 *
169 * Returns the name just removed
170 */
171/**
172 * namePush:
173 * @ctxt: an XML parser context
174 * @name: the element name
175 *
176 * Pushes a new element name on top of the name stack
177 */
178/**
179 * nodePop:
180 * @ctxt: an XML parser context
181 *
182 * Pops the top element node from the node stack
183 *
184 * Returns the node just removed
185 */
186/**
187 * nodePush:
188 * @ctxt: an XML parser context
189 * @node: the element node
190 *
191 * Pushes a new element node on top of the node stack
192 */
Owen Taylor3473f882001-02-23 17:55:21 +0000193/*
194 * Those macros actually generate the functions
195 */
196PUSH_AND_POP(extern, xmlParserInputPtr, input)
197PUSH_AND_POP(extern, xmlNodePtr, node)
198PUSH_AND_POP(extern, xmlChar*, name)
199
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000200static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000201 if (ctxt->spaceNr >= ctxt->spaceMax) {
202 ctxt->spaceMax *= 2;
203 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
204 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
205 if (ctxt->spaceTab == NULL) {
206 xmlGenericError(xmlGenericErrorContext,
207 "realloc failed !\n");
208 return(0);
209 }
210 }
211 ctxt->spaceTab[ctxt->spaceNr] = val;
212 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
213 return(ctxt->spaceNr++);
214}
215
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000216static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000217 int ret;
218 if (ctxt->spaceNr <= 0) return(0);
219 ctxt->spaceNr--;
220 if (ctxt->spaceNr > 0)
221 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
222 else
223 ctxt->space = NULL;
224 ret = ctxt->spaceTab[ctxt->spaceNr];
225 ctxt->spaceTab[ctxt->spaceNr] = -1;
226 return(ret);
227}
228
229/*
230 * Macros for accessing the content. Those should be used only by the parser,
231 * and not exported.
232 *
233 * Dirty macros, i.e. one often need to make assumption on the context to
234 * use them
235 *
236 * CUR_PTR return the current pointer to the xmlChar to be parsed.
237 * To be used with extreme caution since operations consuming
238 * characters may move the input buffer to a different location !
239 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
240 * This should be used internally by the parser
241 * only to compare to ASCII values otherwise it would break when
242 * running with UTF-8 encoding.
243 * RAW same as CUR but in the input buffer, bypass any token
244 * extraction that may have been done
 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
246 * to compare on ASCII based substring.
247 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
248 * strings within the parser.
249 *
250 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
251 *
252 * NEXT Skip to the next character, this does the proper decoding
253 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000254 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000255 * CUR_CHAR(l) returns the current unicode character (int), set l
256 * to the number of xmlChars used for the encoding [0-5].
257 * CUR_SCHAR same but operate on a string instead of the context
258 * COPY_BUF copy the current unicode char to the target buffer, increment
259 * the index
260 * GROW, SHRINK handling of input buffers
261 */
262
/*
 * All of these macros expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* current input byte, or -1 while a token is pending in ctxt->token */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* pending token if there is one, otherwise the current input byte */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

/*
 * Advance by val bytes (val is evaluated twice), hand a '%' over to
 * xmlParserHandlePEReference(), and pop the input when it is exhausted
 * and cannot be grown.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  } while (0)

/*
 * SHRINK, GROW and NEXT1 are deliberately left as a bare if/brace block
 * rather than do { } while (0): GROW is used without a trailing
 * semicolon in this file, which the do/while form would turn into a
 * syntax error. Do not use them as the body of an unbraced if/else.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) { \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* advance by exactly one byte, growing the input when a 0 is reached */
#define NEXT1 { \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * Advance by l bytes, updating the line/column counters from the byte
 * being left and clearing any pending token.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i. A single byte
 * is stored directly when l is 1, otherwise xmlCopyCharMultiByte() does
 * the encoding. Left as a bare if/else: always use it as a full
 * statement inside braces.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000315
316/**
317 * xmlSkipBlankChars:
318 * @ctxt: the XML parser context
319 *
320 * skip all blanks character found at that point in the input streams.
321 * It pops up finished entities in the process if allowable at that point.
322 *
323 * Returns the number of space chars skipped
324 */
325
326int
327xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000328 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000329
Daniel Veillard02141ea2001-04-30 11:46:40 +0000330 if (ctxt->token != 0) {
331 if (!IS_BLANK(ctxt->token))
332 return(0);
333 ctxt->token = 0;
334 res++;
335 }
Owen Taylor3473f882001-02-23 17:55:21 +0000336 /*
337 * It's Okay to use CUR/NEXT here since all the blanks are on
338 * the ASCII range.
339 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000340 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
341 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000342 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000343 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 cur = ctxt->input->cur;
346 while (IS_BLANK(*cur)) {
347 if (*cur == '\n') {
348 ctxt->input->line++; ctxt->input->col = 1;
349 }
350 cur++;
351 res++;
352 if (*cur == 0) {
353 ctxt->input->cur = cur;
354 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
355 cur = ctxt->input->cur;
356 }
357 }
358 ctxt->input->cur = cur;
359 } else {
360 int cur;
361 do {
362 cur = CUR;
363 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
364 NEXT;
365 cur = CUR;
366 res++;
367 }
368 while ((cur == 0) && (ctxt->inputNr > 1) &&
369 (ctxt->instate != XML_PARSER_COMMENT)) {
370 xmlPopInput(ctxt);
371 cur = CUR;
372 }
373 /*
374 * Need to handle support of entities branching here
375 */
376 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
377 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
378 }
Owen Taylor3473f882001-02-23 17:55:21 +0000379 return(res);
380}
381
382/************************************************************************
383 * *
384 * Commodity functions to handle entities *
385 * *
386 ************************************************************************/
387
388/**
389 * xmlPopInput:
390 * @ctxt: an XML parser context
391 *
392 * xmlPopInput: the current input pointed by ctxt->input came to an end
393 * pop it and return the next char.
394 *
395 * Returns the current xmlChar in the parser context
396 */
397xmlChar
398xmlPopInput(xmlParserCtxtPtr ctxt) {
399 if (ctxt->inputNr == 1) return(0); /* End of main Input */
400 if (xmlParserDebugEntities)
401 xmlGenericError(xmlGenericErrorContext,
402 "Popping input %d\n", ctxt->inputNr);
403 xmlFreeInputStream(inputPop(ctxt));
404 if ((*ctxt->input->cur == 0) &&
405 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
406 return(xmlPopInput(ctxt));
407 return(CUR);
408}
409
410/**
411 * xmlPushInput:
412 * @ctxt: an XML parser context
413 * @input: an XML parser input fragment (entity, XML fragment ...).
414 *
415 * xmlPushInput: switch to a new input stream which is stacked on top
416 * of the previous one(s).
417 */
418void
419xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
420 if (input == NULL) return;
421
422 if (xmlParserDebugEntities) {
423 if ((ctxt->input != NULL) && (ctxt->input->filename))
424 xmlGenericError(xmlGenericErrorContext,
425 "%s(%d): ", ctxt->input->filename,
426 ctxt->input->line);
427 xmlGenericError(xmlGenericErrorContext,
428 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
429 }
430 inputPush(ctxt, input);
431 GROW;
432}
433
434/**
435 * xmlParseCharRef:
436 * @ctxt: an XML parser context
437 *
438 * parse Reference declarations
439 *
440 * [66] CharRef ::= '&#' [0-9]+ ';' |
441 * '&#x' [0-9a-fA-F]+ ';'
442 *
443 * [ WFC: Legal Character ]
444 * Characters referred to using character references must match the
445 * production for Char.
446 *
447 * Returns the value parsed (as an int), 0 in case of error
448 */
449int
450xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000451 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000452 int count = 0;
453
454 if (ctxt->token != 0) {
455 val = ctxt->token;
456 ctxt->token = 0;
457 return(val);
458 }
459 /*
460 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
461 */
462 if ((RAW == '&') && (NXT(1) == '#') &&
463 (NXT(2) == 'x')) {
464 SKIP(3);
465 GROW;
466 while (RAW != ';') { /* loop blocked by count */
467 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
468 val = val * 16 + (CUR - '0');
469 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
470 val = val * 16 + (CUR - 'a') + 10;
471 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
472 val = val * 16 + (CUR - 'A') + 10;
473 else {
474 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
476 ctxt->sax->error(ctxt->userData,
477 "xmlParseCharRef: invalid hexadecimal value\n");
478 ctxt->wellFormed = 0;
479 ctxt->disableSAX = 1;
480 val = 0;
481 break;
482 }
483 NEXT;
484 count++;
485 }
486 if (RAW == ';') {
487 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
488 ctxt->nbChars ++;
489 ctxt->input->cur++;
490 }
491 } else if ((RAW == '&') && (NXT(1) == '#')) {
492 SKIP(2);
493 GROW;
494 while (RAW != ';') { /* loop blocked by count */
495 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
496 val = val * 10 + (CUR - '0');
497 else {
498 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
500 ctxt->sax->error(ctxt->userData,
501 "xmlParseCharRef: invalid decimal value\n");
502 ctxt->wellFormed = 0;
503 ctxt->disableSAX = 1;
504 val = 0;
505 break;
506 }
507 NEXT;
508 count++;
509 }
510 if (RAW == ';') {
511 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
512 ctxt->nbChars ++;
513 ctxt->input->cur++;
514 }
515 } else {
516 ctxt->errNo = XML_ERR_INVALID_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseCharRef: invalid value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 }
523
524 /*
525 * [ WFC: Legal Character ]
526 * Characters referred to using character references must match the
527 * production for Char.
528 */
529 if (IS_CHAR(val)) {
530 return(val);
531 } else {
532 ctxt->errNo = XML_ERR_INVALID_CHAR;
533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000534 ctxt->sax->error(ctxt->userData,
535 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000536 val);
537 ctxt->wellFormed = 0;
538 ctxt->disableSAX = 1;
539 }
540 return(0);
541}
542
543/**
544 * xmlParseStringCharRef:
545 * @ctxt: an XML parser context
546 * @str: a pointer to an index in the string
547 *
548 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
550 *
551 * [66] CharRef ::= '&#' [0-9]+ ';' |
552 * '&#x' [0-9a-fA-F]+ ';'
553 *
554 * [ WFC: Legal Character ]
555 * Characters referred to using character references must match the
556 * production for Char.
557 *
558 * Returns the value parsed (as an int), 0 in case of error, str will be
559 * updated to the current value of the index
560 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000561static int
Owen Taylor3473f882001-02-23 17:55:21 +0000562xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
563 const xmlChar *ptr;
564 xmlChar cur;
565 int val = 0;
566
567 if ((str == NULL) || (*str == NULL)) return(0);
568 ptr = *str;
569 cur = *ptr;
570 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
571 ptr += 3;
572 cur = *ptr;
573 while (cur != ';') { /* Non input consuming loop */
574 if ((cur >= '0') && (cur <= '9'))
575 val = val * 16 + (cur - '0');
576 else if ((cur >= 'a') && (cur <= 'f'))
577 val = val * 16 + (cur - 'a') + 10;
578 else if ((cur >= 'A') && (cur <= 'F'))
579 val = val * 16 + (cur - 'A') + 10;
580 else {
581 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
583 ctxt->sax->error(ctxt->userData,
584 "xmlParseStringCharRef: invalid hexadecimal value\n");
585 ctxt->wellFormed = 0;
586 ctxt->disableSAX = 1;
587 val = 0;
588 break;
589 }
590 ptr++;
591 cur = *ptr;
592 }
593 if (cur == ';')
594 ptr++;
595 } else if ((cur == '&') && (ptr[1] == '#')){
596 ptr += 2;
597 cur = *ptr;
598 while (cur != ';') { /* Non input consuming loops */
599 if ((cur >= '0') && (cur <= '9'))
600 val = val * 10 + (cur - '0');
601 else {
602 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604 ctxt->sax->error(ctxt->userData,
605 "xmlParseStringCharRef: invalid decimal value\n");
606 ctxt->wellFormed = 0;
607 ctxt->disableSAX = 1;
608 val = 0;
609 break;
610 }
611 ptr++;
612 cur = *ptr;
613 }
614 if (cur == ';')
615 ptr++;
616 } else {
617 ctxt->errNo = XML_ERR_INVALID_CHARREF;
618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
619 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000620 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000621 ctxt->wellFormed = 0;
622 ctxt->disableSAX = 1;
623 return(0);
624 }
625 *str = ptr;
626
627 /*
628 * [ WFC: Legal Character ]
629 * Characters referred to using character references must match the
630 * production for Char.
631 */
632 if (IS_CHAR(val)) {
633 return(val);
634 } else {
635 ctxt->errNo = XML_ERR_INVALID_CHAR;
636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
637 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000638 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000639 ctxt->wellFormed = 0;
640 ctxt->disableSAX = 1;
641 }
642 return(0);
643}
644
645/**
646 * xmlParserHandlePEReference:
647 * @ctxt: the parser context
648 *
649 * [69] PEReference ::= '%' Name ';'
650 *
651 * [ WFC: No Recursion ]
652 * A parsed entity must not contain a recursive
653 * reference to itself, either directly or indirectly.
654 *
655 * [ WFC: Entity Declared ]
656 * In a document without any DTD, a document with only an internal DTD
657 * subset which contains no parameter entity references, or a document
658 * with "standalone='yes'", ... ... The declaration of a parameter
659 * entity must precede any reference to it...
660 *
661 * [ VC: Entity Declared ]
662 * In a document with an external subset or external parameter entities
663 * with "standalone='no'", ... ... The declaration of a parameter entity
664 * must precede any reference to it...
665 *
666 * [ WFC: In DTD ]
667 * Parameter-entity references may only appear in the DTD.
668 * NOTE: misleading but this is handled.
669 *
670 * A PEReference may have been detected in the current input stream
671 * the handling is done accordingly to
672 * http://www.w3.org/TR/REC-xml#entproc
673 * i.e.
674 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000675 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000676 */
677void
678xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
679 xmlChar *name;
680 xmlEntityPtr entity = NULL;
681 xmlParserInputPtr input;
682
683 if (ctxt->token != 0) {
684 return;
685 }
686 if (RAW != '%') return;
687 switch(ctxt->instate) {
688 case XML_PARSER_CDATA_SECTION:
689 return;
690 case XML_PARSER_COMMENT:
691 return;
692 case XML_PARSER_START_TAG:
693 return;
694 case XML_PARSER_END_TAG:
695 return;
696 case XML_PARSER_EOF:
697 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
699 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
700 ctxt->wellFormed = 0;
701 ctxt->disableSAX = 1;
702 return;
703 case XML_PARSER_PROLOG:
704 case XML_PARSER_START:
705 case XML_PARSER_MISC:
706 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
708 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
709 ctxt->wellFormed = 0;
710 ctxt->disableSAX = 1;
711 return;
712 case XML_PARSER_ENTITY_DECL:
713 case XML_PARSER_CONTENT:
714 case XML_PARSER_ATTRIBUTE_VALUE:
715 case XML_PARSER_PI:
716 case XML_PARSER_SYSTEM_LITERAL:
717 /* we just ignore it there */
718 return;
719 case XML_PARSER_EPILOG:
720 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
722 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
723 ctxt->wellFormed = 0;
724 ctxt->disableSAX = 1;
725 return;
726 case XML_PARSER_ENTITY_VALUE:
727 /*
728 * NOTE: in the case of entity values, we don't do the
729 * substitution here since we need the literal
730 * entity value to be able to save the internal
731 * subset of the document.
732 * This will be handled by xmlStringDecodeEntities
733 */
734 return;
735 case XML_PARSER_DTD:
736 /*
737 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
738 * In the internal DTD subset, parameter-entity references
739 * can occur only where markup declarations can occur, not
740 * within markup declarations.
741 * In that case this is handled in xmlParseMarkupDecl
742 */
743 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
744 return;
745 break;
746 case XML_PARSER_IGNORE:
747 return;
748 }
749
750 NEXT;
751 name = xmlParseName(ctxt);
752 if (xmlParserDebugEntities)
753 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000754 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000755 if (name == NULL) {
756 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000758 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000759 ctxt->wellFormed = 0;
760 ctxt->disableSAX = 1;
761 } else {
762 if (RAW == ';') {
763 NEXT;
764 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
765 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
766 if (entity == NULL) {
767
768 /*
769 * [ WFC: Entity Declared ]
770 * In a document without any DTD, a document with only an
771 * internal DTD subset which contains no parameter entity
772 * references, or a document with "standalone='yes'", ...
773 * ... The declaration of a parameter entity must precede
774 * any reference to it...
775 */
776 if ((ctxt->standalone == 1) ||
777 ((ctxt->hasExternalSubset == 0) &&
778 (ctxt->hasPErefs == 0))) {
779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
780 ctxt->sax->error(ctxt->userData,
781 "PEReference: %%%s; not found\n", name);
782 ctxt->wellFormed = 0;
783 ctxt->disableSAX = 1;
784 } else {
785 /*
786 * [ VC: Entity Declared ]
787 * In a document with an external subset or external
788 * parameter entities with "standalone='no'", ...
789 * ... The declaration of a parameter entity must precede
790 * any reference to it...
791 */
792 if ((!ctxt->disableSAX) &&
793 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
794 ctxt->vctxt.error(ctxt->vctxt.userData,
795 "PEReference: %%%s; not found\n", name);
796 } else if ((!ctxt->disableSAX) &&
797 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
798 ctxt->sax->warning(ctxt->userData,
799 "PEReference: %%%s; not found\n", name);
800 ctxt->valid = 0;
801 }
802 } else {
803 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
804 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000805 xmlChar start[4];
806 xmlCharEncoding enc;
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808 /*
809 * handle the extra spaces added before and after
810 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000811 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000812 */
813 input = xmlNewEntityInputStream(ctxt, entity);
814 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000815
816 /*
817 * Get the 4 first bytes and decode the charset
818 * if enc != XML_CHAR_ENCODING_NONE
819 * plug some encoding conversion routines.
820 */
821 GROW
822 start[0] = RAW;
823 start[1] = NXT(1);
824 start[2] = NXT(2);
825 start[3] = NXT(3);
826 enc = xmlDetectCharEncoding(start, 4);
827 if (enc != XML_CHAR_ENCODING_NONE) {
828 xmlSwitchEncoding(ctxt, enc);
829 }
830
Owen Taylor3473f882001-02-23 17:55:21 +0000831 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
832 (RAW == '<') && (NXT(1) == '?') &&
833 (NXT(2) == 'x') && (NXT(3) == 'm') &&
834 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
835 xmlParseTextDecl(ctxt);
836 }
837 if (ctxt->token == 0)
838 ctxt->token = ' ';
839 } else {
840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
841 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000842 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000843 name);
844 ctxt->wellFormed = 0;
845 ctxt->disableSAX = 1;
846 }
847 }
848 } else {
849 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000852 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000853 ctxt->wellFormed = 0;
854 ctxt->disableSAX = 1;
855 }
856 xmlFree(name);
857 }
858}
859
860/*
861 * Macro used to grow the current buffer.
862 */
/*
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer to
 * that many xmlChar. It can only be used inside a function returning a
 * pointer: on allocation failure it releases the old buffer, sets
 * buffer to NULL and makes the enclosing function return NULL.
 * The result of xmlRealloc() goes through a temporary so that the old
 * allocation is still reachable, and can be freed, when it fails.
 */
#define growBuffer(buffer) { \
    xmlChar *grown_##buffer; \
    buffer##_size *= 2; \
    grown_##buffer = (xmlChar *) \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (grown_##buffer == NULL) { \
        perror("realloc failed"); \
        xmlFree(buffer); \
        buffer = NULL; \
        return(NULL); \
    } \
    buffer = grown_##buffer; \
}
872
873/**
874 * xmlStringDecodeEntities:
875 * @ctxt: the parser context
876 * @str: the input string
877 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
878 * @end: an end marker xmlChar, 0 if none
879 * @end2: an end marker xmlChar, 0 if none
880 * @end3: an end marker xmlChar, 0 if none
881 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000882 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000883 *
884 * [67] Reference ::= EntityRef | CharRef
885 *
886 * [69] PEReference ::= '%' Name ';'
887 *
888 * Returns A newly allocated string with the substitution done. The caller
889 * must deallocate it !
890 */
891xmlChar *
892xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
893 xmlChar end, xmlChar end2, xmlChar end3) {
894 xmlChar *buffer = NULL;
895 int buffer_size = 0;
896
897 xmlChar *current = NULL;
898 xmlEntityPtr ent;
899 int c,l;
900 int nbchars = 0;
901
902 if (str == NULL)
903 return(NULL);
904
905 if (ctxt->depth > 40) {
906 ctxt->errNo = XML_ERR_ENTITY_LOOP;
907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
908 ctxt->sax->error(ctxt->userData,
909 "Detected entity reference loop\n");
910 ctxt->wellFormed = 0;
911 ctxt->disableSAX = 1;
912 return(NULL);
913 }
914
915 /*
916 * allocate a translation buffer.
917 */
918 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
919 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
920 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000921 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000922 return(NULL);
923 }
924
925 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000926 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000927 * we are operating on already parsed values.
928 */
929 c = CUR_SCHAR(str, l);
930 while ((c != 0) && (c != end) && /* non input consuming loop */
931 (c != end2) && (c != end3)) {
932
933 if (c == 0) break;
934 if ((c == '&') && (str[1] == '#')) {
935 int val = xmlParseStringCharRef(ctxt, &str);
936 if (val != 0) {
937 COPY_BUF(0,buffer,nbchars,val);
938 }
939 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
940 if (xmlParserDebugEntities)
941 xmlGenericError(xmlGenericErrorContext,
942 "String decoding Entity Reference: %.30s\n",
943 str);
944 ent = xmlParseStringEntityRef(ctxt, &str);
945 if ((ent != NULL) &&
946 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
947 if (ent->content != NULL) {
948 COPY_BUF(0,buffer,nbchars,ent->content[0]);
949 } else {
950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
951 ctxt->sax->error(ctxt->userData,
952 "internal error entity has no content\n");
953 }
954 } else if ((ent != NULL) && (ent->content != NULL)) {
955 xmlChar *rep;
956
957 ctxt->depth++;
958 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
959 0, 0, 0);
960 ctxt->depth--;
961 if (rep != NULL) {
962 current = rep;
963 while (*current != 0) { /* non input consuming loop */
964 buffer[nbchars++] = *current++;
965 if (nbchars >
966 buffer_size - XML_PARSER_BUFFER_SIZE) {
967 growBuffer(buffer);
968 }
969 }
970 xmlFree(rep);
971 }
972 } else if (ent != NULL) {
973 int i = xmlStrlen(ent->name);
974 const xmlChar *cur = ent->name;
975
976 buffer[nbchars++] = '&';
977 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
978 growBuffer(buffer);
979 }
980 for (;i > 0;i--)
981 buffer[nbchars++] = *cur++;
982 buffer[nbchars++] = ';';
983 }
984 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
985 if (xmlParserDebugEntities)
986 xmlGenericError(xmlGenericErrorContext,
987 "String decoding PE Reference: %.30s\n", str);
988 ent = xmlParseStringPEReference(ctxt, &str);
989 if (ent != NULL) {
990 xmlChar *rep;
991
992 ctxt->depth++;
993 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
994 0, 0, 0);
995 ctxt->depth--;
996 if (rep != NULL) {
997 current = rep;
998 while (*current != 0) { /* non input consuming loop */
999 buffer[nbchars++] = *current++;
1000 if (nbchars >
1001 buffer_size - XML_PARSER_BUFFER_SIZE) {
1002 growBuffer(buffer);
1003 }
1004 }
1005 xmlFree(rep);
1006 }
1007 }
1008 } else {
1009 COPY_BUF(l,buffer,nbchars,c);
1010 str += l;
1011 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1012 growBuffer(buffer);
1013 }
1014 }
1015 c = CUR_SCHAR(str, l);
1016 }
1017 buffer[nbchars++] = 0;
1018 return(buffer);
1019}
1020
1021
1022/************************************************************************
1023 * *
1024 * Commodity functions to handle xmlChars *
1025 * *
1026 ************************************************************************/
1027
1028/**
1029 * xmlStrndup:
1030 * @cur: the input xmlChar *
1031 * @len: the len of @cur
1032 *
1033 * a strndup for array of xmlChar's
1034 *
1035 * Returns a new xmlChar * or NULL
1036 */
1037xmlChar *
1038xmlStrndup(const xmlChar *cur, int len) {
1039 xmlChar *ret;
1040
1041 if ((cur == NULL) || (len < 0)) return(NULL);
1042 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1043 if (ret == NULL) {
1044 xmlGenericError(xmlGenericErrorContext,
1045 "malloc of %ld byte failed\n",
1046 (len + 1) * (long)sizeof(xmlChar));
1047 return(NULL);
1048 }
1049 memcpy(ret, cur, len * sizeof(xmlChar));
1050 ret[len] = 0;
1051 return(ret);
1052}
1053
1054/**
1055 * xmlStrdup:
1056 * @cur: the input xmlChar *
1057 *
1058 * a strdup for array of xmlChar's. Since they are supposed to be
1059 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1060 * a termination mark of '0'.
1061 *
1062 * Returns a new xmlChar * or NULL
1063 */
1064xmlChar *
1065xmlStrdup(const xmlChar *cur) {
1066 const xmlChar *p = cur;
1067
1068 if (cur == NULL) return(NULL);
1069 while (*p != 0) p++; /* non input consuming */
1070 return(xmlStrndup(cur, p - cur));
1071}
1072
1073/**
1074 * xmlCharStrndup:
1075 * @cur: the input char *
1076 * @len: the len of @cur
1077 *
1078 * a strndup for char's to xmlChar's
1079 *
1080 * Returns a new xmlChar * or NULL
1081 */
1082
1083xmlChar *
1084xmlCharStrndup(const char *cur, int len) {
1085 int i;
1086 xmlChar *ret;
1087
1088 if ((cur == NULL) || (len < 0)) return(NULL);
1089 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1090 if (ret == NULL) {
1091 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1092 (len + 1) * (long)sizeof(xmlChar));
1093 return(NULL);
1094 }
1095 for (i = 0;i < len;i++)
1096 ret[i] = (xmlChar) cur[i];
1097 ret[len] = 0;
1098 return(ret);
1099}
1100
1101/**
1102 * xmlCharStrdup:
1103 * @cur: the input char *
1104 * @len: the len of @cur
1105 *
1106 * a strdup for char's to xmlChar's
1107 *
1108 * Returns a new xmlChar * or NULL
1109 */
1110
1111xmlChar *
1112xmlCharStrdup(const char *cur) {
1113 const char *p = cur;
1114
1115 if (cur == NULL) return(NULL);
1116 while (*p != '\0') p++; /* non input consuming */
1117 return(xmlCharStrndup(cur, p - cur));
1118}
1119
1120/**
1121 * xmlStrcmp:
1122 * @str1: the first xmlChar *
1123 * @str2: the second xmlChar *
1124 *
1125 * a strcmp for xmlChar's
1126 *
1127 * Returns the integer result of the comparison
1128 */
1129
1130int
1131xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1132 register int tmp;
1133
1134 if (str1 == str2) return(0);
1135 if (str1 == NULL) return(-1);
1136 if (str2 == NULL) return(1);
1137 do {
1138 tmp = *str1++ - *str2;
1139 if (tmp != 0) return(tmp);
1140 } while (*str2++ != 0);
1141 return 0;
1142}
1143
1144/**
1145 * xmlStrEqual:
1146 * @str1: the first xmlChar *
1147 * @str2: the second xmlChar *
1148 *
1149 * Check if both string are equal of have same content
1150 * Should be a bit more readable and faster than xmlStrEqual()
1151 *
1152 * Returns 1 if they are equal, 0 if they are different
1153 */
1154
1155int
1156xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1157 if (str1 == str2) return(1);
1158 if (str1 == NULL) return(0);
1159 if (str2 == NULL) return(0);
1160 do {
1161 if (*str1++ != *str2) return(0);
1162 } while (*str2++);
1163 return(1);
1164}
1165
1166/**
1167 * xmlStrncmp:
1168 * @str1: the first xmlChar *
1169 * @str2: the second xmlChar *
1170 * @len: the max comparison length
1171 *
1172 * a strncmp for xmlChar's
1173 *
1174 * Returns the integer result of the comparison
1175 */
1176
1177int
1178xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1179 register int tmp;
1180
1181 if (len <= 0) return(0);
1182 if (str1 == str2) return(0);
1183 if (str1 == NULL) return(-1);
1184 if (str2 == NULL) return(1);
1185 do {
1186 tmp = *str1++ - *str2;
1187 if (tmp != 0 || --len == 0) return(tmp);
1188 } while (*str2++ != 0);
1189 return 0;
1190}
1191
Daniel Veillardb44025c2001-10-11 22:55:55 +00001192static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001193 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1194 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1195 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1196 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1197 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1198 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1199 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1200 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1201 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1202 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1203 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1204 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1205 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1206 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1207 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1208 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1209 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1210 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1211 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1212 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1213 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1214 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1215 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1216 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1217 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1218 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1219 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1220 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1221 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1222 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1223 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1224 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1225};
1226
1227/**
1228 * xmlStrcasecmp:
1229 * @str1: the first xmlChar *
1230 * @str2: the second xmlChar *
1231 *
1232 * a strcasecmp for xmlChar's
1233 *
1234 * Returns the integer result of the comparison
1235 */
1236
1237int
1238xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1239 register int tmp;
1240
1241 if (str1 == str2) return(0);
1242 if (str1 == NULL) return(-1);
1243 if (str2 == NULL) return(1);
1244 do {
1245 tmp = casemap[*str1++] - casemap[*str2];
1246 if (tmp != 0) return(tmp);
1247 } while (*str2++ != 0);
1248 return 0;
1249}
1250
1251/**
1252 * xmlStrncasecmp:
1253 * @str1: the first xmlChar *
1254 * @str2: the second xmlChar *
1255 * @len: the max comparison length
1256 *
1257 * a strncasecmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1264 register int tmp;
1265
1266 if (len <= 0) return(0);
1267 if (str1 == str2) return(0);
1268 if (str1 == NULL) return(-1);
1269 if (str2 == NULL) return(1);
1270 do {
1271 tmp = casemap[*str1++] - casemap[*str2];
1272 if (tmp != 0 || --len == 0) return(tmp);
1273 } while (*str2++ != 0);
1274 return 0;
1275}
1276
1277/**
1278 * xmlStrchr:
1279 * @str: the xmlChar * array
1280 * @val: the xmlChar to search
1281 *
1282 * a strchr for xmlChar's
1283 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001284 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001285 */
1286
1287const xmlChar *
1288xmlStrchr(const xmlChar *str, xmlChar val) {
1289 if (str == NULL) return(NULL);
1290 while (*str != 0) { /* non input consuming */
1291 if (*str == val) return((xmlChar *) str);
1292 str++;
1293 }
1294 return(NULL);
1295}
1296
1297/**
1298 * xmlStrstr:
1299 * @str: the xmlChar * array (haystack)
1300 * @val: the xmlChar to search (needle)
1301 *
1302 * a strstr for xmlChar's
1303 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001304 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001305 */
1306
1307const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001308xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001309 int n;
1310
1311 if (str == NULL) return(NULL);
1312 if (val == NULL) return(NULL);
1313 n = xmlStrlen(val);
1314
1315 if (n == 0) return(str);
1316 while (*str != 0) { /* non input consuming */
1317 if (*str == *val) {
1318 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1319 }
1320 str++;
1321 }
1322 return(NULL);
1323}
1324
1325/**
1326 * xmlStrcasestr:
1327 * @str: the xmlChar * array (haystack)
1328 * @val: the xmlChar to search (needle)
1329 *
1330 * a case-ignoring strstr for xmlChar's
1331 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001332 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001333 */
1334
1335const xmlChar *
1336xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1337 int n;
1338
1339 if (str == NULL) return(NULL);
1340 if (val == NULL) return(NULL);
1341 n = xmlStrlen(val);
1342
1343 if (n == 0) return(str);
1344 while (*str != 0) { /* non input consuming */
1345 if (casemap[*str] == casemap[*val])
1346 if (!xmlStrncasecmp(str, val, n)) return(str);
1347 str++;
1348 }
1349 return(NULL);
1350}
1351
1352/**
1353 * xmlStrsub:
1354 * @str: the xmlChar * array (haystack)
1355 * @start: the index of the first char (zero based)
1356 * @len: the length of the substring
1357 *
1358 * Extract a substring of a given string
1359 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001360 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001361 */
1362
1363xmlChar *
1364xmlStrsub(const xmlChar *str, int start, int len) {
1365 int i;
1366
1367 if (str == NULL) return(NULL);
1368 if (start < 0) return(NULL);
1369 if (len < 0) return(NULL);
1370
1371 for (i = 0;i < start;i++) {
1372 if (*str == 0) return(NULL);
1373 str++;
1374 }
1375 if (*str == 0) return(NULL);
1376 return(xmlStrndup(str, len));
1377}
1378
1379/**
1380 * xmlStrlen:
1381 * @str: the xmlChar * array
1382 *
1383 * length of a xmlChar's string
1384 *
1385 * Returns the number of xmlChar contained in the ARRAY.
1386 */
1387
1388int
1389xmlStrlen(const xmlChar *str) {
1390 int len = 0;
1391
1392 if (str == NULL) return(0);
1393 while (*str != 0) { /* non input consuming */
1394 str++;
1395 len++;
1396 }
1397 return(len);
1398}
1399
1400/**
1401 * xmlStrncat:
1402 * @cur: the original xmlChar * array
1403 * @add: the xmlChar * array added
1404 * @len: the length of @add
1405 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001406 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001407 * first bytes of @add.
1408 *
1409 * Returns a new xmlChar *, the original @cur is reallocated if needed
1410 * and should not be freed
1411 */
1412
1413xmlChar *
1414xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1415 int size;
1416 xmlChar *ret;
1417
1418 if ((add == NULL) || (len == 0))
1419 return(cur);
1420 if (cur == NULL)
1421 return(xmlStrndup(add, len));
1422
1423 size = xmlStrlen(cur);
1424 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1425 if (ret == NULL) {
1426 xmlGenericError(xmlGenericErrorContext,
1427 "xmlStrncat: realloc of %ld byte failed\n",
1428 (size + len + 1) * (long)sizeof(xmlChar));
1429 return(cur);
1430 }
1431 memcpy(&ret[size], add, len * sizeof(xmlChar));
1432 ret[size + len] = 0;
1433 return(ret);
1434}
1435
1436/**
1437 * xmlStrcat:
1438 * @cur: the original xmlChar * array
1439 * @add: the xmlChar * array added
1440 *
1441 * a strcat for array of xmlChar's. Since they are supposed to be
1442 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1443 * a termination mark of '0'.
1444 *
1445 * Returns a new xmlChar * containing the concatenated string.
1446 */
1447xmlChar *
1448xmlStrcat(xmlChar *cur, const xmlChar *add) {
1449 const xmlChar *p = add;
1450
1451 if (add == NULL) return(cur);
1452 if (cur == NULL)
1453 return(xmlStrdup(add));
1454
1455 while (*p != 0) p++; /* non input consuming */
1456 return(xmlStrncat(cur, add, p - add));
1457}
1458
1459/************************************************************************
1460 * *
1461 * Commodity functions, cleanup needed ? *
1462 * *
1463 ************************************************************************/
1464
1465/**
1466 * areBlanks:
1467 * @ctxt: an XML parser context
1468 * @str: a xmlChar *
1469 * @len: the size of @str
1470 *
1471 * Is this a sequence of blank chars that one can ignore ?
1472 *
1473 * Returns 1 if ignorable 0 otherwise.
1474 */
1475
1476static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1477 int i, ret;
1478 xmlNodePtr lastChild;
1479
Daniel Veillard05c13a22001-09-09 08:38:09 +00001480 /*
1481 * Don't spend time trying to differentiate them, the same callback is
1482 * used !
1483 */
1484 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001485 return(0);
1486
Owen Taylor3473f882001-02-23 17:55:21 +00001487 /*
1488 * Check for xml:space value.
1489 */
1490 if (*(ctxt->space) == 1)
1491 return(0);
1492
1493 /*
1494 * Check that the string is made of blanks
1495 */
1496 for (i = 0;i < len;i++)
1497 if (!(IS_BLANK(str[i]))) return(0);
1498
1499 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001500 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001501 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001502 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 if (ctxt->myDoc != NULL) {
1504 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1505 if (ret == 0) return(1);
1506 if (ret == 1) return(0);
1507 }
1508
1509 /*
1510 * Otherwise, heuristic :-\
1511 */
Owen Taylor3473f882001-02-23 17:55:21 +00001512 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001513 if ((ctxt->node->children == NULL) &&
1514 (RAW == '<') && (NXT(1) == '/')) return(0);
1515
1516 lastChild = xmlGetLastChild(ctxt->node);
1517 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001518 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1519 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 } else if (xmlNodeIsText(lastChild))
1521 return(0);
1522 else if ((ctxt->node->children != NULL) &&
1523 (xmlNodeIsText(ctxt->node->children)))
1524 return(0);
1525 return(1);
1526}
1527
1528/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001529 * Forward definition for recursive behavior.
Owen Taylor3473f882001-02-23 17:55:21 +00001530 */
1531void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1532void xmlParseReference(xmlParserCtxtPtr ctxt);
1533
1534/************************************************************************
1535 * *
1536 * Extra stuff for namespace support *
1537 * Relates to http://www.w3.org/TR/WD-xml-names *
1538 * *
1539 ************************************************************************/
1540
1541/**
1542 * xmlSplitQName:
1543 * @ctxt: an XML parser context
1544 * @name: an XML parser context
1545 * @prefix: a xmlChar **
1546 *
1547 * parse an UTF8 encoded XML qualified name string
1548 *
1549 * [NS 5] QName ::= (Prefix ':')? LocalPart
1550 *
1551 * [NS 6] Prefix ::= NCName
1552 *
1553 * [NS 7] LocalPart ::= NCName
1554 *
1555 * Returns the local part, and prefix is updated
1556 * to get the Prefix if any.
1557 */
1558
1559xmlChar *
1560xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1561 xmlChar buf[XML_MAX_NAMELEN + 5];
1562 xmlChar *buffer = NULL;
1563 int len = 0;
1564 int max = XML_MAX_NAMELEN;
1565 xmlChar *ret = NULL;
1566 const xmlChar *cur = name;
1567 int c;
1568
1569 *prefix = NULL;
1570
1571#ifndef XML_XML_NAMESPACE
1572 /* xml: prefix is not really a namespace */
1573 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1574 (cur[2] == 'l') && (cur[3] == ':'))
1575 return(xmlStrdup(name));
1576#endif
1577
1578 /* nasty but valid */
1579 if (cur[0] == ':')
1580 return(xmlStrdup(name));
1581
1582 c = *cur++;
1583 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1584 buf[len++] = c;
1585 c = *cur++;
1586 }
1587 if (len >= max) {
1588 /*
1589 * Okay someone managed to make a huge name, so he's ready to pay
1590 * for the processing speed.
1591 */
1592 max = len * 2;
1593
1594 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1595 if (buffer == NULL) {
1596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1597 ctxt->sax->error(ctxt->userData,
1598 "xmlSplitQName: out of memory\n");
1599 return(NULL);
1600 }
1601 memcpy(buffer, buf, len);
1602 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1603 if (len + 10 > max) {
1604 max *= 2;
1605 buffer = (xmlChar *) xmlRealloc(buffer,
1606 max * sizeof(xmlChar));
1607 if (buffer == NULL) {
1608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1609 ctxt->sax->error(ctxt->userData,
1610 "xmlSplitQName: out of memory\n");
1611 return(NULL);
1612 }
1613 }
1614 buffer[len++] = c;
1615 c = *cur++;
1616 }
1617 buffer[len] = 0;
1618 }
1619
1620 if (buffer == NULL)
1621 ret = xmlStrndup(buf, len);
1622 else {
1623 ret = buffer;
1624 buffer = NULL;
1625 max = XML_MAX_NAMELEN;
1626 }
1627
1628
1629 if (c == ':') {
1630 c = *cur++;
1631 if (c == 0) return(ret);
1632 *prefix = ret;
1633 len = 0;
1634
1635 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1636 buf[len++] = c;
1637 c = *cur++;
1638 }
1639 if (len >= max) {
1640 /*
1641 * Okay someone managed to make a huge name, so he's ready to pay
1642 * for the processing speed.
1643 */
1644 max = len * 2;
1645
1646 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1647 if (buffer == NULL) {
1648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649 ctxt->sax->error(ctxt->userData,
1650 "xmlSplitQName: out of memory\n");
1651 return(NULL);
1652 }
1653 memcpy(buffer, buf, len);
1654 while (c != 0) { /* tested bigname2.xml */
1655 if (len + 10 > max) {
1656 max *= 2;
1657 buffer = (xmlChar *) xmlRealloc(buffer,
1658 max * sizeof(xmlChar));
1659 if (buffer == NULL) {
1660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1661 ctxt->sax->error(ctxt->userData,
1662 "xmlSplitQName: out of memory\n");
1663 return(NULL);
1664 }
1665 }
1666 buffer[len++] = c;
1667 c = *cur++;
1668 }
1669 buffer[len] = 0;
1670 }
1671
1672 if (buffer == NULL)
1673 ret = xmlStrndup(buf, len);
1674 else {
1675 ret = buffer;
1676 }
1677 }
1678
1679 return(ret);
1680}
1681
1682/************************************************************************
1683 * *
1684 * The parser itself *
1685 * Relates to http://www.w3.org/TR/REC-xml *
1686 * *
1687 ************************************************************************/
1688
Daniel Veillard76d66f42001-05-16 21:05:17 +00001689static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001690/**
1691 * xmlParseName:
1692 * @ctxt: an XML parser context
1693 *
1694 * parse an XML name.
1695 *
1696 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1697 * CombiningChar | Extender
1698 *
1699 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1700 *
1701 * [6] Names ::= Name (S Name)*
1702 *
1703 * Returns the Name parsed or NULL
1704 */
1705
1706xmlChar *
1707xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001708 const xmlChar *in;
1709 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001710 int count = 0;
1711
1712 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001713
1714 /*
1715 * Accelerator for simple ASCII names
1716 */
1717 in = ctxt->input->cur;
1718 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1719 ((*in >= 0x41) && (*in <= 0x5A)) ||
1720 (*in == '_') || (*in == ':')) {
1721 in++;
1722 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1723 ((*in >= 0x41) && (*in <= 0x5A)) ||
1724 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001725 (*in == '_') || (*in == '-') ||
1726 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001727 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001728 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001729 count = in - ctxt->input->cur;
1730 ret = xmlStrndup(ctxt->input->cur, count);
1731 ctxt->input->cur = in;
1732 return(ret);
1733 }
1734 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001735 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001736}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737
Daniel Veillard76d66f42001-05-16 21:05:17 +00001738static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001739xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1740 xmlChar buf[XML_MAX_NAMELEN + 5];
1741 int len = 0, l;
1742 int c;
1743 int count = 0;
1744
1745 /*
1746 * Handler for more complex cases
1747 */
1748 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001749 c = CUR_CHAR(l);
1750 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1751 (!IS_LETTER(c) && (c != '_') &&
1752 (c != ':'))) {
1753 return(NULL);
1754 }
1755
1756 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1757 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1758 (c == '.') || (c == '-') ||
1759 (c == '_') || (c == ':') ||
1760 (IS_COMBINING(c)) ||
1761 (IS_EXTENDER(c)))) {
1762 if (count++ > 100) {
1763 count = 0;
1764 GROW;
1765 }
1766 COPY_BUF(l,buf,len,c);
1767 NEXTL(l);
1768 c = CUR_CHAR(l);
1769 if (len >= XML_MAX_NAMELEN) {
1770 /*
1771 * Okay someone managed to make a huge name, so he's ready to pay
1772 * for the processing speed.
1773 */
1774 xmlChar *buffer;
1775 int max = len * 2;
1776
1777 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1778 if (buffer == NULL) {
1779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1780 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001781 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001782 return(NULL);
1783 }
1784 memcpy(buffer, buf, len);
1785 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1786 (c == '.') || (c == '-') ||
1787 (c == '_') || (c == ':') ||
1788 (IS_COMBINING(c)) ||
1789 (IS_EXTENDER(c))) {
1790 if (count++ > 100) {
1791 count = 0;
1792 GROW;
1793 }
1794 if (len + 10 > max) {
1795 max *= 2;
1796 buffer = (xmlChar *) xmlRealloc(buffer,
1797 max * sizeof(xmlChar));
1798 if (buffer == NULL) {
1799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1800 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001801 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001802 return(NULL);
1803 }
1804 }
1805 COPY_BUF(l,buffer,len,c);
1806 NEXTL(l);
1807 c = CUR_CHAR(l);
1808 }
1809 buffer[len] = 0;
1810 return(buffer);
1811 }
1812 }
1813 return(xmlStrndup(buf, len));
1814}
1815
1816/**
1817 * xmlParseStringName:
1818 * @ctxt: an XML parser context
1819 * @str: a pointer to the string pointer (IN/OUT)
1820 *
1821 * parse an XML name.
1822 *
1823 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1824 * CombiningChar | Extender
1825 *
1826 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1827 *
1828 * [6] Names ::= Name (S Name)*
1829 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001830 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001831 * is updated to the current location in the string.
1832 */
1833
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001834static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001835xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1836 xmlChar buf[XML_MAX_NAMELEN + 5];
1837 const xmlChar *cur = *str;
1838 int len = 0, l;
1839 int c;
1840
1841 c = CUR_SCHAR(cur, l);
1842 if (!IS_LETTER(c) && (c != '_') &&
1843 (c != ':')) {
1844 return(NULL);
1845 }
1846
1847 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1848 (c == '.') || (c == '-') ||
1849 (c == '_') || (c == ':') ||
1850 (IS_COMBINING(c)) ||
1851 (IS_EXTENDER(c))) {
1852 COPY_BUF(l,buf,len,c);
1853 cur += l;
1854 c = CUR_SCHAR(cur, l);
1855 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1856 /*
1857 * Okay someone managed to make a huge name, so he's ready to pay
1858 * for the processing speed.
1859 */
1860 xmlChar *buffer;
1861 int max = len * 2;
1862
1863 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1864 if (buffer == NULL) {
1865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1866 ctxt->sax->error(ctxt->userData,
1867 "xmlParseStringName: out of memory\n");
1868 return(NULL);
1869 }
1870 memcpy(buffer, buf, len);
1871 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1872 (c == '.') || (c == '-') ||
1873 (c == '_') || (c == ':') ||
1874 (IS_COMBINING(c)) ||
1875 (IS_EXTENDER(c))) {
1876 if (len + 10 > max) {
1877 max *= 2;
1878 buffer = (xmlChar *) xmlRealloc(buffer,
1879 max * sizeof(xmlChar));
1880 if (buffer == NULL) {
1881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1882 ctxt->sax->error(ctxt->userData,
1883 "xmlParseStringName: out of memory\n");
1884 return(NULL);
1885 }
1886 }
1887 COPY_BUF(l,buffer,len,c);
1888 cur += l;
1889 c = CUR_SCHAR(cur, l);
1890 }
1891 buffer[len] = 0;
1892 *str = cur;
1893 return(buffer);
1894 }
1895 }
1896 *str = cur;
1897 return(xmlStrndup(buf, len));
1898}
1899
1900/**
1901 * xmlParseNmtoken:
1902 * @ctxt: an XML parser context
1903 *
1904 * parse an XML Nmtoken.
1905 *
1906 * [7] Nmtoken ::= (NameChar)+
1907 *
1908 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1909 *
1910 * Returns the Nmtoken parsed or NULL
1911 */
1912
1913xmlChar *
1914xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1915 xmlChar buf[XML_MAX_NAMELEN + 5];
1916 int len = 0, l;
1917 int c;
1918 int count = 0;
1919
1920 GROW;
1921 c = CUR_CHAR(l);
1922
1923 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1924 (c == '.') || (c == '-') ||
1925 (c == '_') || (c == ':') ||
1926 (IS_COMBINING(c)) ||
1927 (IS_EXTENDER(c))) {
1928 if (count++ > 100) {
1929 count = 0;
1930 GROW;
1931 }
1932 COPY_BUF(l,buf,len,c);
1933 NEXTL(l);
1934 c = CUR_CHAR(l);
1935 if (len >= XML_MAX_NAMELEN) {
1936 /*
1937 * Okay someone managed to make a huge token, so he's ready to pay
1938 * for the processing speed.
1939 */
1940 xmlChar *buffer;
1941 int max = len * 2;
1942
1943 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1944 if (buffer == NULL) {
1945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1946 ctxt->sax->error(ctxt->userData,
1947 "xmlParseNmtoken: out of memory\n");
1948 return(NULL);
1949 }
1950 memcpy(buffer, buf, len);
1951 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1952 (c == '.') || (c == '-') ||
1953 (c == '_') || (c == ':') ||
1954 (IS_COMBINING(c)) ||
1955 (IS_EXTENDER(c))) {
1956 if (count++ > 100) {
1957 count = 0;
1958 GROW;
1959 }
1960 if (len + 10 > max) {
1961 max *= 2;
1962 buffer = (xmlChar *) xmlRealloc(buffer,
1963 max * sizeof(xmlChar));
1964 if (buffer == NULL) {
1965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1966 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001967 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001968 return(NULL);
1969 }
1970 }
1971 COPY_BUF(l,buffer,len,c);
1972 NEXTL(l);
1973 c = CUR_CHAR(l);
1974 }
1975 buffer[len] = 0;
1976 return(buffer);
1977 }
1978 }
1979 if (len == 0)
1980 return(NULL);
1981 return(xmlStrndup(buf, len));
1982}
1983
1984/**
1985 * xmlParseEntityValue:
1986 * @ctxt: an XML parser context
1987 * @orig: if non-NULL store a copy of the original entity value
1988 *
1989 * parse a value for ENTITY declarations
1990 *
1991 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1992 * "'" ([^%&'] | PEReference | Reference)* "'"
1993 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001994 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00001995 */
1996
1997xmlChar *
1998xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1999 xmlChar *buf = NULL;
2000 int len = 0;
2001 int size = XML_PARSER_BUFFER_SIZE;
2002 int c, l;
2003 xmlChar stop;
2004 xmlChar *ret = NULL;
2005 const xmlChar *cur = NULL;
2006 xmlParserInputPtr input;
2007
2008 if (RAW == '"') stop = '"';
2009 else if (RAW == '\'') stop = '\'';
2010 else {
2011 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2014 ctxt->wellFormed = 0;
2015 ctxt->disableSAX = 1;
2016 return(NULL);
2017 }
2018 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2019 if (buf == NULL) {
2020 xmlGenericError(xmlGenericErrorContext,
2021 "malloc of %d byte failed\n", size);
2022 return(NULL);
2023 }
2024
2025 /*
2026 * The content of the entity definition is copied in a buffer.
2027 */
2028
2029 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2030 input = ctxt->input;
2031 GROW;
2032 NEXT;
2033 c = CUR_CHAR(l);
2034 /*
2035 * NOTE: 4.4.5 Included in Literal
2036 * When a parameter entity reference appears in a literal entity
2037 * value, ... a single or double quote character in the replacement
2038 * text is always treated as a normal data character and will not
2039 * terminate the literal.
2040 * In practice it means we stop the loop only when back at parsing
2041 * the initial entity and the quote is found
2042 */
2043 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2044 (ctxt->input != input))) {
2045 if (len + 5 >= size) {
2046 size *= 2;
2047 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2048 if (buf == NULL) {
2049 xmlGenericError(xmlGenericErrorContext,
2050 "realloc of %d byte failed\n", size);
2051 return(NULL);
2052 }
2053 }
2054 COPY_BUF(l,buf,len,c);
2055 NEXTL(l);
2056 /*
2057 * Pop-up of finished entities.
2058 */
2059 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2060 xmlPopInput(ctxt);
2061
2062 GROW;
2063 c = CUR_CHAR(l);
2064 if (c == 0) {
2065 GROW;
2066 c = CUR_CHAR(l);
2067 }
2068 }
2069 buf[len] = 0;
2070
2071 /*
2072 * Raise problem w.r.t. '&' and '%' being used in non-entities
2073 * reference constructs. Note Charref will be handled in
2074 * xmlStringDecodeEntities()
2075 */
2076 cur = buf;
2077 while (*cur != 0) { /* non input consuming */
2078 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2079 xmlChar *name;
2080 xmlChar tmp = *cur;
2081
2082 cur++;
2083 name = xmlParseStringName(ctxt, &cur);
2084 if ((name == NULL) || (*cur != ';')) {
2085 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2086 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2087 ctxt->sax->error(ctxt->userData,
2088 "EntityValue: '%c' forbidden except for entities references\n",
2089 tmp);
2090 ctxt->wellFormed = 0;
2091 ctxt->disableSAX = 1;
2092 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002093 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2094 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002095 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2097 ctxt->sax->error(ctxt->userData,
2098 "EntityValue: PEReferences forbidden in internal subset\n",
2099 tmp);
2100 ctxt->wellFormed = 0;
2101 ctxt->disableSAX = 1;
2102 }
2103 if (name != NULL)
2104 xmlFree(name);
2105 }
2106 cur++;
2107 }
2108
2109 /*
2110 * Then PEReference entities are substituted.
2111 */
2112 if (c != stop) {
2113 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2115 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2116 ctxt->wellFormed = 0;
2117 ctxt->disableSAX = 1;
2118 xmlFree(buf);
2119 } else {
2120 NEXT;
2121 /*
2122 * NOTE: 4.4.7 Bypassed
2123 * When a general entity reference appears in the EntityValue in
2124 * an entity declaration, it is bypassed and left as is.
2125 * so XML_SUBSTITUTE_REF is not set here.
2126 */
2127 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2128 0, 0, 0);
2129 if (orig != NULL)
2130 *orig = buf;
2131 else
2132 xmlFree(buf);
2133 }
2134
2135 return(ret);
2136}
2137
2138/**
2139 * xmlParseAttValue:
2140 * @ctxt: an XML parser context
2141 *
2142 * parse a value for an attribute
2143 * Note: the parser won't do substitution of entities here, this
2144 * will be handled later in xmlStringGetNodeList
2145 *
2146 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2147 * "'" ([^<&'] | Reference)* "'"
2148 *
2149 * 3.3.3 Attribute-Value Normalization:
2150 * Before the value of an attribute is passed to the application or
2151 * checked for validity, the XML processor must normalize it as follows:
2152 * - a character reference is processed by appending the referenced
2153 * character to the attribute value
2154 * - an entity reference is processed by recursively processing the
2155 * replacement text of the entity
2156 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2157 * appending #x20 to the normalized value, except that only a single
2158 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2159 * parsed entity or the literal entity value of an internal parsed entity
2160 * - other characters are processed by appending them to the normalized value
2161 * If the declared value is not CDATA, then the XML processor must further
2162 * process the normalized attribute value by discarding any leading and
2163 * trailing space (#x20) characters, and by replacing sequences of space
2164 * (#x20) characters by a single space (#x20) character.
2165 * All attributes for which no declaration has been read should be treated
2166 * by a non-validating parser as if declared CDATA.
2167 *
2168 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2169 */
2170
2171xmlChar *
2172xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2173 xmlChar limit = 0;
2174 xmlChar *buf = NULL;
2175 int len = 0;
2176 int buf_size = 0;
2177 int c, l;
2178 xmlChar *current = NULL;
2179 xmlEntityPtr ent;
2180
2181
2182 SHRINK;
2183 if (NXT(0) == '"') {
2184 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2185 limit = '"';
2186 NEXT;
2187 } else if (NXT(0) == '\'') {
2188 limit = '\'';
2189 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2190 NEXT;
2191 } else {
2192 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2194 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2195 ctxt->wellFormed = 0;
2196 ctxt->disableSAX = 1;
2197 return(NULL);
2198 }
2199
2200 /*
2201 * allocate a translation buffer.
2202 */
2203 buf_size = XML_PARSER_BUFFER_SIZE;
2204 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2205 if (buf == NULL) {
2206 perror("xmlParseAttValue: malloc failed");
2207 return(NULL);
2208 }
2209
2210 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002211 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002212 */
2213 c = CUR_CHAR(l);
2214 while (((NXT(0) != limit) && /* checked */
2215 (c != '<')) || (ctxt->token != 0)) {
2216 if (c == 0) break;
2217 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002218 if (ctxt->replaceEntities) {
2219 if (len > buf_size - 10) {
2220 growBuffer(buf);
2221 }
2222 buf[len++] = '&';
2223 } else {
2224 /*
2225 * The reparsing will be done in xmlStringGetNodeList()
2226 * called by the attribute() function in SAX.c
2227 */
2228 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002229
Daniel Veillard319a7422001-09-11 09:27:09 +00002230 if (len > buf_size - 10) {
2231 growBuffer(buf);
2232 }
2233 current = &buffer[0];
2234 while (*current != 0) { /* non input consuming */
2235 buf[len++] = *current++;
2236 }
2237 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002238 }
Owen Taylor3473f882001-02-23 17:55:21 +00002239 } else if (c == '&') {
2240 if (NXT(1) == '#') {
2241 int val = xmlParseCharRef(ctxt);
2242 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002243 if (ctxt->replaceEntities) {
2244 if (len > buf_size - 10) {
2245 growBuffer(buf);
2246 }
2247 buf[len++] = '&';
2248 } else {
2249 /*
2250 * The reparsing will be done in xmlStringGetNodeList()
2251 * called by the attribute() function in SAX.c
2252 */
2253 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002254
Daniel Veillard319a7422001-09-11 09:27:09 +00002255 if (len > buf_size - 10) {
2256 growBuffer(buf);
2257 }
2258 current = &buffer[0];
2259 while (*current != 0) { /* non input consuming */
2260 buf[len++] = *current++;
2261 }
Owen Taylor3473f882001-02-23 17:55:21 +00002262 }
2263 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002264 if (len > buf_size - 10) {
2265 growBuffer(buf);
2266 }
Owen Taylor3473f882001-02-23 17:55:21 +00002267 len += xmlCopyChar(0, &buf[len], val);
2268 }
2269 } else {
2270 ent = xmlParseEntityRef(ctxt);
2271 if ((ent != NULL) &&
2272 (ctxt->replaceEntities != 0)) {
2273 xmlChar *rep;
2274
2275 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2276 rep = xmlStringDecodeEntities(ctxt, ent->content,
2277 XML_SUBSTITUTE_REF, 0, 0, 0);
2278 if (rep != NULL) {
2279 current = rep;
2280 while (*current != 0) { /* non input consuming */
2281 buf[len++] = *current++;
2282 if (len > buf_size - 10) {
2283 growBuffer(buf);
2284 }
2285 }
2286 xmlFree(rep);
2287 }
2288 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002289 if (len > buf_size - 10) {
2290 growBuffer(buf);
2291 }
Owen Taylor3473f882001-02-23 17:55:21 +00002292 if (ent->content != NULL)
2293 buf[len++] = ent->content[0];
2294 }
2295 } else if (ent != NULL) {
2296 int i = xmlStrlen(ent->name);
2297 const xmlChar *cur = ent->name;
2298
2299 /*
2300 * This may look absurd but is needed to detect
2301 * entities problems
2302 */
2303 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2304 (ent->content != NULL)) {
2305 xmlChar *rep;
2306 rep = xmlStringDecodeEntities(ctxt, ent->content,
2307 XML_SUBSTITUTE_REF, 0, 0, 0);
2308 if (rep != NULL)
2309 xmlFree(rep);
2310 }
2311
2312 /*
2313 * Just output the reference
2314 */
2315 buf[len++] = '&';
2316 if (len > buf_size - i - 10) {
2317 growBuffer(buf);
2318 }
2319 for (;i > 0;i--)
2320 buf[len++] = *cur++;
2321 buf[len++] = ';';
2322 }
2323 }
2324 } else {
2325 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2326 COPY_BUF(l,buf,len,0x20);
2327 if (len > buf_size - 10) {
2328 growBuffer(buf);
2329 }
2330 } else {
2331 COPY_BUF(l,buf,len,c);
2332 if (len > buf_size - 10) {
2333 growBuffer(buf);
2334 }
2335 }
2336 NEXTL(l);
2337 }
2338 GROW;
2339 c = CUR_CHAR(l);
2340 }
2341 buf[len++] = 0;
2342 if (RAW == '<') {
2343 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2345 ctxt->sax->error(ctxt->userData,
2346 "Unescaped '<' not allowed in attributes values\n");
2347 ctxt->wellFormed = 0;
2348 ctxt->disableSAX = 1;
2349 } else if (RAW != limit) {
2350 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2352 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2353 ctxt->wellFormed = 0;
2354 ctxt->disableSAX = 1;
2355 } else
2356 NEXT;
2357 return(buf);
2358}
2359
2360/**
2361 * xmlParseSystemLiteral:
2362 * @ctxt: an XML parser context
2363 *
2364 * parse an XML Literal
2365 *
2366 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2367 *
2368 * Returns the SystemLiteral parsed or NULL
2369 */
2370
2371xmlChar *
2372xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2373 xmlChar *buf = NULL;
2374 int len = 0;
2375 int size = XML_PARSER_BUFFER_SIZE;
2376 int cur, l;
2377 xmlChar stop;
2378 int state = ctxt->instate;
2379 int count = 0;
2380
2381 SHRINK;
2382 if (RAW == '"') {
2383 NEXT;
2384 stop = '"';
2385 } else if (RAW == '\'') {
2386 NEXT;
2387 stop = '\'';
2388 } else {
2389 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2391 ctxt->sax->error(ctxt->userData,
2392 "SystemLiteral \" or ' expected\n");
2393 ctxt->wellFormed = 0;
2394 ctxt->disableSAX = 1;
2395 return(NULL);
2396 }
2397
2398 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2399 if (buf == NULL) {
2400 xmlGenericError(xmlGenericErrorContext,
2401 "malloc of %d byte failed\n", size);
2402 return(NULL);
2403 }
2404 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2405 cur = CUR_CHAR(l);
2406 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2407 if (len + 5 >= size) {
2408 size *= 2;
2409 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2410 if (buf == NULL) {
2411 xmlGenericError(xmlGenericErrorContext,
2412 "realloc of %d byte failed\n", size);
2413 ctxt->instate = (xmlParserInputState) state;
2414 return(NULL);
2415 }
2416 }
2417 count++;
2418 if (count > 50) {
2419 GROW;
2420 count = 0;
2421 }
2422 COPY_BUF(l,buf,len,cur);
2423 NEXTL(l);
2424 cur = CUR_CHAR(l);
2425 if (cur == 0) {
2426 GROW;
2427 SHRINK;
2428 cur = CUR_CHAR(l);
2429 }
2430 }
2431 buf[len] = 0;
2432 ctxt->instate = (xmlParserInputState) state;
2433 if (!IS_CHAR(cur)) {
2434 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2436 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2437 ctxt->wellFormed = 0;
2438 ctxt->disableSAX = 1;
2439 } else {
2440 NEXT;
2441 }
2442 return(buf);
2443}
2444
2445/**
2446 * xmlParsePubidLiteral:
2447 * @ctxt: an XML parser context
2448 *
2449 * parse an XML public literal
2450 *
2451 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2452 *
2453 * Returns the PubidLiteral parsed or NULL.
2454 */
2455
2456xmlChar *
2457xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2458 xmlChar *buf = NULL;
2459 int len = 0;
2460 int size = XML_PARSER_BUFFER_SIZE;
2461 xmlChar cur;
2462 xmlChar stop;
2463 int count = 0;
2464
2465 SHRINK;
2466 if (RAW == '"') {
2467 NEXT;
2468 stop = '"';
2469 } else if (RAW == '\'') {
2470 NEXT;
2471 stop = '\'';
2472 } else {
2473 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2475 ctxt->sax->error(ctxt->userData,
2476 "SystemLiteral \" or ' expected\n");
2477 ctxt->wellFormed = 0;
2478 ctxt->disableSAX = 1;
2479 return(NULL);
2480 }
2481 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2482 if (buf == NULL) {
2483 xmlGenericError(xmlGenericErrorContext,
2484 "malloc of %d byte failed\n", size);
2485 return(NULL);
2486 }
2487 cur = CUR;
2488 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2489 if (len + 1 >= size) {
2490 size *= 2;
2491 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2492 if (buf == NULL) {
2493 xmlGenericError(xmlGenericErrorContext,
2494 "realloc of %d byte failed\n", size);
2495 return(NULL);
2496 }
2497 }
2498 buf[len++] = cur;
2499 count++;
2500 if (count > 50) {
2501 GROW;
2502 count = 0;
2503 }
2504 NEXT;
2505 cur = CUR;
2506 if (cur == 0) {
2507 GROW;
2508 SHRINK;
2509 cur = CUR;
2510 }
2511 }
2512 buf[len] = 0;
2513 if (cur != stop) {
2514 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2516 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2517 ctxt->wellFormed = 0;
2518 ctxt->disableSAX = 1;
2519 } else {
2520 NEXT;
2521 }
2522 return(buf);
2523}
2524
Daniel Veillard48b2f892001-02-25 16:11:03 +00002525void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002526/**
2527 * xmlParseCharData:
2528 * @ctxt: an XML parser context
2529 * @cdata: int indicating whether we are within a CDATA section
2530 *
2531 * parse a CharData section.
2532 * if we are within a CDATA section ']]>' marks an end of section.
2533 *
2534 * The right angle bracket (>) may be represented using the string "&gt;",
2535 * and must, for compatibility, be escaped using "&gt;" or a character
2536 * reference when it appears in the string "]]>" in content, when that
2537 * string is not marking the end of a CDATA section.
2538 *
2539 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2540 */
2541
2542void
2543xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002544 const xmlChar *in;
2545 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002546 int line = ctxt->input->line;
2547 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002548
2549 SHRINK;
2550 GROW;
2551 /*
2552 * Accelerated common case where input don't need to be
2553 * modified before passing it to the handler.
2554 */
2555 if ((ctxt->token == 0) && (!cdata)) {
2556 in = ctxt->input->cur;
2557 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002558get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559 while (((*in >= 0x20) && (*in != '<') &&
2560 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2561 in++;
2562 if (*in == 0xA) {
2563 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002564 in++;
2565 while (*in == 0xA) {
2566 ctxt->input->line++;
2567 in++;
2568 }
2569 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002570 }
2571 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002572 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002573 if (IS_BLANK(*ctxt->input->cur)) {
2574 const xmlChar *tmp = ctxt->input->cur;
2575 ctxt->input->cur = in;
2576 if (areBlanks(ctxt, tmp, nbchar)) {
2577 if (ctxt->sax->ignorableWhitespace != NULL)
2578 ctxt->sax->ignorableWhitespace(ctxt->userData,
2579 tmp, nbchar);
2580 } else {
2581 if (ctxt->sax->characters != NULL)
2582 ctxt->sax->characters(ctxt->userData,
2583 tmp, nbchar);
2584 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002585 line = ctxt->input->line;
2586 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002587 } else {
2588 if (ctxt->sax->characters != NULL)
2589 ctxt->sax->characters(ctxt->userData,
2590 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002591 line = ctxt->input->line;
2592 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002593 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002594 }
2595 ctxt->input->cur = in;
2596 if (*in == 0xD) {
2597 in++;
2598 if (*in == 0xA) {
2599 ctxt->input->cur = in;
2600 in++;
2601 ctxt->input->line++;
2602 continue; /* while */
2603 }
2604 in--;
2605 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002606 if (*in == '<') {
2607 return;
2608 }
2609 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002610 return;
2611 }
2612 SHRINK;
2613 GROW;
2614 in = ctxt->input->cur;
2615 } while ((*in >= 0x20) && (*in <= 0x7F));
2616 nbchar = 0;
2617 }
Daniel Veillard50582112001-03-26 22:52:16 +00002618 ctxt->input->line = line;
2619 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002620 xmlParseCharDataComplex(ctxt, cdata);
2621}
2622
2623void
2624xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002625 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2626 int nbchar = 0;
2627 int cur, l;
2628 int count = 0;
2629
2630 SHRINK;
2631 GROW;
2632 cur = CUR_CHAR(l);
2633 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2634 ((cur != '&') || (ctxt->token == '&')) &&
2635 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2636 if ((cur == ']') && (NXT(1) == ']') &&
2637 (NXT(2) == '>')) {
2638 if (cdata) break;
2639 else {
2640 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2642 ctxt->sax->error(ctxt->userData,
2643 "Sequence ']]>' not allowed in content\n");
2644 /* Should this be relaxed ??? I see a "must here */
2645 ctxt->wellFormed = 0;
2646 ctxt->disableSAX = 1;
2647 }
2648 }
2649 COPY_BUF(l,buf,nbchar,cur);
2650 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002652 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002653 */
2654 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2655 if (areBlanks(ctxt, buf, nbchar)) {
2656 if (ctxt->sax->ignorableWhitespace != NULL)
2657 ctxt->sax->ignorableWhitespace(ctxt->userData,
2658 buf, nbchar);
2659 } else {
2660 if (ctxt->sax->characters != NULL)
2661 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2662 }
2663 }
2664 nbchar = 0;
2665 }
2666 count++;
2667 if (count > 50) {
2668 GROW;
2669 count = 0;
2670 }
2671 NEXTL(l);
2672 cur = CUR_CHAR(l);
2673 }
2674 if (nbchar != 0) {
2675 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002676 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002677 */
2678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2679 if (areBlanks(ctxt, buf, nbchar)) {
2680 if (ctxt->sax->ignorableWhitespace != NULL)
2681 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2682 } else {
2683 if (ctxt->sax->characters != NULL)
2684 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2685 }
2686 }
2687 }
2688}
2689
2690/**
2691 * xmlParseExternalID:
2692 * @ctxt: an XML parser context
2693 * @publicID: a xmlChar** receiving PubidLiteral
2694 * @strict: indicate whether we should restrict parsing to only
2695 * production [75], see NOTE below
2696 *
2697 * Parse an External ID or a Public ID
2698 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002699 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002700 * 'PUBLIC' S PubidLiteral S SystemLiteral
2701 *
2702 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2703 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2704 *
2705 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2706 *
2707 * Returns the function returns SystemLiteral and in the second
2708 * case publicID receives PubidLiteral, is strict is off
2709 * it is possible to return NULL and have publicID set.
2710 */
2711
2712xmlChar *
2713xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2714 xmlChar *URI = NULL;
2715
2716 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002717
2718 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2720 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2721 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2722 SKIP(6);
2723 if (!IS_BLANK(CUR)) {
2724 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2726 ctxt->sax->error(ctxt->userData,
2727 "Space required after 'SYSTEM'\n");
2728 ctxt->wellFormed = 0;
2729 ctxt->disableSAX = 1;
2730 }
2731 SKIP_BLANKS;
2732 URI = xmlParseSystemLiteral(ctxt);
2733 if (URI == NULL) {
2734 ctxt->errNo = XML_ERR_URI_REQUIRED;
2735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2736 ctxt->sax->error(ctxt->userData,
2737 "xmlParseExternalID: SYSTEM, no URI\n");
2738 ctxt->wellFormed = 0;
2739 ctxt->disableSAX = 1;
2740 }
2741 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2742 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2743 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2744 SKIP(6);
2745 if (!IS_BLANK(CUR)) {
2746 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2748 ctxt->sax->error(ctxt->userData,
2749 "Space required after 'PUBLIC'\n");
2750 ctxt->wellFormed = 0;
2751 ctxt->disableSAX = 1;
2752 }
2753 SKIP_BLANKS;
2754 *publicID = xmlParsePubidLiteral(ctxt);
2755 if (*publicID == NULL) {
2756 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2758 ctxt->sax->error(ctxt->userData,
2759 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2760 ctxt->wellFormed = 0;
2761 ctxt->disableSAX = 1;
2762 }
2763 if (strict) {
2764 /*
2765 * We don't handle [83] so "S SystemLiteral" is required.
2766 */
2767 if (!IS_BLANK(CUR)) {
2768 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2770 ctxt->sax->error(ctxt->userData,
2771 "Space required after the Public Identifier\n");
2772 ctxt->wellFormed = 0;
2773 ctxt->disableSAX = 1;
2774 }
2775 } else {
2776 /*
2777 * We handle [83] so we return immediately, if
2778 * "S SystemLiteral" is not detected. From a purely parsing
2779 * point of view that's a nice mess.
2780 */
2781 const xmlChar *ptr;
2782 GROW;
2783
2784 ptr = CUR_PTR;
2785 if (!IS_BLANK(*ptr)) return(NULL);
2786
2787 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2788 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2789 }
2790 SKIP_BLANKS;
2791 URI = xmlParseSystemLiteral(ctxt);
2792 if (URI == NULL) {
2793 ctxt->errNo = XML_ERR_URI_REQUIRED;
2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "xmlParseExternalID: PUBLIC, no URI\n");
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 }
2801 return(URI);
2802}
2803
2804/**
2805 * xmlParseComment:
2806 * @ctxt: an XML parser context
2807 *
2808 * Skip an XML (SGML) comment <!-- .... -->
2809 * The spec says that "For compatibility, the string "--" (double-hyphen)
2810 * must not occur within comments. "
2811 *
2812 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2813 */
2814void
2815xmlParseComment(xmlParserCtxtPtr ctxt) {
2816 xmlChar *buf = NULL;
2817 int len;
2818 int size = XML_PARSER_BUFFER_SIZE;
2819 int q, ql;
2820 int r, rl;
2821 int cur, l;
2822 xmlParserInputState state;
2823 xmlParserInputPtr input = ctxt->input;
2824 int count = 0;
2825
2826 /*
2827 * Check that there is a comment right here.
2828 */
2829 if ((RAW != '<') || (NXT(1) != '!') ||
2830 (NXT(2) != '-') || (NXT(3) != '-')) return;
2831
2832 state = ctxt->instate;
2833 ctxt->instate = XML_PARSER_COMMENT;
2834 SHRINK;
2835 SKIP(4);
2836 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2837 if (buf == NULL) {
2838 xmlGenericError(xmlGenericErrorContext,
2839 "malloc of %d byte failed\n", size);
2840 ctxt->instate = state;
2841 return;
2842 }
2843 q = CUR_CHAR(ql);
2844 NEXTL(ql);
2845 r = CUR_CHAR(rl);
2846 NEXTL(rl);
2847 cur = CUR_CHAR(l);
2848 len = 0;
2849 while (IS_CHAR(cur) && /* checked */
2850 ((cur != '>') ||
2851 (r != '-') || (q != '-'))) {
2852 if ((r == '-') && (q == '-') && (len > 1)) {
2853 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2855 ctxt->sax->error(ctxt->userData,
2856 "Comment must not contain '--' (double-hyphen)`\n");
2857 ctxt->wellFormed = 0;
2858 ctxt->disableSAX = 1;
2859 }
2860 if (len + 5 >= size) {
2861 size *= 2;
2862 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2863 if (buf == NULL) {
2864 xmlGenericError(xmlGenericErrorContext,
2865 "realloc of %d byte failed\n", size);
2866 ctxt->instate = state;
2867 return;
2868 }
2869 }
2870 COPY_BUF(ql,buf,len,q);
2871 q = r;
2872 ql = rl;
2873 r = cur;
2874 rl = l;
2875
2876 count++;
2877 if (count > 50) {
2878 GROW;
2879 count = 0;
2880 }
2881 NEXTL(l);
2882 cur = CUR_CHAR(l);
2883 if (cur == 0) {
2884 SHRINK;
2885 GROW;
2886 cur = CUR_CHAR(l);
2887 }
2888 }
2889 buf[len] = 0;
2890 if (!IS_CHAR(cur)) {
2891 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2893 ctxt->sax->error(ctxt->userData,
2894 "Comment not terminated \n<!--%.50s\n", buf);
2895 ctxt->wellFormed = 0;
2896 ctxt->disableSAX = 1;
2897 xmlFree(buf);
2898 } else {
2899 if (input != ctxt->input) {
2900 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2902 ctxt->sax->error(ctxt->userData,
2903"Comment doesn't start and stop in the same entity\n");
2904 ctxt->wellFormed = 0;
2905 ctxt->disableSAX = 1;
2906 }
2907 NEXT;
2908 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2909 (!ctxt->disableSAX))
2910 ctxt->sax->comment(ctxt->userData, buf);
2911 xmlFree(buf);
2912 }
2913 ctxt->instate = state;
2914}
2915
2916/**
2917 * xmlParsePITarget:
2918 * @ctxt: an XML parser context
2919 *
2920 * parse the name of a PI
2921 *
2922 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2923 *
2924 * Returns the PITarget name or NULL
2925 */
2926
2927xmlChar *
2928xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2929 xmlChar *name;
2930
2931 name = xmlParseName(ctxt);
2932 if ((name != NULL) &&
2933 ((name[0] == 'x') || (name[0] == 'X')) &&
2934 ((name[1] == 'm') || (name[1] == 'M')) &&
2935 ((name[2] == 'l') || (name[2] == 'L'))) {
2936 int i;
2937 if ((name[0] == 'x') && (name[1] == 'm') &&
2938 (name[2] == 'l') && (name[3] == 0)) {
2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2941 ctxt->sax->error(ctxt->userData,
2942 "XML declaration allowed only at the start of the document\n");
2943 ctxt->wellFormed = 0;
2944 ctxt->disableSAX = 1;
2945 return(name);
2946 } else if (name[3] == 0) {
2947 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2949 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2950 ctxt->wellFormed = 0;
2951 ctxt->disableSAX = 1;
2952 return(name);
2953 }
2954 for (i = 0;;i++) {
2955 if (xmlW3CPIs[i] == NULL) break;
2956 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2957 return(name);
2958 }
2959 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2960 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2961 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002962 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002963 }
2964 }
2965 return(name);
2966}
2967
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A syntax error in the PI value only raises a warning, except for a
 * missing '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* Expect: S? "catalog" S? '=' S? quoted-URL S? */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    for (p++; IS_BLANK(*p); p++)
        ;

    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* Only blanks may follow the closing quote. */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3030
Owen Taylor3473f882001-02-23 17:55:21 +00003031/**
3032 * xmlParsePI:
3033 * @ctxt: an XML parser context
3034 *
3035 * parse an XML Processing Instruction.
3036 *
3037 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3038 *
3039 * The processing is transfered to SAX once parsed.
3040 */
3041
3042void
3043xmlParsePI(xmlParserCtxtPtr ctxt) {
3044 xmlChar *buf = NULL;
3045 int len = 0;
3046 int size = XML_PARSER_BUFFER_SIZE;
3047 int cur, l;
3048 xmlChar *target;
3049 xmlParserInputState state;
3050 int count = 0;
3051
3052 if ((RAW == '<') && (NXT(1) == '?')) {
3053 xmlParserInputPtr input = ctxt->input;
3054 state = ctxt->instate;
3055 ctxt->instate = XML_PARSER_PI;
3056 /*
3057 * this is a Processing Instruction.
3058 */
3059 SKIP(2);
3060 SHRINK;
3061
3062 /*
3063 * Parse the target name and check for special support like
3064 * namespace.
3065 */
3066 target = xmlParsePITarget(ctxt);
3067 if (target != NULL) {
3068 if ((RAW == '?') && (NXT(1) == '>')) {
3069 if (input != ctxt->input) {
3070 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3072 ctxt->sax->error(ctxt->userData,
3073 "PI declaration doesn't start and stop in the same entity\n");
3074 ctxt->wellFormed = 0;
3075 ctxt->disableSAX = 1;
3076 }
3077 SKIP(2);
3078
3079 /*
3080 * SAX: PI detected.
3081 */
3082 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3083 (ctxt->sax->processingInstruction != NULL))
3084 ctxt->sax->processingInstruction(ctxt->userData,
3085 target, NULL);
3086 ctxt->instate = state;
3087 xmlFree(target);
3088 return;
3089 }
3090 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3091 if (buf == NULL) {
3092 xmlGenericError(xmlGenericErrorContext,
3093 "malloc of %d byte failed\n", size);
3094 ctxt->instate = state;
3095 return;
3096 }
3097 cur = CUR;
3098 if (!IS_BLANK(cur)) {
3099 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3101 ctxt->sax->error(ctxt->userData,
3102 "xmlParsePI: PI %s space expected\n", target);
3103 ctxt->wellFormed = 0;
3104 ctxt->disableSAX = 1;
3105 }
3106 SKIP_BLANKS;
3107 cur = CUR_CHAR(l);
3108 while (IS_CHAR(cur) && /* checked */
3109 ((cur != '?') || (NXT(1) != '>'))) {
3110 if (len + 5 >= size) {
3111 size *= 2;
3112 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3113 if (buf == NULL) {
3114 xmlGenericError(xmlGenericErrorContext,
3115 "realloc of %d byte failed\n", size);
3116 ctxt->instate = state;
3117 return;
3118 }
3119 }
3120 count++;
3121 if (count > 50) {
3122 GROW;
3123 count = 0;
3124 }
3125 COPY_BUF(l,buf,len,cur);
3126 NEXTL(l);
3127 cur = CUR_CHAR(l);
3128 if (cur == 0) {
3129 SHRINK;
3130 GROW;
3131 cur = CUR_CHAR(l);
3132 }
3133 }
3134 buf[len] = 0;
3135 if (cur != '?') {
3136 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3138 ctxt->sax->error(ctxt->userData,
3139 "xmlParsePI: PI %s never end ...\n", target);
3140 ctxt->wellFormed = 0;
3141 ctxt->disableSAX = 1;
3142 } else {
3143 if (input != ctxt->input) {
3144 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3146 ctxt->sax->error(ctxt->userData,
3147 "PI declaration doesn't start and stop in the same entity\n");
3148 ctxt->wellFormed = 0;
3149 ctxt->disableSAX = 1;
3150 }
3151 SKIP(2);
3152
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003153#ifdef LIBXML_CATALOG_ENABLED
3154 if (((state == XML_PARSER_MISC) ||
3155 (state == XML_PARSER_START)) &&
3156 (xmlStrEqual(target, XML_CATALOG_PI))) {
3157 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3158 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3159 (allow == XML_CATA_ALLOW_ALL))
3160 xmlParseCatalogPI(ctxt, buf);
3161 }
3162#endif
3163
3164
Owen Taylor3473f882001-02-23 17:55:21 +00003165 /*
3166 * SAX: PI detected.
3167 */
3168 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3169 (ctxt->sax->processingInstruction != NULL))
3170 ctxt->sax->processingInstruction(ctxt->userData,
3171 target, buf);
3172 }
3173 xmlFree(buf);
3174 xmlFree(target);
3175 } else {
3176 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3178 ctxt->sax->error(ctxt->userData,
3179 "xmlParsePI : no target name\n");
3180 ctxt->wellFormed = 0;
3181 ctxt->disableSAX = 1;
3182 }
3183 ctxt->instate = state;
3184 }
3185}
3186
3187/**
3188 * xmlParseNotationDecl:
3189 * @ctxt: an XML parser context
3190 *
3191 * parse a notation declaration
3192 *
3193 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3194 *
3195 * Hence there is actually 3 choices:
3196 * 'PUBLIC' S PubidLiteral
3197 * 'PUBLIC' S PubidLiteral S SystemLiteral
3198 * and 'SYSTEM' S SystemLiteral
3199 *
3200 * See the NOTE on xmlParseExternalID().
3201 */
3202
3203void
3204xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3205 xmlChar *name;
3206 xmlChar *Pubid;
3207 xmlChar *Systemid;
3208
3209 if ((RAW == '<') && (NXT(1) == '!') &&
3210 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3211 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3212 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3213 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3214 xmlParserInputPtr input = ctxt->input;
3215 SHRINK;
3216 SKIP(10);
3217 if (!IS_BLANK(CUR)) {
3218 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3220 ctxt->sax->error(ctxt->userData,
3221 "Space required after '<!NOTATION'\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 return;
3225 }
3226 SKIP_BLANKS;
3227
Daniel Veillard76d66f42001-05-16 21:05:17 +00003228 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003229 if (name == NULL) {
3230 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3232 ctxt->sax->error(ctxt->userData,
3233 "NOTATION: Name expected here\n");
3234 ctxt->wellFormed = 0;
3235 ctxt->disableSAX = 1;
3236 return;
3237 }
3238 if (!IS_BLANK(CUR)) {
3239 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3241 ctxt->sax->error(ctxt->userData,
3242 "Space required after the NOTATION name'\n");
3243 ctxt->wellFormed = 0;
3244 ctxt->disableSAX = 1;
3245 return;
3246 }
3247 SKIP_BLANKS;
3248
3249 /*
3250 * Parse the IDs.
3251 */
3252 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3253 SKIP_BLANKS;
3254
3255 if (RAW == '>') {
3256 if (input != ctxt->input) {
3257 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3259 ctxt->sax->error(ctxt->userData,
3260"Notation declaration doesn't start and stop in the same entity\n");
3261 ctxt->wellFormed = 0;
3262 ctxt->disableSAX = 1;
3263 }
3264 NEXT;
3265 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3266 (ctxt->sax->notationDecl != NULL))
3267 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3268 } else {
3269 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3271 ctxt->sax->error(ctxt->userData,
3272 "'>' required to close NOTATION declaration\n");
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 }
3276 xmlFree(name);
3277 if (Systemid != NULL) xmlFree(Systemid);
3278 if (Pubid != NULL) xmlFree(Pubid);
3279 }
3280}
3281
3282/**
3283 * xmlParseEntityDecl:
3284 * @ctxt: an XML parser context
3285 *
3286 * parse <!ENTITY declarations
3287 *
3288 * [70] EntityDecl ::= GEDecl | PEDecl
3289 *
3290 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3291 *
3292 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3293 *
3294 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3295 *
3296 * [74] PEDef ::= EntityValue | ExternalID
3297 *
3298 * [76] NDataDecl ::= S 'NDATA' S Name
3299 *
3300 * [ VC: Notation Declared ]
3301 * The Name must match the declared name of a notation.
3302 */
3303
3304void
3305xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3306 xmlChar *name = NULL;
3307 xmlChar *value = NULL;
3308 xmlChar *URI = NULL, *literal = NULL;
3309 xmlChar *ndata = NULL;
3310 int isParameter = 0;
3311 xmlChar *orig = NULL;
3312
3313 GROW;
3314 if ((RAW == '<') && (NXT(1) == '!') &&
3315 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3316 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3317 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3318 xmlParserInputPtr input = ctxt->input;
3319 ctxt->instate = XML_PARSER_ENTITY_DECL;
3320 SHRINK;
3321 SKIP(8);
3322 if (!IS_BLANK(CUR)) {
3323 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3325 ctxt->sax->error(ctxt->userData,
3326 "Space required after '<!ENTITY'\n");
3327 ctxt->wellFormed = 0;
3328 ctxt->disableSAX = 1;
3329 }
3330 SKIP_BLANKS;
3331
3332 if (RAW == '%') {
3333 NEXT;
3334 if (!IS_BLANK(CUR)) {
3335 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData,
3338 "Space required after '%'\n");
3339 ctxt->wellFormed = 0;
3340 ctxt->disableSAX = 1;
3341 }
3342 SKIP_BLANKS;
3343 isParameter = 1;
3344 }
3345
Daniel Veillard76d66f42001-05-16 21:05:17 +00003346 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003347 if (name == NULL) {
3348 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3351 ctxt->wellFormed = 0;
3352 ctxt->disableSAX = 1;
3353 return;
3354 }
3355 if (!IS_BLANK(CUR)) {
3356 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3358 ctxt->sax->error(ctxt->userData,
3359 "Space required after the entity name\n");
3360 ctxt->wellFormed = 0;
3361 ctxt->disableSAX = 1;
3362 }
3363 SKIP_BLANKS;
3364
3365 /*
3366 * handle the various case of definitions...
3367 */
3368 if (isParameter) {
3369 if ((RAW == '"') || (RAW == '\'')) {
3370 value = xmlParseEntityValue(ctxt, &orig);
3371 if (value) {
3372 if ((ctxt->sax != NULL) &&
3373 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3374 ctxt->sax->entityDecl(ctxt->userData, name,
3375 XML_INTERNAL_PARAMETER_ENTITY,
3376 NULL, NULL, value);
3377 }
3378 } else {
3379 URI = xmlParseExternalID(ctxt, &literal, 1);
3380 if ((URI == NULL) && (literal == NULL)) {
3381 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Entity value required\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 if (URI) {
3389 xmlURIPtr uri;
3390
3391 uri = xmlParseURI((const char *) URI);
3392 if (uri == NULL) {
3393 ctxt->errNo = XML_ERR_INVALID_URI;
3394 if ((ctxt->sax != NULL) &&
3395 (!ctxt->disableSAX) &&
3396 (ctxt->sax->error != NULL))
3397 ctxt->sax->error(ctxt->userData,
3398 "Invalid URI: %s\n", URI);
3399 ctxt->wellFormed = 0;
3400 } else {
3401 if (uri->fragment != NULL) {
3402 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3403 if ((ctxt->sax != NULL) &&
3404 (!ctxt->disableSAX) &&
3405 (ctxt->sax->error != NULL))
3406 ctxt->sax->error(ctxt->userData,
3407 "Fragment not allowed: %s\n", URI);
3408 ctxt->wellFormed = 0;
3409 } else {
3410 if ((ctxt->sax != NULL) &&
3411 (!ctxt->disableSAX) &&
3412 (ctxt->sax->entityDecl != NULL))
3413 ctxt->sax->entityDecl(ctxt->userData, name,
3414 XML_EXTERNAL_PARAMETER_ENTITY,
3415 literal, URI, NULL);
3416 }
3417 xmlFreeURI(uri);
3418 }
3419 }
3420 }
3421 } else {
3422 if ((RAW == '"') || (RAW == '\'')) {
3423 value = xmlParseEntityValue(ctxt, &orig);
3424 if ((ctxt->sax != NULL) &&
3425 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3426 ctxt->sax->entityDecl(ctxt->userData, name,
3427 XML_INTERNAL_GENERAL_ENTITY,
3428 NULL, NULL, value);
3429 } else {
3430 URI = xmlParseExternalID(ctxt, &literal, 1);
3431 if ((URI == NULL) && (literal == NULL)) {
3432 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3434 ctxt->sax->error(ctxt->userData,
3435 "Entity value required\n");
3436 ctxt->wellFormed = 0;
3437 ctxt->disableSAX = 1;
3438 }
3439 if (URI) {
3440 xmlURIPtr uri;
3441
3442 uri = xmlParseURI((const char *)URI);
3443 if (uri == NULL) {
3444 ctxt->errNo = XML_ERR_INVALID_URI;
3445 if ((ctxt->sax != NULL) &&
3446 (!ctxt->disableSAX) &&
3447 (ctxt->sax->error != NULL))
3448 ctxt->sax->error(ctxt->userData,
3449 "Invalid URI: %s\n", URI);
3450 ctxt->wellFormed = 0;
3451 } else {
3452 if (uri->fragment != NULL) {
3453 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3454 if ((ctxt->sax != NULL) &&
3455 (!ctxt->disableSAX) &&
3456 (ctxt->sax->error != NULL))
3457 ctxt->sax->error(ctxt->userData,
3458 "Fragment not allowed: %s\n", URI);
3459 ctxt->wellFormed = 0;
3460 }
3461 xmlFreeURI(uri);
3462 }
3463 }
3464 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3465 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Space required before 'NDATA'\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 }
3472 SKIP_BLANKS;
3473 if ((RAW == 'N') && (NXT(1) == 'D') &&
3474 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3475 (NXT(4) == 'A')) {
3476 SKIP(5);
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after 'NDATA'\n");
3482 ctxt->wellFormed = 0;
3483 ctxt->disableSAX = 1;
3484 }
3485 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003486 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003487 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3488 (ctxt->sax->unparsedEntityDecl != NULL))
3489 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3490 literal, URI, ndata);
3491 } else {
3492 if ((ctxt->sax != NULL) &&
3493 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3494 ctxt->sax->entityDecl(ctxt->userData, name,
3495 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3496 literal, URI, NULL);
3497 }
3498 }
3499 }
3500 SKIP_BLANKS;
3501 if (RAW != '>') {
3502 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3504 ctxt->sax->error(ctxt->userData,
3505 "xmlParseEntityDecl: entity %s not terminated\n", name);
3506 ctxt->wellFormed = 0;
3507 ctxt->disableSAX = 1;
3508 } else {
3509 if (input != ctxt->input) {
3510 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3512 ctxt->sax->error(ctxt->userData,
3513"Entity declaration doesn't start and stop in the same entity\n");
3514 ctxt->wellFormed = 0;
3515 ctxt->disableSAX = 1;
3516 }
3517 NEXT;
3518 }
3519 if (orig != NULL) {
3520 /*
3521 * Ugly mechanism to save the raw entity value.
3522 */
3523 xmlEntityPtr cur = NULL;
3524
3525 if (isParameter) {
3526 if ((ctxt->sax != NULL) &&
3527 (ctxt->sax->getParameterEntity != NULL))
3528 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3529 } else {
3530 if ((ctxt->sax != NULL) &&
3531 (ctxt->sax->getEntity != NULL))
3532 cur = ctxt->sax->getEntity(ctxt->userData, name);
3533 }
3534 if (cur != NULL) {
3535 if (cur->orig != NULL)
3536 xmlFree(orig);
3537 else
3538 cur->orig = orig;
3539 } else
3540 xmlFree(orig);
3541 }
3542 if (name != NULL) xmlFree(name);
3543 if (value != NULL) xmlFree(value);
3544 if (URI != NULL) xmlFree(URI);
3545 if (literal != NULL) xmlFree(literal);
3546 if (ndata != NULL) xmlFree(ndata);
3547 }
3548}
3549
3550/**
3551 * xmlParseDefaultDecl:
3552 * @ctxt: an XML parser context
3553 * @value: Receive a possible fixed default value for the attribute
3554 *
3555 * Parse an attribute default declaration
3556 *
3557 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3558 *
3559 * [ VC: Required Attribute ]
3560 * if the default declaration is the keyword #REQUIRED, then the
3561 * attribute must be specified for all elements of the type in the
3562 * attribute-list declaration.
3563 *
3564 * [ VC: Attribute Default Legal ]
3565 * The declared default value must meet the lexical constraints of
3566 * the declared attribute type c.f. xmlValidateAttributeDecl()
3567 *
3568 * [ VC: Fixed Attribute Default ]
3569 * if an attribute has a default value declared with the #FIXED
3570 * keyword, instances of that attribute must match the default value.
3571 *
3572 * [ WFC: No < in Attribute Values ]
3573 * handled in xmlParseAttValue()
3574 *
3575 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3576 * or XML_ATTRIBUTE_FIXED.
3577 */
3578
3579int
3580xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3581 int val;
3582 xmlChar *ret;
3583
3584 *value = NULL;
3585 if ((RAW == '#') && (NXT(1) == 'R') &&
3586 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3587 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3588 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3589 (NXT(8) == 'D')) {
3590 SKIP(9);
3591 return(XML_ATTRIBUTE_REQUIRED);
3592 }
3593 if ((RAW == '#') && (NXT(1) == 'I') &&
3594 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3595 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3596 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3597 SKIP(8);
3598 return(XML_ATTRIBUTE_IMPLIED);
3599 }
3600 val = XML_ATTRIBUTE_NONE;
3601 if ((RAW == '#') && (NXT(1) == 'F') &&
3602 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3603 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3604 SKIP(6);
3605 val = XML_ATTRIBUTE_FIXED;
3606 if (!IS_BLANK(CUR)) {
3607 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3609 ctxt->sax->error(ctxt->userData,
3610 "Space required after '#FIXED'\n");
3611 ctxt->wellFormed = 0;
3612 ctxt->disableSAX = 1;
3613 }
3614 SKIP_BLANKS;
3615 }
3616 ret = xmlParseAttValue(ctxt);
3617 ctxt->instate = XML_PARSER_DTD;
3618 if (ret == NULL) {
3619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620 ctxt->sax->error(ctxt->userData,
3621 "Attribute default value declaration error\n");
3622 ctxt->wellFormed = 0;
3623 ctxt->disableSAX = 1;
3624 } else
3625 *value = ret;
3626 return(val);
3627}
3628
3629/**
3630 * xmlParseNotationType:
3631 * @ctxt: an XML parser context
3632 *
3633 * parse an Notation attribute type.
3634 *
3635 * Note: the leading 'NOTATION' S part has already being parsed...
3636 *
3637 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3638 *
3639 * [ VC: Notation Attributes ]
3640 * Values of this type must match one of the notation names included
3641 * in the declaration; all notation names in the declaration must be declared.
3642 *
3643 * Returns: the notation attribute tree built while parsing
3644 */
3645
3646xmlEnumerationPtr
3647xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3648 xmlChar *name;
3649 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3650
3651 if (RAW != '(') {
3652 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3654 ctxt->sax->error(ctxt->userData,
3655 "'(' required to start 'NOTATION'\n");
3656 ctxt->wellFormed = 0;
3657 ctxt->disableSAX = 1;
3658 return(NULL);
3659 }
3660 SHRINK;
3661 do {
3662 NEXT;
3663 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003664 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003665 if (name == NULL) {
3666 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3668 ctxt->sax->error(ctxt->userData,
3669 "Name expected in NOTATION declaration\n");
3670 ctxt->wellFormed = 0;
3671 ctxt->disableSAX = 1;
3672 return(ret);
3673 }
3674 cur = xmlCreateEnumeration(name);
3675 xmlFree(name);
3676 if (cur == NULL) return(ret);
3677 if (last == NULL) ret = last = cur;
3678 else {
3679 last->next = cur;
3680 last = cur;
3681 }
3682 SKIP_BLANKS;
3683 } while (RAW == '|');
3684 if (RAW != ')') {
3685 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3687 ctxt->sax->error(ctxt->userData,
3688 "')' required to finish NOTATION declaration\n");
3689 ctxt->wellFormed = 0;
3690 ctxt->disableSAX = 1;
3691 if ((last != NULL) && (last != ret))
3692 xmlFreeEnumeration(last);
3693 return(ret);
3694 }
3695 NEXT;
3696 return(ret);
3697}
3698
3699/**
3700 * xmlParseEnumerationType:
3701 * @ctxt: an XML parser context
3702 *
3703 * parse an Enumeration attribute type.
3704 *
3705 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3706 *
3707 * [ VC: Enumeration ]
3708 * Values of this type must match one of the Nmtoken tokens in
3709 * the declaration
3710 *
3711 * Returns: the enumeration attribute tree built while parsing
3712 */
3713
3714xmlEnumerationPtr
3715xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3716 xmlChar *name;
3717 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3718
3719 if (RAW != '(') {
3720 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3722 ctxt->sax->error(ctxt->userData,
3723 "'(' required to start ATTLIST enumeration\n");
3724 ctxt->wellFormed = 0;
3725 ctxt->disableSAX = 1;
3726 return(NULL);
3727 }
3728 SHRINK;
3729 do {
3730 NEXT;
3731 SKIP_BLANKS;
3732 name = xmlParseNmtoken(ctxt);
3733 if (name == NULL) {
3734 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3736 ctxt->sax->error(ctxt->userData,
3737 "NmToken expected in ATTLIST enumeration\n");
3738 ctxt->wellFormed = 0;
3739 ctxt->disableSAX = 1;
3740 return(ret);
3741 }
3742 cur = xmlCreateEnumeration(name);
3743 xmlFree(name);
3744 if (cur == NULL) return(ret);
3745 if (last == NULL) ret = last = cur;
3746 else {
3747 last->next = cur;
3748 last = cur;
3749 }
3750 SKIP_BLANKS;
3751 } while (RAW == '|');
3752 if (RAW != ')') {
3753 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3755 ctxt->sax->error(ctxt->userData,
3756 "')' required to finish ATTLIST enumeration\n");
3757 ctxt->wellFormed = 0;
3758 ctxt->disableSAX = 1;
3759 return(ret);
3760 }
3761 NEXT;
3762 return(ret);
3763}
3764
3765/**
3766 * xmlParseEnumeratedType:
3767 * @ctxt: an XML parser context
3768 * @tree: the enumeration tree built while parsing
3769 *
3770 * parse an Enumerated attribute type.
3771 *
3772 * [57] EnumeratedType ::= NotationType | Enumeration
3773 *
3774 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3775 *
3776 *
3777 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3778 */
3779
3780int
3781xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3782 if ((RAW == 'N') && (NXT(1) == 'O') &&
3783 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3784 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3785 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3786 SKIP(8);
3787 if (!IS_BLANK(CUR)) {
3788 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3790 ctxt->sax->error(ctxt->userData,
3791 "Space required after 'NOTATION'\n");
3792 ctxt->wellFormed = 0;
3793 ctxt->disableSAX = 1;
3794 return(0);
3795 }
3796 SKIP_BLANKS;
3797 *tree = xmlParseNotationType(ctxt);
3798 if (*tree == NULL) return(0);
3799 return(XML_ATTRIBUTE_NOTATION);
3800 }
3801 *tree = xmlParseEnumerationType(ctxt);
3802 if (*tree == NULL) return(0);
3803 return(XML_ATTRIBUTE_ENUMERATION);
3804}
3805
3806/**
3807 * xmlParseAttributeType:
3808 * @ctxt: an XML parser context
3809 * @tree: the enumeration tree built while parsing
3810 *
3811 * parse the Attribute list def for an element
3812 *
3813 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3814 *
3815 * [55] StringType ::= 'CDATA'
3816 *
3817 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3818 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3819 *
3820 * Validity constraints for attribute values syntax are checked in
3821 * xmlValidateAttributeValue()
3822 *
3823 * [ VC: ID ]
3824 * Values of type ID must match the Name production. A name must not
3825 * appear more than once in an XML document as a value of this type;
3826 * i.e., ID values must uniquely identify the elements which bear them.
3827 *
3828 * [ VC: One ID per Element Type ]
3829 * No element type may have more than one ID attribute specified.
3830 *
3831 * [ VC: ID Attribute Default ]
3832 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3833 *
3834 * [ VC: IDREF ]
3835 * Values of type IDREF must match the Name production, and values
3836 * of type IDREFS must match Names; each IDREF Name must match the value
3837 * of an ID attribute on some element in the XML document; i.e. IDREF
3838 * values must match the value of some ID attribute.
3839 *
3840 * [ VC: Entity Name ]
3841 * Values of type ENTITY must match the Name production, values
3842 * of type ENTITIES must match Names; each Entity Name must match the
3843 * name of an unparsed entity declared in the DTD.
3844 *
3845 * [ VC: Name Token ]
3846 * Values of type NMTOKEN must match the Nmtoken production; values
3847 * of type NMTOKENS must match Nmtokens.
3848 *
3849 * Returns the attribute type
3850 */
3851int
3852xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3853 SHRINK;
3854 if ((RAW == 'C') && (NXT(1) == 'D') &&
3855 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3856 (NXT(4) == 'A')) {
3857 SKIP(5);
3858 return(XML_ATTRIBUTE_CDATA);
3859 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3860 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3861 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3862 SKIP(6);
3863 return(XML_ATTRIBUTE_IDREFS);
3864 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3865 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3866 (NXT(4) == 'F')) {
3867 SKIP(5);
3868 return(XML_ATTRIBUTE_IDREF);
3869 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3870 SKIP(2);
3871 return(XML_ATTRIBUTE_ID);
3872 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3873 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3874 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3875 SKIP(6);
3876 return(XML_ATTRIBUTE_ENTITY);
3877 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3878 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3879 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3880 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3881 SKIP(8);
3882 return(XML_ATTRIBUTE_ENTITIES);
3883 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3884 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3885 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3886 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3887 SKIP(8);
3888 return(XML_ATTRIBUTE_NMTOKENS);
3889 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3890 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3891 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3892 (NXT(6) == 'N')) {
3893 SKIP(7);
3894 return(XML_ATTRIBUTE_NMTOKEN);
3895 }
3896 return(xmlParseEnumeratedType(ctxt, tree));
3897}
3898
3899/**
3900 * xmlParseAttributeListDecl:
3901 * @ctxt: an XML parser context
3902 *
3903 * : parse the Attribute list def for an element
3904 *
3905 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3906 *
3907 * [53] AttDef ::= S Name S AttType S DefaultDecl
3908 *
3909 */
3910void
3911xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3912 xmlChar *elemName;
3913 xmlChar *attrName;
3914 xmlEnumerationPtr tree;
3915
3916 if ((RAW == '<') && (NXT(1) == '!') &&
3917 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3918 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3919 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3920 (NXT(8) == 'T')) {
3921 xmlParserInputPtr input = ctxt->input;
3922
3923 SKIP(9);
3924 if (!IS_BLANK(CUR)) {
3925 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3927 ctxt->sax->error(ctxt->userData,
3928 "Space required after '<!ATTLIST'\n");
3929 ctxt->wellFormed = 0;
3930 ctxt->disableSAX = 1;
3931 }
3932 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003933 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003934 if (elemName == NULL) {
3935 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3937 ctxt->sax->error(ctxt->userData,
3938 "ATTLIST: no name for Element\n");
3939 ctxt->wellFormed = 0;
3940 ctxt->disableSAX = 1;
3941 return;
3942 }
3943 SKIP_BLANKS;
3944 GROW;
3945 while (RAW != '>') {
3946 const xmlChar *check = CUR_PTR;
3947 int type;
3948 int def;
3949 xmlChar *defaultValue = NULL;
3950
3951 GROW;
3952 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003953 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003954 if (attrName == NULL) {
3955 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3957 ctxt->sax->error(ctxt->userData,
3958 "ATTLIST: no name for Attribute\n");
3959 ctxt->wellFormed = 0;
3960 ctxt->disableSAX = 1;
3961 break;
3962 }
3963 GROW;
3964 if (!IS_BLANK(CUR)) {
3965 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3967 ctxt->sax->error(ctxt->userData,
3968 "Space required after the attribute name\n");
3969 ctxt->wellFormed = 0;
3970 ctxt->disableSAX = 1;
3971 if (attrName != NULL)
3972 xmlFree(attrName);
3973 if (defaultValue != NULL)
3974 xmlFree(defaultValue);
3975 break;
3976 }
3977 SKIP_BLANKS;
3978
3979 type = xmlParseAttributeType(ctxt, &tree);
3980 if (type <= 0) {
3981 if (attrName != NULL)
3982 xmlFree(attrName);
3983 if (defaultValue != NULL)
3984 xmlFree(defaultValue);
3985 break;
3986 }
3987
3988 GROW;
3989 if (!IS_BLANK(CUR)) {
3990 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3992 ctxt->sax->error(ctxt->userData,
3993 "Space required after the attribute type\n");
3994 ctxt->wellFormed = 0;
3995 ctxt->disableSAX = 1;
3996 if (attrName != NULL)
3997 xmlFree(attrName);
3998 if (defaultValue != NULL)
3999 xmlFree(defaultValue);
4000 if (tree != NULL)
4001 xmlFreeEnumeration(tree);
4002 break;
4003 }
4004 SKIP_BLANKS;
4005
4006 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4007 if (def <= 0) {
4008 if (attrName != NULL)
4009 xmlFree(attrName);
4010 if (defaultValue != NULL)
4011 xmlFree(defaultValue);
4012 if (tree != NULL)
4013 xmlFreeEnumeration(tree);
4014 break;
4015 }
4016
4017 GROW;
4018 if (RAW != '>') {
4019 if (!IS_BLANK(CUR)) {
4020 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4022 ctxt->sax->error(ctxt->userData,
4023 "Space required after the attribute default value\n");
4024 ctxt->wellFormed = 0;
4025 ctxt->disableSAX = 1;
4026 if (attrName != NULL)
4027 xmlFree(attrName);
4028 if (defaultValue != NULL)
4029 xmlFree(defaultValue);
4030 if (tree != NULL)
4031 xmlFreeEnumeration(tree);
4032 break;
4033 }
4034 SKIP_BLANKS;
4035 }
4036 if (check == CUR_PTR) {
4037 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4039 ctxt->sax->error(ctxt->userData,
4040 "xmlParseAttributeListDecl: detected internal error\n");
4041 if (attrName != NULL)
4042 xmlFree(attrName);
4043 if (defaultValue != NULL)
4044 xmlFree(defaultValue);
4045 if (tree != NULL)
4046 xmlFreeEnumeration(tree);
4047 break;
4048 }
4049 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4050 (ctxt->sax->attributeDecl != NULL))
4051 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4052 type, def, defaultValue, tree);
4053 if (attrName != NULL)
4054 xmlFree(attrName);
4055 if (defaultValue != NULL)
4056 xmlFree(defaultValue);
4057 GROW;
4058 }
4059 if (RAW == '>') {
4060 if (input != ctxt->input) {
4061 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064"Attribute list declaration doesn't start and stop in the same entity\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 }
4068 NEXT;
4069 }
4070
4071 xmlFree(elemName);
4072 }
4073}
4074
4075/**
4076 * xmlParseElementMixedContentDecl:
4077 * @ctxt: an XML parser context
4078 *
4079 * parse the declaration for a Mixed Element content
4080 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4081 *
4082 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4083 * '(' S? '#PCDATA' S? ')'
4084 *
4085 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4086 *
4087 * [ VC: No Duplicate Types ]
4088 * The same name must not appear more than once in a single
4089 * mixed-content declaration.
4090 *
4091 * returns: the list of the xmlElementContentPtr describing the element choices
4092 */
4093xmlElementContentPtr
4094xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4095 xmlElementContentPtr ret = NULL, cur = NULL, n;
4096 xmlChar *elem = NULL;
4097
4098 GROW;
4099 if ((RAW == '#') && (NXT(1) == 'P') &&
4100 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4101 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4102 (NXT(6) == 'A')) {
4103 SKIP(7);
4104 SKIP_BLANKS;
4105 SHRINK;
4106 if (RAW == ')') {
4107 ctxt->entity = ctxt->input;
4108 NEXT;
4109 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4110 if (RAW == '*') {
4111 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4112 NEXT;
4113 }
4114 return(ret);
4115 }
4116 if ((RAW == '(') || (RAW == '|')) {
4117 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4118 if (ret == NULL) return(NULL);
4119 }
4120 while (RAW == '|') {
4121 NEXT;
4122 if (elem == NULL) {
4123 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4124 if (ret == NULL) return(NULL);
4125 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004126 if (cur != NULL)
4127 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004128 cur = ret;
4129 } else {
4130 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4131 if (n == NULL) return(NULL);
4132 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004133 if (n->c1 != NULL)
4134 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004135 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004136 if (n != NULL)
4137 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004138 cur = n;
4139 xmlFree(elem);
4140 }
4141 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004142 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004143 if (elem == NULL) {
4144 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4146 ctxt->sax->error(ctxt->userData,
4147 "xmlParseElementMixedContentDecl : Name expected\n");
4148 ctxt->wellFormed = 0;
4149 ctxt->disableSAX = 1;
4150 xmlFreeElementContent(cur);
4151 return(NULL);
4152 }
4153 SKIP_BLANKS;
4154 GROW;
4155 }
4156 if ((RAW == ')') && (NXT(1) == '*')) {
4157 if (elem != NULL) {
4158 cur->c2 = xmlNewElementContent(elem,
4159 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004160 if (cur->c2 != NULL)
4161 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004162 xmlFree(elem);
4163 }
4164 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4165 ctxt->entity = ctxt->input;
4166 SKIP(2);
4167 } else {
4168 if (elem != NULL) xmlFree(elem);
4169 xmlFreeElementContent(ret);
4170 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4172 ctxt->sax->error(ctxt->userData,
4173 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4174 ctxt->wellFormed = 0;
4175 ctxt->disableSAX = 1;
4176 return(NULL);
4177 }
4178
4179 } else {
4180 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4182 ctxt->sax->error(ctxt->userData,
4183 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4184 ctxt->wellFormed = 0;
4185 ctxt->disableSAX = 1;
4186 }
4187 return(ret);
4188}
4189
4190/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004191 * xmlParseElementChildrenContentD:
4192 * @ctxt: an XML parser context
4193 *
4194 * VMS version of xmlParseElementChildrenContentDecl()
4195 *
4196 * Returns the tree of xmlElementContentPtr describing the element
4197 * hierarchy.
4198 */
4199/**
Owen Taylor3473f882001-02-23 17:55:21 +00004200 * xmlParseElementChildrenContentDecl:
4201 * @ctxt: an XML parser context
4202 *
4203 * parse the declaration for a Mixed Element content
4204 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4205 *
4206 *
4207 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4208 *
4209 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4210 *
4211 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4212 *
4213 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4214 *
4215 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4216 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004217 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004218 * opening or closing parentheses in a choice, seq, or Mixed
4219 * construct is contained in the replacement text for a parameter
4220 * entity, both must be contained in the same replacement text. For
4221 * interoperability, if a parameter-entity reference appears in a
4222 * choice, seq, or Mixed construct, its replacement text should not
4223 * be empty, and neither the first nor last non-blank character of
4224 * the replacement text should be a connector (| or ,).
4225 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004226 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004227 * hierarchy.
4228 */
4229xmlElementContentPtr
4230#ifdef VMS
4231xmlParseElementChildrenContentD
4232#else
4233xmlParseElementChildrenContentDecl
4234#endif
4235(xmlParserCtxtPtr ctxt) {
4236 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4237 xmlChar *elem;
4238 xmlChar type = 0;
4239
4240 SKIP_BLANKS;
4241 GROW;
4242 if (RAW == '(') {
4243 /* Recurse on first child */
4244 NEXT;
4245 SKIP_BLANKS;
4246 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4247 SKIP_BLANKS;
4248 GROW;
4249 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004250 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004251 if (elem == NULL) {
4252 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4254 ctxt->sax->error(ctxt->userData,
4255 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4256 ctxt->wellFormed = 0;
4257 ctxt->disableSAX = 1;
4258 return(NULL);
4259 }
4260 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4261 GROW;
4262 if (RAW == '?') {
4263 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4264 NEXT;
4265 } else if (RAW == '*') {
4266 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4267 NEXT;
4268 } else if (RAW == '+') {
4269 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4270 NEXT;
4271 } else {
4272 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4273 }
4274 xmlFree(elem);
4275 GROW;
4276 }
4277 SKIP_BLANKS;
4278 SHRINK;
4279 while (RAW != ')') {
4280 /*
4281 * Each loop we parse one separator and one element.
4282 */
4283 if (RAW == ',') {
4284 if (type == 0) type = CUR;
4285
4286 /*
4287 * Detect "Name | Name , Name" error
4288 */
4289 else if (type != CUR) {
4290 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4292 ctxt->sax->error(ctxt->userData,
4293 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4294 type);
4295 ctxt->wellFormed = 0;
4296 ctxt->disableSAX = 1;
4297 if ((op != NULL) && (op != ret))
4298 xmlFreeElementContent(op);
4299 if ((last != NULL) && (last != ret) &&
4300 (last != ret->c1) && (last != ret->c2))
4301 xmlFreeElementContent(last);
4302 if (ret != NULL)
4303 xmlFreeElementContent(ret);
4304 return(NULL);
4305 }
4306 NEXT;
4307
4308 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4309 if (op == NULL) {
4310 xmlFreeElementContent(ret);
4311 return(NULL);
4312 }
4313 if (last == NULL) {
4314 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004315 if (ret != NULL)
4316 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004317 ret = cur = op;
4318 } else {
4319 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004320 if (op != NULL)
4321 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004322 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004323 if (last != NULL)
4324 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004325 cur =op;
4326 last = NULL;
4327 }
4328 } else if (RAW == '|') {
4329 if (type == 0) type = CUR;
4330
4331 /*
4332 * Detect "Name , Name | Name" error
4333 */
4334 else if (type != CUR) {
4335 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4337 ctxt->sax->error(ctxt->userData,
4338 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4339 type);
4340 ctxt->wellFormed = 0;
4341 ctxt->disableSAX = 1;
4342 if ((op != NULL) && (op != ret) && (op != last))
4343 xmlFreeElementContent(op);
4344 if ((last != NULL) && (last != ret) &&
4345 (last != ret->c1) && (last != ret->c2))
4346 xmlFreeElementContent(last);
4347 if (ret != NULL)
4348 xmlFreeElementContent(ret);
4349 return(NULL);
4350 }
4351 NEXT;
4352
4353 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4354 if (op == NULL) {
4355 if ((op != NULL) && (op != ret))
4356 xmlFreeElementContent(op);
4357 if ((last != NULL) && (last != ret) &&
4358 (last != ret->c1) && (last != ret->c2))
4359 xmlFreeElementContent(last);
4360 if (ret != NULL)
4361 xmlFreeElementContent(ret);
4362 return(NULL);
4363 }
4364 if (last == NULL) {
4365 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004366 if (ret != NULL)
4367 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004368 ret = cur = op;
4369 } else {
4370 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004371 if (op != NULL)
4372 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004374 if (last != NULL)
4375 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004376 cur =op;
4377 last = NULL;
4378 }
4379 } else {
4380 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4382 ctxt->sax->error(ctxt->userData,
4383 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4384 ctxt->wellFormed = 0;
4385 ctxt->disableSAX = 1;
4386 if ((op != NULL) && (op != ret))
4387 xmlFreeElementContent(op);
4388 if ((last != NULL) && (last != ret) &&
4389 (last != ret->c1) && (last != ret->c2))
4390 xmlFreeElementContent(last);
4391 if (ret != NULL)
4392 xmlFreeElementContent(ret);
4393 return(NULL);
4394 }
4395 GROW;
4396 SKIP_BLANKS;
4397 GROW;
4398 if (RAW == '(') {
4399 /* Recurse on second child */
4400 NEXT;
4401 SKIP_BLANKS;
4402 last = xmlParseElementChildrenContentDecl(ctxt);
4403 SKIP_BLANKS;
4404 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004405 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004406 if (elem == NULL) {
4407 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4409 ctxt->sax->error(ctxt->userData,
4410 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4411 ctxt->wellFormed = 0;
4412 ctxt->disableSAX = 1;
4413 if ((op != NULL) && (op != ret))
4414 xmlFreeElementContent(op);
4415 if ((last != NULL) && (last != ret) &&
4416 (last != ret->c1) && (last != ret->c2))
4417 xmlFreeElementContent(last);
4418 if (ret != NULL)
4419 xmlFreeElementContent(ret);
4420 return(NULL);
4421 }
4422 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4423 xmlFree(elem);
4424 if (RAW == '?') {
4425 last->ocur = XML_ELEMENT_CONTENT_OPT;
4426 NEXT;
4427 } else if (RAW == '*') {
4428 last->ocur = XML_ELEMENT_CONTENT_MULT;
4429 NEXT;
4430 } else if (RAW == '+') {
4431 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4432 NEXT;
4433 } else {
4434 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4435 }
4436 }
4437 SKIP_BLANKS;
4438 GROW;
4439 }
4440 if ((cur != NULL) && (last != NULL)) {
4441 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004442 if (last != NULL)
4443 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 ctxt->entity = ctxt->input;
4446 NEXT;
4447 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004448 if (ret != NULL)
4449 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004450 NEXT;
4451 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004452 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004453 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004454 cur = ret;
4455 /*
4456 * Some normalization:
4457 * (a | b* | c?)* == (a | b | c)*
4458 */
4459 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4460 if ((cur->c1 != NULL) &&
4461 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4462 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4463 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4464 if ((cur->c2 != NULL) &&
4465 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4466 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4467 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4468 cur = cur->c2;
4469 }
4470 }
Owen Taylor3473f882001-02-23 17:55:21 +00004471 NEXT;
4472 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004473 if (ret != NULL) {
4474 int found = 0;
4475
Daniel Veillarde470df72001-04-18 21:41:07 +00004476 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004477 /*
4478 * Some normalization:
4479 * (a | b*)+ == (a | b)*
4480 * (a | b?)+ == (a | b)*
4481 */
4482 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4483 if ((cur->c1 != NULL) &&
4484 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4485 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4486 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4487 found = 1;
4488 }
4489 if ((cur->c2 != NULL) &&
4490 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4491 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4492 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4493 found = 1;
4494 }
4495 cur = cur->c2;
4496 }
4497 if (found)
4498 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4499 }
Owen Taylor3473f882001-02-23 17:55:21 +00004500 NEXT;
4501 }
4502 return(ret);
4503}
4504
4505/**
4506 * xmlParseElementContentDecl:
4507 * @ctxt: an XML parser context
4508 * @name: the name of the element being defined.
4509 * @result: the Element Content pointer will be stored here if any
4510 *
4511 * parse the declaration for an Element content either Mixed or Children,
4512 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4513 *
4514 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4515 *
4516 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4517 */
4518
4519int
4520xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4521 xmlElementContentPtr *result) {
4522
4523 xmlElementContentPtr tree = NULL;
4524 xmlParserInputPtr input = ctxt->input;
4525 int res;
4526
4527 *result = NULL;
4528
4529 if (RAW != '(') {
4530 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004533 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004534 ctxt->wellFormed = 0;
4535 ctxt->disableSAX = 1;
4536 return(-1);
4537 }
4538 NEXT;
4539 GROW;
4540 SKIP_BLANKS;
4541 if ((RAW == '#') && (NXT(1) == 'P') &&
4542 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4543 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4544 (NXT(6) == 'A')) {
4545 tree = xmlParseElementMixedContentDecl(ctxt);
4546 res = XML_ELEMENT_TYPE_MIXED;
4547 } else {
4548 tree = xmlParseElementChildrenContentDecl(ctxt);
4549 res = XML_ELEMENT_TYPE_ELEMENT;
4550 }
4551 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4552 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4554 ctxt->sax->error(ctxt->userData,
4555"Element content declaration doesn't start and stop in the same entity\n");
4556 ctxt->wellFormed = 0;
4557 ctxt->disableSAX = 1;
4558 }
4559 SKIP_BLANKS;
4560 *result = tree;
4561 return(res);
4562}
4563
4564/**
4565 * xmlParseElementDecl:
4566 * @ctxt: an XML parser context
4567 *
4568 * parse an Element declaration.
4569 *
4570 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4571 *
4572 * [ VC: Unique Element Type Declaration ]
4573 * No element type may be declared more than once
4574 *
4575 * Returns the type of the element, or -1 in case of error
4576 */
4577int
4578xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4579 xmlChar *name;
4580 int ret = -1;
4581 xmlElementContentPtr content = NULL;
4582
4583 GROW;
4584 if ((RAW == '<') && (NXT(1) == '!') &&
4585 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4586 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4587 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4588 (NXT(8) == 'T')) {
4589 xmlParserInputPtr input = ctxt->input;
4590
4591 SKIP(9);
4592 if (!IS_BLANK(CUR)) {
4593 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData,
4596 "Space required after 'ELEMENT'\n");
4597 ctxt->wellFormed = 0;
4598 ctxt->disableSAX = 1;
4599 }
4600 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004601 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004602 if (name == NULL) {
4603 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4605 ctxt->sax->error(ctxt->userData,
4606 "xmlParseElementDecl: no name for Element\n");
4607 ctxt->wellFormed = 0;
4608 ctxt->disableSAX = 1;
4609 return(-1);
4610 }
4611 while ((RAW == 0) && (ctxt->inputNr > 1))
4612 xmlPopInput(ctxt);
4613 if (!IS_BLANK(CUR)) {
4614 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4616 ctxt->sax->error(ctxt->userData,
4617 "Space required after the element name\n");
4618 ctxt->wellFormed = 0;
4619 ctxt->disableSAX = 1;
4620 }
4621 SKIP_BLANKS;
4622 if ((RAW == 'E') && (NXT(1) == 'M') &&
4623 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4624 (NXT(4) == 'Y')) {
4625 SKIP(5);
4626 /*
4627 * Element must always be empty.
4628 */
4629 ret = XML_ELEMENT_TYPE_EMPTY;
4630 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4631 (NXT(2) == 'Y')) {
4632 SKIP(3);
4633 /*
4634 * Element is a generic container.
4635 */
4636 ret = XML_ELEMENT_TYPE_ANY;
4637 } else if (RAW == '(') {
4638 ret = xmlParseElementContentDecl(ctxt, name, &content);
4639 } else {
4640 /*
4641 * [ WFC: PEs in Internal Subset ] error handling.
4642 */
4643 if ((RAW == '%') && (ctxt->external == 0) &&
4644 (ctxt->inputNr == 1)) {
4645 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647 ctxt->sax->error(ctxt->userData,
4648 "PEReference: forbidden within markup decl in internal subset\n");
4649 } else {
4650 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4652 ctxt->sax->error(ctxt->userData,
4653 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4654 }
4655 ctxt->wellFormed = 0;
4656 ctxt->disableSAX = 1;
4657 if (name != NULL) xmlFree(name);
4658 return(-1);
4659 }
4660
4661 SKIP_BLANKS;
4662 /*
4663 * Pop-up of finished entities.
4664 */
4665 while ((RAW == 0) && (ctxt->inputNr > 1))
4666 xmlPopInput(ctxt);
4667 SKIP_BLANKS;
4668
4669 if (RAW != '>') {
4670 ctxt->errNo = XML_ERR_GT_REQUIRED;
4671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4672 ctxt->sax->error(ctxt->userData,
4673 "xmlParseElementDecl: expected '>' at the end\n");
4674 ctxt->wellFormed = 0;
4675 ctxt->disableSAX = 1;
4676 } else {
4677 if (input != ctxt->input) {
4678 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4680 ctxt->sax->error(ctxt->userData,
4681"Element declaration doesn't start and stop in the same entity\n");
4682 ctxt->wellFormed = 0;
4683 ctxt->disableSAX = 1;
4684 }
4685
4686 NEXT;
4687 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4688 (ctxt->sax->elementDecl != NULL))
4689 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4690 content);
4691 }
4692 if (content != NULL) {
4693 xmlFreeElementContent(content);
4694 }
4695 if (name != NULL) {
4696 xmlFree(name);
4697 }
4698 }
4699 return(ret);
4700}
4701
4702/**
Owen Taylor3473f882001-02-23 17:55:21 +00004703 * xmlParseConditionalSections
4704 * @ctxt: an XML parser context
4705 *
4706 * [61] conditionalSect ::= includeSect | ignoreSect
4707 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4708 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4709 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4710 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4711 */
4712
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004713static void
Owen Taylor3473f882001-02-23 17:55:21 +00004714xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4715 SKIP(3);
4716 SKIP_BLANKS;
4717 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4718 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4719 (NXT(6) == 'E')) {
4720 SKIP(7);
4721 SKIP_BLANKS;
4722 if (RAW != '[') {
4723 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4725 ctxt->sax->error(ctxt->userData,
4726 "XML conditional section '[' expected\n");
4727 ctxt->wellFormed = 0;
4728 ctxt->disableSAX = 1;
4729 } else {
4730 NEXT;
4731 }
4732 if (xmlParserDebugEntities) {
4733 if ((ctxt->input != NULL) && (ctxt->input->filename))
4734 xmlGenericError(xmlGenericErrorContext,
4735 "%s(%d): ", ctxt->input->filename,
4736 ctxt->input->line);
4737 xmlGenericError(xmlGenericErrorContext,
4738 "Entering INCLUDE Conditional Section\n");
4739 }
4740
4741 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4742 (NXT(2) != '>'))) {
4743 const xmlChar *check = CUR_PTR;
4744 int cons = ctxt->input->consumed;
4745 int tok = ctxt->token;
4746
4747 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4748 xmlParseConditionalSections(ctxt);
4749 } else if (IS_BLANK(CUR)) {
4750 NEXT;
4751 } else if (RAW == '%') {
4752 xmlParsePEReference(ctxt);
4753 } else
4754 xmlParseMarkupDecl(ctxt);
4755
4756 /*
4757 * Pop-up of finished entities.
4758 */
4759 while ((RAW == 0) && (ctxt->inputNr > 1))
4760 xmlPopInput(ctxt);
4761
4762 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4763 (tok == ctxt->token)) {
4764 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4766 ctxt->sax->error(ctxt->userData,
4767 "Content error in the external subset\n");
4768 ctxt->wellFormed = 0;
4769 ctxt->disableSAX = 1;
4770 break;
4771 }
4772 }
4773 if (xmlParserDebugEntities) {
4774 if ((ctxt->input != NULL) && (ctxt->input->filename))
4775 xmlGenericError(xmlGenericErrorContext,
4776 "%s(%d): ", ctxt->input->filename,
4777 ctxt->input->line);
4778 xmlGenericError(xmlGenericErrorContext,
4779 "Leaving INCLUDE Conditional Section\n");
4780 }
4781
4782 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4783 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4784 int state;
4785 int instate;
4786 int depth = 0;
4787
4788 SKIP(6);
4789 SKIP_BLANKS;
4790 if (RAW != '[') {
4791 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4793 ctxt->sax->error(ctxt->userData,
4794 "XML conditional section '[' expected\n");
4795 ctxt->wellFormed = 0;
4796 ctxt->disableSAX = 1;
4797 } else {
4798 NEXT;
4799 }
4800 if (xmlParserDebugEntities) {
4801 if ((ctxt->input != NULL) && (ctxt->input->filename))
4802 xmlGenericError(xmlGenericErrorContext,
4803 "%s(%d): ", ctxt->input->filename,
4804 ctxt->input->line);
4805 xmlGenericError(xmlGenericErrorContext,
4806 "Entering IGNORE Conditional Section\n");
4807 }
4808
4809 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004810 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004811 * But disable SAX event generating DTD building in the meantime
4812 */
4813 state = ctxt->disableSAX;
4814 instate = ctxt->instate;
4815 ctxt->disableSAX = 1;
4816 ctxt->instate = XML_PARSER_IGNORE;
4817
4818 while (depth >= 0) {
4819 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4820 depth++;
4821 SKIP(3);
4822 continue;
4823 }
4824 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4825 if (--depth >= 0) SKIP(3);
4826 continue;
4827 }
4828 NEXT;
4829 continue;
4830 }
4831
4832 ctxt->disableSAX = state;
4833 ctxt->instate = instate;
4834
4835 if (xmlParserDebugEntities) {
4836 if ((ctxt->input != NULL) && (ctxt->input->filename))
4837 xmlGenericError(xmlGenericErrorContext,
4838 "%s(%d): ", ctxt->input->filename,
4839 ctxt->input->line);
4840 xmlGenericError(xmlGenericErrorContext,
4841 "Leaving IGNORE Conditional Section\n");
4842 }
4843
4844 } else {
4845 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4847 ctxt->sax->error(ctxt->userData,
4848 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4849 ctxt->wellFormed = 0;
4850 ctxt->disableSAX = 1;
4851 }
4852
4853 if (RAW == 0)
4854 SHRINK;
4855
4856 if (RAW == 0) {
4857 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4859 ctxt->sax->error(ctxt->userData,
4860 "XML conditional section not closed\n");
4861 ctxt->wellFormed = 0;
4862 ctxt->disableSAX = 1;
4863 } else {
4864 SKIP(3);
4865 }
4866}
4867
4868/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004869 * xmlParseMarkupDecl:
4870 * @ctxt: an XML parser context
4871 *
4872 * parse Markup declarations
4873 *
4874 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4875 * NotationDecl | PI | Comment
4876 *
4877 * [ VC: Proper Declaration/PE Nesting ]
4878 * Parameter-entity replacement text must be properly nested with
4879 * markup declarations. That is to say, if either the first character
4880 * or the last character of a markup declaration (markupdecl above) is
4881 * contained in the replacement text for a parameter-entity reference,
4882 * both must be contained in the same replacement text.
4883 *
4884 * [ WFC: PEs in Internal Subset ]
4885 * In the internal DTD subset, parameter-entity references can occur
4886 * only where markup declarations can occur, not within markup declarations.
4887 * (This does not apply to references that occur in external parameter
4888 * entities or to the external subset.)
4889 */
4890void
4891xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4892 GROW;
4893 xmlParseElementDecl(ctxt);
4894 xmlParseAttributeListDecl(ctxt);
4895 xmlParseEntityDecl(ctxt);
4896 xmlParseNotationDecl(ctxt);
4897 xmlParsePI(ctxt);
4898 xmlParseComment(ctxt);
4899 /*
4900 * This is only for internal subset. On external entities,
4901 * the replacement is done before parsing stage
4902 */
4903 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4904 xmlParsePEReference(ctxt);
4905
4906 /*
4907 * Conditional sections are allowed from entities included
4908 * by PE References in the internal subset.
4909 */
4910 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4911 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4912 xmlParseConditionalSections(ctxt);
4913 }
4914 }
4915
4916 ctxt->instate = XML_PARSER_DTD;
4917}
4918
4919/**
4920 * xmlParseTextDecl:
4921 * @ctxt: an XML parser context
4922 *
4923 * parse an XML declaration header for external entities
4924 *
4925 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4926 *
4927 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4928 */
4929
4930void
4931xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4932 xmlChar *version;
4933
4934 /*
4935 * We know that '<?xml' is here.
4936 */
4937 if ((RAW == '<') && (NXT(1) == '?') &&
4938 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4939 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4940 SKIP(5);
4941 } else {
4942 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "Text declaration '<?xml' required\n");
4946 ctxt->wellFormed = 0;
4947 ctxt->disableSAX = 1;
4948
4949 return;
4950 }
4951
4952 if (!IS_BLANK(CUR)) {
4953 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "Space needed after '<?xml'\n");
4957 ctxt->wellFormed = 0;
4958 ctxt->disableSAX = 1;
4959 }
4960 SKIP_BLANKS;
4961
4962 /*
4963 * We may have the VersionInfo here.
4964 */
4965 version = xmlParseVersionInfo(ctxt);
4966 if (version == NULL)
4967 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00004968 else {
4969 if (!IS_BLANK(CUR)) {
4970 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4972 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4973 ctxt->wellFormed = 0;
4974 ctxt->disableSAX = 1;
4975 }
4976 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004977 ctxt->input->version = version;
4978
4979 /*
4980 * We must have the encoding declaration
4981 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004982 xmlParseEncodingDecl(ctxt);
4983 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4984 /*
4985 * The XML REC instructs us to stop parsing right here
4986 */
4987 return;
4988 }
4989
4990 SKIP_BLANKS;
4991 if ((RAW == '?') && (NXT(1) == '>')) {
4992 SKIP(2);
4993 } else if (RAW == '>') {
4994 /* Deprecated old WD ... */
4995 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4997 ctxt->sax->error(ctxt->userData,
4998 "XML declaration must end-up with '?>'\n");
4999 ctxt->wellFormed = 0;
5000 ctxt->disableSAX = 1;
5001 NEXT;
5002 } else {
5003 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5005 ctxt->sax->error(ctxt->userData,
5006 "parsing XML declaration: '?>' expected\n");
5007 ctxt->wellFormed = 0;
5008 ctxt->disableSAX = 1;
5009 MOVETO_ENDTAG(CUR_PTR);
5010 NEXT;
5011 }
5012}
5013
5014/**
Owen Taylor3473f882001-02-23 17:55:21 +00005015 * xmlParseExternalSubset:
5016 * @ctxt: an XML parser context
5017 * @ExternalID: the external identifier
5018 * @SystemID: the system identifier (or URL)
5019 *
5020 * parse Markup declarations from an external subset
5021 *
5022 * [30] extSubset ::= textDecl? extSubsetDecl
5023 *
5024 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5025 */
5026void
5027xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5028 const xmlChar *SystemID) {
5029 GROW;
5030 if ((RAW == '<') && (NXT(1) == '?') &&
5031 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5032 (NXT(4) == 'l')) {
5033 xmlParseTextDecl(ctxt);
5034 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5035 /*
5036 * The XML REC instructs us to stop parsing right here
5037 */
5038 ctxt->instate = XML_PARSER_EOF;
5039 return;
5040 }
5041 }
5042 if (ctxt->myDoc == NULL) {
5043 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5044 }
5045 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5046 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5047
5048 ctxt->instate = XML_PARSER_DTD;
5049 ctxt->external = 1;
5050 while (((RAW == '<') && (NXT(1) == '?')) ||
5051 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005052 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005053 const xmlChar *check = CUR_PTR;
5054 int cons = ctxt->input->consumed;
5055 int tok = ctxt->token;
5056
5057 GROW;
5058 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5059 xmlParseConditionalSections(ctxt);
5060 } else if (IS_BLANK(CUR)) {
5061 NEXT;
5062 } else if (RAW == '%') {
5063 xmlParsePEReference(ctxt);
5064 } else
5065 xmlParseMarkupDecl(ctxt);
5066
5067 /*
5068 * Pop-up of finished entities.
5069 */
5070 while ((RAW == 0) && (ctxt->inputNr > 1))
5071 xmlPopInput(ctxt);
5072
5073 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5074 (tok == ctxt->token)) {
5075 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5077 ctxt->sax->error(ctxt->userData,
5078 "Content error in the external subset\n");
5079 ctxt->wellFormed = 0;
5080 ctxt->disableSAX = 1;
5081 break;
5082 }
5083 }
5084
5085 if (RAW != 0) {
5086 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "Extra content at the end of the document\n");
5090 ctxt->wellFormed = 0;
5091 ctxt->disableSAX = 1;
5092 }
5093
5094}
5095
5096/**
5097 * xmlParseReference:
5098 * @ctxt: an XML parser context
5099 *
5100 * parse and handle entity references in content, depending on the SAX
5101 * interface, this may end-up in a call to character() if this is a
5102 * CharRef, a predefined entity, if there is no reference() callback.
5103 * or if the parser was asked to switch to that mode.
5104 *
5105 * [67] Reference ::= EntityRef | CharRef
5106 */
5107void
5108xmlParseReference(xmlParserCtxtPtr ctxt) {
5109 xmlEntityPtr ent;
5110 xmlChar *val;
5111 if (RAW != '&') return;
5112
5113 if (NXT(1) == '#') {
5114 int i = 0;
5115 xmlChar out[10];
5116 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005117 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005118
5119 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5120 /*
5121 * So we are using non-UTF-8 buffers
5122 * Check that the char fit on 8bits, if not
5123 * generate a CharRef.
5124 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005125 if (value <= 0xFF) {
5126 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005127 out[1] = 0;
5128 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5129 (!ctxt->disableSAX))
5130 ctxt->sax->characters(ctxt->userData, out, 1);
5131 } else {
5132 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005133 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005134 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005135 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005136 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5137 (!ctxt->disableSAX))
5138 ctxt->sax->reference(ctxt->userData, out);
5139 }
5140 } else {
5141 /*
5142 * Just encode the value in UTF-8
5143 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005144 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005145 out[i] = 0;
5146 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5147 (!ctxt->disableSAX))
5148 ctxt->sax->characters(ctxt->userData, out, i);
5149 }
5150 } else {
5151 ent = xmlParseEntityRef(ctxt);
5152 if (ent == NULL) return;
5153 if ((ent->name != NULL) &&
5154 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5155 xmlNodePtr list = NULL;
5156 int ret;
5157
5158
5159 /*
5160 * The first reference to the entity trigger a parsing phase
5161 * where the ent->children is filled with the result from
5162 * the parsing.
5163 */
5164 if (ent->children == NULL) {
5165 xmlChar *value;
5166 value = ent->content;
5167
5168 /*
5169 * Check that this entity is well formed
5170 */
5171 if ((value != NULL) &&
5172 (value[1] == 0) && (value[0] == '<') &&
5173 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5174 /*
5175 * DONE: get definite answer on this !!!
5176 * Lots of entity decls are used to declare a single
5177 * char
5178 * <!ENTITY lt "<">
5179 * Which seems to be valid since
5180 * 2.4: The ampersand character (&) and the left angle
5181 * bracket (<) may appear in their literal form only
5182 * when used ... They are also legal within the literal
5183 * entity value of an internal entity declaration;i
5184 * see "4.3.2 Well-Formed Parsed Entities".
5185 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5186 * Looking at the OASIS test suite and James Clark
5187 * tests, this is broken. However the XML REC uses
5188 * it. Is the XML REC not well-formed ????
5189 * This is a hack to avoid this problem
5190 *
5191 * ANSWER: since lt gt amp .. are already defined,
5192 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005193 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005194 * is lousy but acceptable.
5195 */
5196 list = xmlNewDocText(ctxt->myDoc, value);
5197 if (list != NULL) {
5198 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5199 (ent->children == NULL)) {
5200 ent->children = list;
5201 ent->last = list;
5202 list->parent = (xmlNodePtr) ent;
5203 } else {
5204 xmlFreeNodeList(list);
5205 }
5206 } else if (list != NULL) {
5207 xmlFreeNodeList(list);
5208 }
5209 } else {
5210 /*
5211 * 4.3.2: An internal general parsed entity is well-formed
5212 * if its replacement text matches the production labeled
5213 * content.
5214 */
5215 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5216 ctxt->depth++;
5217 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5218 ctxt->sax, NULL, ctxt->depth,
5219 value, &list);
5220 ctxt->depth--;
5221 } else if (ent->etype ==
5222 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5223 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005224 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005225 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005226 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 ctxt->depth--;
5228 } else {
5229 ret = -1;
5230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5231 ctxt->sax->error(ctxt->userData,
5232 "Internal: invalid entity type\n");
5233 }
5234 if (ret == XML_ERR_ENTITY_LOOP) {
5235 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5237 ctxt->sax->error(ctxt->userData,
5238 "Detected entity reference loop\n");
5239 ctxt->wellFormed = 0;
5240 ctxt->disableSAX = 1;
5241 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005242 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5243 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005244 (ent->children == NULL)) {
5245 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005246 if (ctxt->replaceEntities) {
5247 /*
5248 * Prune it directly in the generated document
5249 * except for single text nodes.
5250 */
5251 if ((list->type == XML_TEXT_NODE) &&
5252 (list->next == NULL)) {
5253 list->parent = (xmlNodePtr) ent;
5254 list = NULL;
5255 } else {
5256 while (list != NULL) {
5257 list->parent = (xmlNodePtr) ctxt->node;
5258 if (list->next == NULL)
5259 ent->last = list;
5260 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005261 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005262 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5264 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005265 }
5266 } else {
5267 while (list != NULL) {
5268 list->parent = (xmlNodePtr) ent;
5269 if (list->next == NULL)
5270 ent->last = list;
5271 list = list->next;
5272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273 }
5274 } else {
5275 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005276 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 }
5278 } else if (ret > 0) {
5279 ctxt->errNo = ret;
5280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5281 ctxt->sax->error(ctxt->userData,
5282 "Entity value required\n");
5283 ctxt->wellFormed = 0;
5284 ctxt->disableSAX = 1;
5285 } else if (list != NULL) {
5286 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005287 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005288 }
5289 }
5290 }
5291 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5292 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5293 /*
5294 * Create a node.
5295 */
5296 ctxt->sax->reference(ctxt->userData, ent->name);
5297 return;
5298 } else if (ctxt->replaceEntities) {
5299 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5300 /*
5301 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005302 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005303 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005304 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005305 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005306 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005307 cur = ent->children;
5308 while (cur != NULL) {
5309 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005310 if (firstChild == NULL){
5311 firstChild = new;
5312 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005313 xmlAddChild(ctxt->node, new);
5314 if (cur == ent->last)
5315 break;
5316 cur = cur->next;
5317 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005318 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5319 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005320 } else {
5321 /*
5322 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005323 * node with a possible previous text one which
5324 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005325 */
5326 if (ent->children->type == XML_TEXT_NODE)
5327 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5328 if ((ent->last != ent->children) &&
5329 (ent->last->type == XML_TEXT_NODE))
5330 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5331 xmlAddChildList(ctxt->node, ent->children);
5332 }
5333
Owen Taylor3473f882001-02-23 17:55:21 +00005334 /*
5335 * This is to avoid a nasty side effect, see
5336 * characters() in SAX.c
5337 */
5338 ctxt->nodemem = 0;
5339 ctxt->nodelen = 0;
5340 return;
5341 } else {
5342 /*
5343 * Probably running in SAX mode
5344 */
5345 xmlParserInputPtr input;
5346
5347 input = xmlNewEntityInputStream(ctxt, ent);
5348 xmlPushInput(ctxt, input);
5349 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5350 (RAW == '<') && (NXT(1) == '?') &&
5351 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5352 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5353 xmlParseTextDecl(ctxt);
5354 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5355 /*
5356 * The XML REC instructs us to stop parsing right here
5357 */
5358 ctxt->instate = XML_PARSER_EOF;
5359 return;
5360 }
5361 if (input->standalone == 1) {
5362 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5364 ctxt->sax->error(ctxt->userData,
5365 "external parsed entities cannot be standalone\n");
5366 ctxt->wellFormed = 0;
5367 ctxt->disableSAX = 1;
5368 }
5369 }
5370 return;
5371 }
5372 }
5373 } else {
5374 val = ent->content;
5375 if (val == NULL) return;
5376 /*
5377 * inline the entity.
5378 */
5379 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5380 (!ctxt->disableSAX))
5381 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5382 }
5383 }
5384}
5385
5386/**
5387 * xmlParseEntityRef:
5388 * @ctxt: an XML parser context
5389 *
5390 * parse ENTITY references declarations
5391 *
5392 * [68] EntityRef ::= '&' Name ';'
5393 *
5394 * [ WFC: Entity Declared ]
5395 * In a document without any DTD, a document with only an internal DTD
5396 * subset which contains no parameter entity references, or a document
5397 * with "standalone='yes'", the Name given in the entity reference
5398 * must match that in an entity declaration, except that well-formed
5399 * documents need not declare any of the following entities: amp, lt,
5400 * gt, apos, quot. The declaration of a parameter entity must precede
5401 * any reference to it. Similarly, the declaration of a general entity
5402 * must precede any reference to it which appears in a default value in an
5403 * attribute-list declaration. Note that if entities are declared in the
5404 * external subset or in external parameter entities, a non-validating
5405 * processor is not obligated to read and process their declarations;
5406 * for such documents, the rule that an entity must be declared is a
5407 * well-formedness constraint only if standalone='yes'.
5408 *
5409 * [ WFC: Parsed Entity ]
5410 * An entity reference must not contain the name of an unparsed entity
5411 *
5412 * Returns the xmlEntityPtr if found, or NULL otherwise.
5413 */
5414xmlEntityPtr
5415xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5416 xmlChar *name;
5417 xmlEntityPtr ent = NULL;
5418
5419 GROW;
5420
5421 if (RAW == '&') {
5422 NEXT;
5423 name = xmlParseName(ctxt);
5424 if (name == NULL) {
5425 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5427 ctxt->sax->error(ctxt->userData,
5428 "xmlParseEntityRef: no name\n");
5429 ctxt->wellFormed = 0;
5430 ctxt->disableSAX = 1;
5431 } else {
5432 if (RAW == ';') {
5433 NEXT;
5434 /*
5435 * Ask first SAX for entity resolution, otherwise try the
5436 * predefined set.
5437 */
5438 if (ctxt->sax != NULL) {
5439 if (ctxt->sax->getEntity != NULL)
5440 ent = ctxt->sax->getEntity(ctxt->userData, name);
5441 if (ent == NULL)
5442 ent = xmlGetPredefinedEntity(name);
5443 }
5444 /*
5445 * [ WFC: Entity Declared ]
5446 * In a document without any DTD, a document with only an
5447 * internal DTD subset which contains no parameter entity
5448 * references, or a document with "standalone='yes'", the
5449 * Name given in the entity reference must match that in an
5450 * entity declaration, except that well-formed documents
5451 * need not declare any of the following entities: amp, lt,
5452 * gt, apos, quot.
5453 * The declaration of a parameter entity must precede any
5454 * reference to it.
5455 * Similarly, the declaration of a general entity must
5456 * precede any reference to it which appears in a default
5457 * value in an attribute-list declaration. Note that if
5458 * entities are declared in the external subset or in
5459 * external parameter entities, a non-validating processor
5460 * is not obligated to read and process their declarations;
5461 * for such documents, the rule that an entity must be
5462 * declared is a well-formedness constraint only if
5463 * standalone='yes'.
5464 */
5465 if (ent == NULL) {
5466 if ((ctxt->standalone == 1) ||
5467 ((ctxt->hasExternalSubset == 0) &&
5468 (ctxt->hasPErefs == 0))) {
5469 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "Entity '%s' not defined\n", name);
5473 ctxt->wellFormed = 0;
5474 ctxt->disableSAX = 1;
5475 } else {
5476 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005478 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005479 "Entity '%s' not defined\n", name);
5480 }
5481 }
5482
5483 /*
5484 * [ WFC: Parsed Entity ]
5485 * An entity reference must not contain the name of an
5486 * unparsed entity
5487 */
5488 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5489 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5491 ctxt->sax->error(ctxt->userData,
5492 "Entity reference to unparsed entity %s\n", name);
5493 ctxt->wellFormed = 0;
5494 ctxt->disableSAX = 1;
5495 }
5496
5497 /*
5498 * [ WFC: No External Entity References ]
5499 * Attribute values cannot contain direct or indirect
5500 * entity references to external entities.
5501 */
5502 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5503 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5504 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5506 ctxt->sax->error(ctxt->userData,
5507 "Attribute references external entity '%s'\n", name);
5508 ctxt->wellFormed = 0;
5509 ctxt->disableSAX = 1;
5510 }
5511 /*
5512 * [ WFC: No < in Attribute Values ]
5513 * The replacement text of any entity referred to directly or
5514 * indirectly in an attribute value (other than "&lt;") must
5515 * not contain a <.
5516 */
5517 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5518 (ent != NULL) &&
5519 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5520 (ent->content != NULL) &&
5521 (xmlStrchr(ent->content, '<'))) {
5522 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5524 ctxt->sax->error(ctxt->userData,
5525 "'<' in entity '%s' is not allowed in attributes values\n", name);
5526 ctxt->wellFormed = 0;
5527 ctxt->disableSAX = 1;
5528 }
5529
5530 /*
5531 * Internal check, no parameter entities here ...
5532 */
5533 else {
5534 switch (ent->etype) {
5535 case XML_INTERNAL_PARAMETER_ENTITY:
5536 case XML_EXTERNAL_PARAMETER_ENTITY:
5537 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5539 ctxt->sax->error(ctxt->userData,
5540 "Attempt to reference the parameter entity '%s'\n", name);
5541 ctxt->wellFormed = 0;
5542 ctxt->disableSAX = 1;
5543 break;
5544 default:
5545 break;
5546 }
5547 }
5548
5549 /*
5550 * [ WFC: No Recursion ]
5551 * A parsed entity must not contain a recursive reference
5552 * to itself, either directly or indirectly.
5553 * Done somewhere else
5554 */
5555
5556 } else {
5557 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5559 ctxt->sax->error(ctxt->userData,
5560 "xmlParseEntityRef: expecting ';'\n");
5561 ctxt->wellFormed = 0;
5562 ctxt->disableSAX = 1;
5563 }
5564 xmlFree(name);
5565 }
5566 }
5567 return(ent);
5568}
5569
5570/**
5571 * xmlParseStringEntityRef:
5572 * @ctxt: an XML parser context
5573 * @str: a pointer to an index in the string
5574 *
5575 * parse ENTITY references declarations, but this version parses it from
5576 * a string value.
5577 *
5578 * [68] EntityRef ::= '&' Name ';'
5579 *
5580 * [ WFC: Entity Declared ]
5581 * In a document without any DTD, a document with only an internal DTD
5582 * subset which contains no parameter entity references, or a document
5583 * with "standalone='yes'", the Name given in the entity reference
5584 * must match that in an entity declaration, except that well-formed
5585 * documents need not declare any of the following entities: amp, lt,
5586 * gt, apos, quot. The declaration of a parameter entity must precede
5587 * any reference to it. Similarly, the declaration of a general entity
5588 * must precede any reference to it which appears in a default value in an
5589 * attribute-list declaration. Note that if entities are declared in the
5590 * external subset or in external parameter entities, a non-validating
5591 * processor is not obligated to read and process their declarations;
5592 * for such documents, the rule that an entity must be declared is a
5593 * well-formedness constraint only if standalone='yes'.
5594 *
5595 * [ WFC: Parsed Entity ]
5596 * An entity reference must not contain the name of an unparsed entity
5597 *
5598 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5599 * is updated to the current location in the string.
5600 */
5601xmlEntityPtr
5602xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5603 xmlChar *name;
5604 const xmlChar *ptr;
5605 xmlChar cur;
5606 xmlEntityPtr ent = NULL;
5607
5608 if ((str == NULL) || (*str == NULL))
5609 return(NULL);
5610 ptr = *str;
5611 cur = *ptr;
5612 if (cur == '&') {
5613 ptr++;
5614 cur = *ptr;
5615 name = xmlParseStringName(ctxt, &ptr);
5616 if (name == NULL) {
5617 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5619 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005620 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005621 ctxt->wellFormed = 0;
5622 ctxt->disableSAX = 1;
5623 } else {
5624 if (*ptr == ';') {
5625 ptr++;
5626 /*
5627 * Ask first SAX for entity resolution, otherwise try the
5628 * predefined set.
5629 */
5630 if (ctxt->sax != NULL) {
5631 if (ctxt->sax->getEntity != NULL)
5632 ent = ctxt->sax->getEntity(ctxt->userData, name);
5633 if (ent == NULL)
5634 ent = xmlGetPredefinedEntity(name);
5635 }
5636 /*
5637 * [ WFC: Entity Declared ]
5638 * In a document without any DTD, a document with only an
5639 * internal DTD subset which contains no parameter entity
5640 * references, or a document with "standalone='yes'", the
5641 * Name given in the entity reference must match that in an
5642 * entity declaration, except that well-formed documents
5643 * need not declare any of the following entities: amp, lt,
5644 * gt, apos, quot.
5645 * The declaration of a parameter entity must precede any
5646 * reference to it.
5647 * Similarly, the declaration of a general entity must
5648 * precede any reference to it which appears in a default
5649 * value in an attribute-list declaration. Note that if
5650 * entities are declared in the external subset or in
5651 * external parameter entities, a non-validating processor
5652 * is not obligated to read and process their declarations;
5653 * for such documents, the rule that an entity must be
5654 * declared is a well-formedness constraint only if
5655 * standalone='yes'.
5656 */
5657 if (ent == NULL) {
5658 if ((ctxt->standalone == 1) ||
5659 ((ctxt->hasExternalSubset == 0) &&
5660 (ctxt->hasPErefs == 0))) {
5661 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5663 ctxt->sax->error(ctxt->userData,
5664 "Entity '%s' not defined\n", name);
5665 ctxt->wellFormed = 0;
5666 ctxt->disableSAX = 1;
5667 } else {
5668 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5669 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5670 ctxt->sax->warning(ctxt->userData,
5671 "Entity '%s' not defined\n", name);
5672 }
5673 }
5674
5675 /*
5676 * [ WFC: Parsed Entity ]
5677 * An entity reference must not contain the name of an
5678 * unparsed entity
5679 */
5680 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5681 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5683 ctxt->sax->error(ctxt->userData,
5684 "Entity reference to unparsed entity %s\n", name);
5685 ctxt->wellFormed = 0;
5686 ctxt->disableSAX = 1;
5687 }
5688
5689 /*
5690 * [ WFC: No External Entity References ]
5691 * Attribute values cannot contain direct or indirect
5692 * entity references to external entities.
5693 */
5694 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5695 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5696 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5698 ctxt->sax->error(ctxt->userData,
5699 "Attribute references external entity '%s'\n", name);
5700 ctxt->wellFormed = 0;
5701 ctxt->disableSAX = 1;
5702 }
5703 /*
5704 * [ WFC: No < in Attribute Values ]
5705 * The replacement text of any entity referred to directly or
5706 * indirectly in an attribute value (other than "&lt;") must
5707 * not contain a <.
5708 */
5709 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5710 (ent != NULL) &&
5711 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5712 (ent->content != NULL) &&
5713 (xmlStrchr(ent->content, '<'))) {
5714 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5716 ctxt->sax->error(ctxt->userData,
5717 "'<' in entity '%s' is not allowed in attributes values\n", name);
5718 ctxt->wellFormed = 0;
5719 ctxt->disableSAX = 1;
5720 }
5721
5722 /*
5723 * Internal check, no parameter entities here ...
5724 */
5725 else {
5726 switch (ent->etype) {
5727 case XML_INTERNAL_PARAMETER_ENTITY:
5728 case XML_EXTERNAL_PARAMETER_ENTITY:
5729 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5731 ctxt->sax->error(ctxt->userData,
5732 "Attempt to reference the parameter entity '%s'\n", name);
5733 ctxt->wellFormed = 0;
5734 ctxt->disableSAX = 1;
5735 break;
5736 default:
5737 break;
5738 }
5739 }
5740
5741 /*
5742 * [ WFC: No Recursion ]
5743 * A parsed entity must not contain a recursive reference
5744 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005745 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005746 */
5747
5748 } else {
5749 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5751 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005752 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005753 ctxt->wellFormed = 0;
5754 ctxt->disableSAX = 1;
5755 }
5756 xmlFree(name);
5757 }
5758 }
5759 *str = ptr;
5760 return(ent);
5761}
5762
5763/**
5764 * xmlParsePEReference:
5765 * @ctxt: an XML parser context
5766 *
5767 * parse PEReference declarations
5768 * The entity content is handled directly by pushing it's content as
5769 * a new input stream.
5770 *
5771 * [69] PEReference ::= '%' Name ';'
5772 *
5773 * [ WFC: No Recursion ]
5774 * A parsed entity must not contain a recursive
5775 * reference to itself, either directly or indirectly.
5776 *
5777 * [ WFC: Entity Declared ]
5778 * In a document without any DTD, a document with only an internal DTD
5779 * subset which contains no parameter entity references, or a document
5780 * with "standalone='yes'", ... ... The declaration of a parameter
5781 * entity must precede any reference to it...
5782 *
5783 * [ VC: Entity Declared ]
5784 * In a document with an external subset or external parameter entities
5785 * with "standalone='no'", ... ... The declaration of a parameter entity
5786 * must precede any reference to it...
5787 *
5788 * [ WFC: In DTD ]
5789 * Parameter-entity references may only appear in the DTD.
5790 * NOTE: misleading but this is handled.
5791 */
5792void
5793xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5794 xmlChar *name;
5795 xmlEntityPtr entity = NULL;
5796 xmlParserInputPtr input;
5797
5798 if (RAW == '%') {
5799 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005800 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005801 if (name == NULL) {
5802 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5803 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5804 ctxt->sax->error(ctxt->userData,
5805 "xmlParsePEReference: no name\n");
5806 ctxt->wellFormed = 0;
5807 ctxt->disableSAX = 1;
5808 } else {
5809 if (RAW == ';') {
5810 NEXT;
5811 if ((ctxt->sax != NULL) &&
5812 (ctxt->sax->getParameterEntity != NULL))
5813 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5814 name);
5815 if (entity == NULL) {
5816 /*
5817 * [ WFC: Entity Declared ]
5818 * In a document without any DTD, a document with only an
5819 * internal DTD subset which contains no parameter entity
5820 * references, or a document with "standalone='yes'", ...
5821 * ... The declaration of a parameter entity must precede
5822 * any reference to it...
5823 */
5824 if ((ctxt->standalone == 1) ||
5825 ((ctxt->hasExternalSubset == 0) &&
5826 (ctxt->hasPErefs == 0))) {
5827 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5828 if ((!ctxt->disableSAX) &&
5829 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5830 ctxt->sax->error(ctxt->userData,
5831 "PEReference: %%%s; not found\n", name);
5832 ctxt->wellFormed = 0;
5833 ctxt->disableSAX = 1;
5834 } else {
5835 /*
5836 * [ VC: Entity Declared ]
5837 * In a document with an external subset or external
5838 * parameter entities with "standalone='no'", ...
5839 * ... The declaration of a parameter entity must precede
5840 * any reference to it...
5841 */
5842 if ((!ctxt->disableSAX) &&
5843 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5844 ctxt->sax->warning(ctxt->userData,
5845 "PEReference: %%%s; not found\n", name);
5846 ctxt->valid = 0;
5847 }
5848 } else {
5849 /*
5850 * Internal checking in case the entity quest barfed
5851 */
5852 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5853 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5854 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5855 ctxt->sax->warning(ctxt->userData,
5856 "Internal: %%%s; is not a parameter entity\n", name);
5857 } else {
5858 /*
5859 * TODO !!!
5860 * handle the extra spaces added before and after
5861 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5862 */
5863 input = xmlNewEntityInputStream(ctxt, entity);
5864 xmlPushInput(ctxt, input);
5865 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5866 (RAW == '<') && (NXT(1) == '?') &&
5867 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5868 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5869 xmlParseTextDecl(ctxt);
5870 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5871 /*
5872 * The XML REC instructs us to stop parsing
5873 * right here
5874 */
5875 ctxt->instate = XML_PARSER_EOF;
5876 xmlFree(name);
5877 return;
5878 }
5879 }
5880 if (ctxt->token == 0)
5881 ctxt->token = ' ';
5882 }
5883 }
5884 ctxt->hasPErefs = 1;
5885 } else {
5886 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5888 ctxt->sax->error(ctxt->userData,
5889 "xmlParsePEReference: expecting ';'\n");
5890 ctxt->wellFormed = 0;
5891 ctxt->disableSAX = 1;
5892 }
5893 xmlFree(name);
5894 }
5895 }
5896}
5897
5898/**
5899 * xmlParseStringPEReference:
5900 * @ctxt: an XML parser context
5901 * @str: a pointer to an index in the string
5902 *
5903 * parse PEReference declarations
5904 *
5905 * [69] PEReference ::= '%' Name ';'
5906 *
5907 * [ WFC: No Recursion ]
5908 * A parsed entity must not contain a recursive
5909 * reference to itself, either directly or indirectly.
5910 *
5911 * [ WFC: Entity Declared ]
5912 * In a document without any DTD, a document with only an internal DTD
5913 * subset which contains no parameter entity references, or a document
5914 * with "standalone='yes'", ... ... The declaration of a parameter
5915 * entity must precede any reference to it...
5916 *
5917 * [ VC: Entity Declared ]
5918 * In a document with an external subset or external parameter entities
5919 * with "standalone='no'", ... ... The declaration of a parameter entity
5920 * must precede any reference to it...
5921 *
5922 * [ WFC: In DTD ]
5923 * Parameter-entity references may only appear in the DTD.
5924 * NOTE: misleading but this is handled.
5925 *
5926 * Returns the string of the entity content.
5927 * str is updated to the current value of the index
5928 */
5929xmlEntityPtr
5930xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5931 const xmlChar *ptr;
5932 xmlChar cur;
5933 xmlChar *name;
5934 xmlEntityPtr entity = NULL;
5935
5936 if ((str == NULL) || (*str == NULL)) return(NULL);
5937 ptr = *str;
5938 cur = *ptr;
5939 if (cur == '%') {
5940 ptr++;
5941 cur = *ptr;
5942 name = xmlParseStringName(ctxt, &ptr);
5943 if (name == NULL) {
5944 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5946 ctxt->sax->error(ctxt->userData,
5947 "xmlParseStringPEReference: no name\n");
5948 ctxt->wellFormed = 0;
5949 ctxt->disableSAX = 1;
5950 } else {
5951 cur = *ptr;
5952 if (cur == ';') {
5953 ptr++;
5954 cur = *ptr;
5955 if ((ctxt->sax != NULL) &&
5956 (ctxt->sax->getParameterEntity != NULL))
5957 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5958 name);
5959 if (entity == NULL) {
5960 /*
5961 * [ WFC: Entity Declared ]
5962 * In a document without any DTD, a document with only an
5963 * internal DTD subset which contains no parameter entity
5964 * references, or a document with "standalone='yes'", ...
5965 * ... The declaration of a parameter entity must precede
5966 * any reference to it...
5967 */
5968 if ((ctxt->standalone == 1) ||
5969 ((ctxt->hasExternalSubset == 0) &&
5970 (ctxt->hasPErefs == 0))) {
5971 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5973 ctxt->sax->error(ctxt->userData,
5974 "PEReference: %%%s; not found\n", name);
5975 ctxt->wellFormed = 0;
5976 ctxt->disableSAX = 1;
5977 } else {
5978 /*
5979 * [ VC: Entity Declared ]
5980 * In a document with an external subset or external
5981 * parameter entities with "standalone='no'", ...
5982 * ... The declaration of a parameter entity must
5983 * precede any reference to it...
5984 */
5985 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5986 ctxt->sax->warning(ctxt->userData,
5987 "PEReference: %%%s; not found\n", name);
5988 ctxt->valid = 0;
5989 }
5990 } else {
5991 /*
5992 * Internal checking in case the entity quest barfed
5993 */
5994 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5995 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5996 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5997 ctxt->sax->warning(ctxt->userData,
5998 "Internal: %%%s; is not a parameter entity\n", name);
5999 }
6000 }
6001 ctxt->hasPErefs = 1;
6002 } else {
6003 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6005 ctxt->sax->error(ctxt->userData,
6006 "xmlParseStringPEReference: expecting ';'\n");
6007 ctxt->wellFormed = 0;
6008 ctxt->disableSAX = 1;
6009 }
6010 xmlFree(name);
6011 }
6012 }
6013 *str = ptr;
6014 return(entity);
6015}
6016
6017/**
6018 * xmlParseDocTypeDecl:
6019 * @ctxt: an XML parser context
6020 *
6021 * parse a DOCTYPE declaration
6022 *
6023 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6024 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6025 *
6026 * [ VC: Root Element Type ]
6027 * The Name in the document type declaration must match the element
6028 * type of the root element.
6029 */
6030
6031void
6032xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6033 xmlChar *name = NULL;
6034 xmlChar *ExternalID = NULL;
6035 xmlChar *URI = NULL;
6036
6037 /*
6038 * We know that '<!DOCTYPE' has been detected.
6039 */
6040 SKIP(9);
6041
6042 SKIP_BLANKS;
6043
6044 /*
6045 * Parse the DOCTYPE name.
6046 */
6047 name = xmlParseName(ctxt);
6048 if (name == NULL) {
6049 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6051 ctxt->sax->error(ctxt->userData,
6052 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6053 ctxt->wellFormed = 0;
6054 ctxt->disableSAX = 1;
6055 }
6056 ctxt->intSubName = name;
6057
6058 SKIP_BLANKS;
6059
6060 /*
6061 * Check for SystemID and ExternalID
6062 */
6063 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6064
6065 if ((URI != NULL) || (ExternalID != NULL)) {
6066 ctxt->hasExternalSubset = 1;
6067 }
6068 ctxt->extSubURI = URI;
6069 ctxt->extSubSystem = ExternalID;
6070
6071 SKIP_BLANKS;
6072
6073 /*
6074 * Create and update the internal subset.
6075 */
6076 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6077 (!ctxt->disableSAX))
6078 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6079
6080 /*
6081 * Is there any internal subset declarations ?
6082 * they are handled separately in xmlParseInternalSubset()
6083 */
6084 if (RAW == '[')
6085 return;
6086
6087 /*
6088 * We should be at the end of the DOCTYPE declaration.
6089 */
6090 if (RAW != '>') {
6091 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006093 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006094 ctxt->wellFormed = 0;
6095 ctxt->disableSAX = 1;
6096 }
6097 NEXT;
6098}
6099
6100/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006101 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006102 * @ctxt: an XML parser context
6103 *
6104 * parse the internal subset declaration
6105 *
6106 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6107 */
6108
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006109static void
Owen Taylor3473f882001-02-23 17:55:21 +00006110xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6111 /*
6112 * Is there any DTD definition ?
6113 */
6114 if (RAW == '[') {
6115 ctxt->instate = XML_PARSER_DTD;
6116 NEXT;
6117 /*
6118 * Parse the succession of Markup declarations and
6119 * PEReferences.
6120 * Subsequence (markupdecl | PEReference | S)*
6121 */
6122 while (RAW != ']') {
6123 const xmlChar *check = CUR_PTR;
6124 int cons = ctxt->input->consumed;
6125
6126 SKIP_BLANKS;
6127 xmlParseMarkupDecl(ctxt);
6128 xmlParsePEReference(ctxt);
6129
6130 /*
6131 * Pop-up of finished entities.
6132 */
6133 while ((RAW == 0) && (ctxt->inputNr > 1))
6134 xmlPopInput(ctxt);
6135
6136 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6137 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6139 ctxt->sax->error(ctxt->userData,
6140 "xmlParseInternalSubset: error detected in Markup declaration\n");
6141 ctxt->wellFormed = 0;
6142 ctxt->disableSAX = 1;
6143 break;
6144 }
6145 }
6146 if (RAW == ']') {
6147 NEXT;
6148 SKIP_BLANKS;
6149 }
6150 }
6151
6152 /*
6153 * We should be at the end of the DOCTYPE declaration.
6154 */
6155 if (RAW != '>') {
6156 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006158 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006159 ctxt->wellFormed = 0;
6160 ctxt->disableSAX = 1;
6161 }
6162 NEXT;
6163}
6164
6165/**
6166 * xmlParseAttribute:
6167 * @ctxt: an XML parser context
6168 * @value: a xmlChar ** used to store the value of the attribute
6169 *
6170 * parse an attribute
6171 *
6172 * [41] Attribute ::= Name Eq AttValue
6173 *
6174 * [ WFC: No External Entity References ]
6175 * Attribute values cannot contain direct or indirect entity references
6176 * to external entities.
6177 *
6178 * [ WFC: No < in Attribute Values ]
6179 * The replacement text of any entity referred to directly or indirectly in
6180 * an attribute value (other than "&lt;") must not contain a <.
6181 *
6182 * [ VC: Attribute Value Type ]
6183 * The attribute must have been declared; the value must be of the type
6184 * declared for it.
6185 *
6186 * [25] Eq ::= S? '=' S?
6187 *
6188 * With namespace:
6189 *
6190 * [NS 11] Attribute ::= QName Eq AttValue
6191 *
6192 * Also the case QName == xmlns:??? is handled independently as a namespace
6193 * definition.
6194 *
6195 * Returns the attribute name, and the value in *value.
6196 */
6197
6198xmlChar *
6199xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6200 xmlChar *name, *val;
6201
6202 *value = NULL;
6203 name = xmlParseName(ctxt);
6204 if (name == NULL) {
6205 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6207 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6208 ctxt->wellFormed = 0;
6209 ctxt->disableSAX = 1;
6210 return(NULL);
6211 }
6212
6213 /*
6214 * read the value
6215 */
6216 SKIP_BLANKS;
6217 if (RAW == '=') {
6218 NEXT;
6219 SKIP_BLANKS;
6220 val = xmlParseAttValue(ctxt);
6221 ctxt->instate = XML_PARSER_CONTENT;
6222 } else {
6223 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6225 ctxt->sax->error(ctxt->userData,
6226 "Specification mandate value for attribute %s\n", name);
6227 ctxt->wellFormed = 0;
6228 ctxt->disableSAX = 1;
6229 xmlFree(name);
6230 return(NULL);
6231 }
6232
6233 /*
6234 * Check that xml:lang conforms to the specification
6235 * No more registered as an error, just generate a warning now
6236 * since this was deprecated in XML second edition
6237 */
6238 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6239 if (!xmlCheckLanguageID(val)) {
6240 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6241 ctxt->sax->warning(ctxt->userData,
6242 "Malformed value for xml:lang : %s\n", val);
6243 }
6244 }
6245
6246 /*
6247 * Check that xml:space conforms to the specification
6248 */
6249 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6250 if (xmlStrEqual(val, BAD_CAST "default"))
6251 *(ctxt->space) = 0;
6252 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6253 *(ctxt->space) = 1;
6254 else {
6255 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6257 ctxt->sax->error(ctxt->userData,
6258"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6259 val);
6260 ctxt->wellFormed = 0;
6261 ctxt->disableSAX = 1;
6262 }
6263 }
6264
6265 *value = val;
6266 return(name);
6267}
6268
6269/**
6270 * xmlParseStartTag:
6271 * @ctxt: an XML parser context
6272 *
6273 * parse a start of tag either for rule element or
6274 * EmptyElement. In both case we don't parse the tag closing chars.
6275 *
6276 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6277 *
6278 * [ WFC: Unique Att Spec ]
6279 * No attribute name may appear more than once in the same start-tag or
6280 * empty-element tag.
6281 *
6282 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6283 *
6284 * [ WFC: Unique Att Spec ]
6285 * No attribute name may appear more than once in the same start-tag or
6286 * empty-element tag.
6287 *
6288 * With namespace:
6289 *
6290 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6291 *
6292 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6293 *
6294 * Returns the element name parsed
6295 */
6296
6297xmlChar *
6298xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6299 xmlChar *name;
6300 xmlChar *attname;
6301 xmlChar *attvalue;
6302 const xmlChar **atts = NULL;
6303 int nbatts = 0;
6304 int maxatts = 0;
6305 int i;
6306
6307 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006308 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006309
6310 name = xmlParseName(ctxt);
6311 if (name == NULL) {
6312 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6314 ctxt->sax->error(ctxt->userData,
6315 "xmlParseStartTag: invalid element name\n");
6316 ctxt->wellFormed = 0;
6317 ctxt->disableSAX = 1;
6318 return(NULL);
6319 }
6320
6321 /*
6322 * Now parse the attributes, it ends up with the ending
6323 *
6324 * (S Attribute)* S?
6325 */
6326 SKIP_BLANKS;
6327 GROW;
6328
Daniel Veillard21a0f912001-02-25 19:54:14 +00006329 while ((RAW != '>') &&
6330 ((RAW != '/') || (NXT(1) != '>')) &&
6331 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006332 const xmlChar *q = CUR_PTR;
6333 int cons = ctxt->input->consumed;
6334
6335 attname = xmlParseAttribute(ctxt, &attvalue);
6336 if ((attname != NULL) && (attvalue != NULL)) {
6337 /*
6338 * [ WFC: Unique Att Spec ]
6339 * No attribute name may appear more than once in the same
6340 * start-tag or empty-element tag.
6341 */
6342 for (i = 0; i < nbatts;i += 2) {
6343 if (xmlStrEqual(atts[i], attname)) {
6344 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6346 ctxt->sax->error(ctxt->userData,
6347 "Attribute %s redefined\n",
6348 attname);
6349 ctxt->wellFormed = 0;
6350 ctxt->disableSAX = 1;
6351 xmlFree(attname);
6352 xmlFree(attvalue);
6353 goto failed;
6354 }
6355 }
6356
6357 /*
6358 * Add the pair to atts
6359 */
6360 if (atts == NULL) {
6361 maxatts = 10;
6362 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6363 if (atts == NULL) {
6364 xmlGenericError(xmlGenericErrorContext,
6365 "malloc of %ld byte failed\n",
6366 maxatts * (long)sizeof(xmlChar *));
6367 return(NULL);
6368 }
6369 } else if (nbatts + 4 > maxatts) {
6370 maxatts *= 2;
6371 atts = (const xmlChar **) xmlRealloc((void *) atts,
6372 maxatts * sizeof(xmlChar *));
6373 if (atts == NULL) {
6374 xmlGenericError(xmlGenericErrorContext,
6375 "realloc of %ld byte failed\n",
6376 maxatts * (long)sizeof(xmlChar *));
6377 return(NULL);
6378 }
6379 }
6380 atts[nbatts++] = attname;
6381 atts[nbatts++] = attvalue;
6382 atts[nbatts] = NULL;
6383 atts[nbatts + 1] = NULL;
6384 } else {
6385 if (attname != NULL)
6386 xmlFree(attname);
6387 if (attvalue != NULL)
6388 xmlFree(attvalue);
6389 }
6390
6391failed:
6392
6393 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6394 break;
6395 if (!IS_BLANK(RAW)) {
6396 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6398 ctxt->sax->error(ctxt->userData,
6399 "attributes construct error\n");
6400 ctxt->wellFormed = 0;
6401 ctxt->disableSAX = 1;
6402 }
6403 SKIP_BLANKS;
6404 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6405 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6407 ctxt->sax->error(ctxt->userData,
6408 "xmlParseStartTag: problem parsing attributes\n");
6409 ctxt->wellFormed = 0;
6410 ctxt->disableSAX = 1;
6411 break;
6412 }
6413 GROW;
6414 }
6415
6416 /*
6417 * SAX: Start of Element !
6418 */
6419 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6420 (!ctxt->disableSAX))
6421 ctxt->sax->startElement(ctxt->userData, name, atts);
6422
6423 if (atts != NULL) {
6424 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6425 xmlFree((void *) atts);
6426 }
6427 return(name);
6428}
6429
6430/**
6431 * xmlParseEndTag:
6432 * @ctxt: an XML parser context
6433 *
6434 * parse an end of tag
6435 *
6436 * [42] ETag ::= '</' Name S? '>'
6437 *
6438 * With namespace
6439 *
6440 * [NS 9] ETag ::= '</' QName S? '>'
6441 */
6442
6443void
6444xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6445 xmlChar *name;
6446 xmlChar *oldname;
6447
6448 GROW;
6449 if ((RAW != '<') || (NXT(1) != '/')) {
6450 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6452 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6453 ctxt->wellFormed = 0;
6454 ctxt->disableSAX = 1;
6455 return;
6456 }
6457 SKIP(2);
6458
6459 name = xmlParseName(ctxt);
6460
6461 /*
6462 * We should definitely be at the ending "S? '>'" part
6463 */
6464 GROW;
6465 SKIP_BLANKS;
6466 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6467 ctxt->errNo = XML_ERR_GT_REQUIRED;
6468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6469 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6470 ctxt->wellFormed = 0;
6471 ctxt->disableSAX = 1;
6472 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006473 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006474
6475 /*
6476 * [ WFC: Element Type Match ]
6477 * The Name in an element's end-tag must match the element type in the
6478 * start-tag.
6479 *
6480 */
6481 if ((name == NULL) || (ctxt->name == NULL) ||
6482 (!xmlStrEqual(name, ctxt->name))) {
6483 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6485 if ((name != NULL) && (ctxt->name != NULL)) {
6486 ctxt->sax->error(ctxt->userData,
6487 "Opening and ending tag mismatch: %s and %s\n",
6488 ctxt->name, name);
6489 } else if (ctxt->name != NULL) {
6490 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006491 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 } else {
6493 ctxt->sax->error(ctxt->userData,
6494 "Ending tag error: internal error ???\n");
6495 }
6496
6497 }
6498 ctxt->wellFormed = 0;
6499 ctxt->disableSAX = 1;
6500 }
6501
6502 /*
6503 * SAX: End of Tag
6504 */
6505 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6506 (!ctxt->disableSAX))
6507 ctxt->sax->endElement(ctxt->userData, name);
6508
6509 if (name != NULL)
6510 xmlFree(name);
6511 oldname = namePop(ctxt);
6512 spacePop(ctxt);
6513 if (oldname != NULL) {
6514#ifdef DEBUG_STACK
6515 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6516#endif
6517 xmlFree(oldname);
6518 }
6519 return;
6520}
6521
6522/**
6523 * xmlParseCDSect:
6524 * @ctxt: an XML parser context
6525 *
6526 * Parse escaped pure raw content.
6527 *
6528 * [18] CDSect ::= CDStart CData CDEnd
6529 *
6530 * [19] CDStart ::= '<![CDATA['
6531 *
6532 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6533 *
6534 * [21] CDEnd ::= ']]>'
6535 */
6536void
6537xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6538 xmlChar *buf = NULL;
6539 int len = 0;
6540 int size = XML_PARSER_BUFFER_SIZE;
6541 int r, rl;
6542 int s, sl;
6543 int cur, l;
6544 int count = 0;
6545
6546 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6547 (NXT(2) == '[') && (NXT(3) == 'C') &&
6548 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6549 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6550 (NXT(8) == '[')) {
6551 SKIP(9);
6552 } else
6553 return;
6554
6555 ctxt->instate = XML_PARSER_CDATA_SECTION;
6556 r = CUR_CHAR(rl);
6557 if (!IS_CHAR(r)) {
6558 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6560 ctxt->sax->error(ctxt->userData,
6561 "CData section not finished\n");
6562 ctxt->wellFormed = 0;
6563 ctxt->disableSAX = 1;
6564 ctxt->instate = XML_PARSER_CONTENT;
6565 return;
6566 }
6567 NEXTL(rl);
6568 s = CUR_CHAR(sl);
6569 if (!IS_CHAR(s)) {
6570 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6572 ctxt->sax->error(ctxt->userData,
6573 "CData section not finished\n");
6574 ctxt->wellFormed = 0;
6575 ctxt->disableSAX = 1;
6576 ctxt->instate = XML_PARSER_CONTENT;
6577 return;
6578 }
6579 NEXTL(sl);
6580 cur = CUR_CHAR(l);
6581 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6582 if (buf == NULL) {
6583 xmlGenericError(xmlGenericErrorContext,
6584 "malloc of %d byte failed\n", size);
6585 return;
6586 }
6587 while (IS_CHAR(cur) &&
6588 ((r != ']') || (s != ']') || (cur != '>'))) {
6589 if (len + 5 >= size) {
6590 size *= 2;
6591 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6592 if (buf == NULL) {
6593 xmlGenericError(xmlGenericErrorContext,
6594 "realloc of %d byte failed\n", size);
6595 return;
6596 }
6597 }
6598 COPY_BUF(rl,buf,len,r);
6599 r = s;
6600 rl = sl;
6601 s = cur;
6602 sl = l;
6603 count++;
6604 if (count > 50) {
6605 GROW;
6606 count = 0;
6607 }
6608 NEXTL(l);
6609 cur = CUR_CHAR(l);
6610 }
6611 buf[len] = 0;
6612 ctxt->instate = XML_PARSER_CONTENT;
6613 if (cur != '>') {
6614 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6616 ctxt->sax->error(ctxt->userData,
6617 "CData section not finished\n%.50s\n", buf);
6618 ctxt->wellFormed = 0;
6619 ctxt->disableSAX = 1;
6620 xmlFree(buf);
6621 return;
6622 }
6623 NEXTL(l);
6624
6625 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006626 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006627 */
6628 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6629 if (ctxt->sax->cdataBlock != NULL)
6630 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006631 else if (ctxt->sax->characters != NULL)
6632 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006633 }
6634 xmlFree(buf);
6635}
6636
6637/**
6638 * xmlParseContent:
6639 * @ctxt: an XML parser context
6640 *
6641 * Parse a content:
6642 *
6643 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6644 */
6645
6646void
6647xmlParseContent(xmlParserCtxtPtr ctxt) {
6648 GROW;
6649 while (((RAW != 0) || (ctxt->token != 0)) &&
6650 ((RAW != '<') || (NXT(1) != '/'))) {
6651 const xmlChar *test = CUR_PTR;
6652 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006653 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006654 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006655
6656 /*
6657 * Handle possible processed charrefs.
6658 */
6659 if (ctxt->token != 0) {
6660 xmlParseCharData(ctxt, 0);
6661 }
6662 /*
6663 * First case : a Processing Instruction.
6664 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006665 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006666 xmlParsePI(ctxt);
6667 }
6668
6669 /*
6670 * Second case : a CDSection
6671 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006672 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006673 (NXT(2) == '[') && (NXT(3) == 'C') &&
6674 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6675 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6676 (NXT(8) == '[')) {
6677 xmlParseCDSect(ctxt);
6678 }
6679
6680 /*
6681 * Third case : a comment
6682 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006683 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006684 (NXT(2) == '-') && (NXT(3) == '-')) {
6685 xmlParseComment(ctxt);
6686 ctxt->instate = XML_PARSER_CONTENT;
6687 }
6688
6689 /*
6690 * Fourth case : a sub-element.
6691 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006692 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006693 xmlParseElement(ctxt);
6694 }
6695
6696 /*
6697 * Fifth case : a reference. If if has not been resolved,
6698 * parsing returns it's Name, create the node
6699 */
6700
Daniel Veillard21a0f912001-02-25 19:54:14 +00006701 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006702 xmlParseReference(ctxt);
6703 }
6704
6705 /*
6706 * Last case, text. Note that References are handled directly.
6707 */
6708 else {
6709 xmlParseCharData(ctxt, 0);
6710 }
6711
6712 GROW;
6713 /*
6714 * Pop-up of finished entities.
6715 */
6716 while ((RAW == 0) && (ctxt->inputNr > 1))
6717 xmlPopInput(ctxt);
6718 SHRINK;
6719
6720 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6721 (tok == ctxt->token)) {
6722 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6724 ctxt->sax->error(ctxt->userData,
6725 "detected an error in element content\n");
6726 ctxt->wellFormed = 0;
6727 ctxt->disableSAX = 1;
6728 ctxt->instate = XML_PARSER_EOF;
6729 break;
6730 }
6731 }
6732}
6733
6734/**
6735 * xmlParseElement:
6736 * @ctxt: an XML parser context
6737 *
6738 * parse an XML element, this is highly recursive
6739 *
6740 * [39] element ::= EmptyElemTag | STag content ETag
6741 *
6742 * [ WFC: Element Type Match ]
6743 * The Name in an element's end-tag must match the element type in the
6744 * start-tag.
6745 *
6746 * [ VC: Element Valid ]
6747 * An element is valid if there is a declaration matching elementdecl
6748 * where the Name matches the element type and one of the following holds:
6749 * - The declaration matches EMPTY and the element has no content.
6750 * - The declaration matches children and the sequence of child elements
6751 * belongs to the language generated by the regular expression in the
6752 * content model, with optional white space (characters matching the
6753 * nonterminal S) between each pair of child elements.
6754 * - The declaration matches Mixed and the content consists of character
6755 * data and child elements whose types match names in the content model.
6756 * - The declaration matches ANY, and the types of any child elements have
6757 * been declared.
6758 */
6759
6760void
6761xmlParseElement(xmlParserCtxtPtr ctxt) {
6762 const xmlChar *openTag = CUR_PTR;
6763 xmlChar *name;
6764 xmlChar *oldname;
6765 xmlParserNodeInfo node_info;
6766 xmlNodePtr ret;
6767
6768 /* Capture start position */
6769 if (ctxt->record_info) {
6770 node_info.begin_pos = ctxt->input->consumed +
6771 (CUR_PTR - ctxt->input->base);
6772 node_info.begin_line = ctxt->input->line;
6773 }
6774
6775 if (ctxt->spaceNr == 0)
6776 spacePush(ctxt, -1);
6777 else
6778 spacePush(ctxt, *ctxt->space);
6779
6780 name = xmlParseStartTag(ctxt);
6781 if (name == NULL) {
6782 spacePop(ctxt);
6783 return;
6784 }
6785 namePush(ctxt, name);
6786 ret = ctxt->node;
6787
6788 /*
6789 * [ VC: Root Element Type ]
6790 * The Name in the document type declaration must match the element
6791 * type of the root element.
6792 */
6793 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6794 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6795 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6796
6797 /*
6798 * Check for an Empty Element.
6799 */
6800 if ((RAW == '/') && (NXT(1) == '>')) {
6801 SKIP(2);
6802 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6803 (!ctxt->disableSAX))
6804 ctxt->sax->endElement(ctxt->userData, name);
6805 oldname = namePop(ctxt);
6806 spacePop(ctxt);
6807 if (oldname != NULL) {
6808#ifdef DEBUG_STACK
6809 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6810#endif
6811 xmlFree(oldname);
6812 }
6813 if ( ret != NULL && ctxt->record_info ) {
6814 node_info.end_pos = ctxt->input->consumed +
6815 (CUR_PTR - ctxt->input->base);
6816 node_info.end_line = ctxt->input->line;
6817 node_info.node = ret;
6818 xmlParserAddNodeInfo(ctxt, &node_info);
6819 }
6820 return;
6821 }
6822 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006823 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006824 } else {
6825 ctxt->errNo = XML_ERR_GT_REQUIRED;
6826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6827 ctxt->sax->error(ctxt->userData,
6828 "Couldn't find end of Start Tag\n%.30s\n",
6829 openTag);
6830 ctxt->wellFormed = 0;
6831 ctxt->disableSAX = 1;
6832
6833 /*
6834 * end of parsing of this node.
6835 */
6836 nodePop(ctxt);
6837 oldname = namePop(ctxt);
6838 spacePop(ctxt);
6839 if (oldname != NULL) {
6840#ifdef DEBUG_STACK
6841 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6842#endif
6843 xmlFree(oldname);
6844 }
6845
6846 /*
6847 * Capture end position and add node
6848 */
6849 if ( ret != NULL && ctxt->record_info ) {
6850 node_info.end_pos = ctxt->input->consumed +
6851 (CUR_PTR - ctxt->input->base);
6852 node_info.end_line = ctxt->input->line;
6853 node_info.node = ret;
6854 xmlParserAddNodeInfo(ctxt, &node_info);
6855 }
6856 return;
6857 }
6858
6859 /*
6860 * Parse the content of the element:
6861 */
6862 xmlParseContent(ctxt);
6863 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006864 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6866 ctxt->sax->error(ctxt->userData,
6867 "Premature end of data in tag %.30s\n", openTag);
6868 ctxt->wellFormed = 0;
6869 ctxt->disableSAX = 1;
6870
6871 /*
6872 * end of parsing of this node.
6873 */
6874 nodePop(ctxt);
6875 oldname = namePop(ctxt);
6876 spacePop(ctxt);
6877 if (oldname != NULL) {
6878#ifdef DEBUG_STACK
6879 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6880#endif
6881 xmlFree(oldname);
6882 }
6883 return;
6884 }
6885
6886 /*
6887 * parse the end of tag: '</' should be here.
6888 */
6889 xmlParseEndTag(ctxt);
6890
6891 /*
6892 * Capture end position and add node
6893 */
6894 if ( ret != NULL && ctxt->record_info ) {
6895 node_info.end_pos = ctxt->input->consumed +
6896 (CUR_PTR - ctxt->input->base);
6897 node_info.end_line = ctxt->input->line;
6898 node_info.node = ret;
6899 xmlParserAddNodeInfo(ctxt, &node_info);
6900 }
6901}
6902
6903/**
6904 * xmlParseVersionNum:
6905 * @ctxt: an XML parser context
6906 *
6907 * parse the XML version value.
6908 *
6909 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6910 *
6911 * Returns the string giving the XML version number, or NULL
6912 */
6913xmlChar *
6914xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6915 xmlChar *buf = NULL;
6916 int len = 0;
6917 int size = 10;
6918 xmlChar cur;
6919
6920 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6921 if (buf == NULL) {
6922 xmlGenericError(xmlGenericErrorContext,
6923 "malloc of %d byte failed\n", size);
6924 return(NULL);
6925 }
6926 cur = CUR;
6927 while (((cur >= 'a') && (cur <= 'z')) ||
6928 ((cur >= 'A') && (cur <= 'Z')) ||
6929 ((cur >= '0') && (cur <= '9')) ||
6930 (cur == '_') || (cur == '.') ||
6931 (cur == ':') || (cur == '-')) {
6932 if (len + 1 >= size) {
6933 size *= 2;
6934 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6935 if (buf == NULL) {
6936 xmlGenericError(xmlGenericErrorContext,
6937 "realloc of %d byte failed\n", size);
6938 return(NULL);
6939 }
6940 }
6941 buf[len++] = cur;
6942 NEXT;
6943 cur=CUR;
6944 }
6945 buf[len] = 0;
6946 return(buf);
6947}
6948
6949/**
6950 * xmlParseVersionInfo:
6951 * @ctxt: an XML parser context
6952 *
6953 * parse the XML version.
6954 *
6955 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6956 *
6957 * [25] Eq ::= S? '=' S?
6958 *
6959 * Returns the version string, e.g. "1.0"
6960 */
6961
6962xmlChar *
6963xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6964 xmlChar *version = NULL;
6965 const xmlChar *q;
6966
6967 if ((RAW == 'v') && (NXT(1) == 'e') &&
6968 (NXT(2) == 'r') && (NXT(3) == 's') &&
6969 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6970 (NXT(6) == 'n')) {
6971 SKIP(7);
6972 SKIP_BLANKS;
6973 if (RAW != '=') {
6974 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6976 ctxt->sax->error(ctxt->userData,
6977 "xmlParseVersionInfo : expected '='\n");
6978 ctxt->wellFormed = 0;
6979 ctxt->disableSAX = 1;
6980 return(NULL);
6981 }
6982 NEXT;
6983 SKIP_BLANKS;
6984 if (RAW == '"') {
6985 NEXT;
6986 q = CUR_PTR;
6987 version = xmlParseVersionNum(ctxt);
6988 if (RAW != '"') {
6989 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6991 ctxt->sax->error(ctxt->userData,
6992 "String not closed\n%.50s\n", q);
6993 ctxt->wellFormed = 0;
6994 ctxt->disableSAX = 1;
6995 } else
6996 NEXT;
6997 } else if (RAW == '\''){
6998 NEXT;
6999 q = CUR_PTR;
7000 version = xmlParseVersionNum(ctxt);
7001 if (RAW != '\'') {
7002 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7004 ctxt->sax->error(ctxt->userData,
7005 "String not closed\n%.50s\n", q);
7006 ctxt->wellFormed = 0;
7007 ctxt->disableSAX = 1;
7008 } else
7009 NEXT;
7010 } else {
7011 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7013 ctxt->sax->error(ctxt->userData,
7014 "xmlParseVersionInfo : expected ' or \"\n");
7015 ctxt->wellFormed = 0;
7016 ctxt->disableSAX = 1;
7017 }
7018 }
7019 return(version);
7020}
7021
7022/**
7023 * xmlParseEncName:
7024 * @ctxt: an XML parser context
7025 *
7026 * parse the XML encoding name
7027 *
7028 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7029 *
7030 * Returns the encoding name value or NULL
7031 */
7032xmlChar *
7033xmlParseEncName(xmlParserCtxtPtr ctxt) {
7034 xmlChar *buf = NULL;
7035 int len = 0;
7036 int size = 10;
7037 xmlChar cur;
7038
7039 cur = CUR;
7040 if (((cur >= 'a') && (cur <= 'z')) ||
7041 ((cur >= 'A') && (cur <= 'Z'))) {
7042 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7043 if (buf == NULL) {
7044 xmlGenericError(xmlGenericErrorContext,
7045 "malloc of %d byte failed\n", size);
7046 return(NULL);
7047 }
7048
7049 buf[len++] = cur;
7050 NEXT;
7051 cur = CUR;
7052 while (((cur >= 'a') && (cur <= 'z')) ||
7053 ((cur >= 'A') && (cur <= 'Z')) ||
7054 ((cur >= '0') && (cur <= '9')) ||
7055 (cur == '.') || (cur == '_') ||
7056 (cur == '-')) {
7057 if (len + 1 >= size) {
7058 size *= 2;
7059 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7060 if (buf == NULL) {
7061 xmlGenericError(xmlGenericErrorContext,
7062 "realloc of %d byte failed\n", size);
7063 return(NULL);
7064 }
7065 }
7066 buf[len++] = cur;
7067 NEXT;
7068 cur = CUR;
7069 if (cur == 0) {
7070 SHRINK;
7071 GROW;
7072 cur = CUR;
7073 }
7074 }
7075 buf[len] = 0;
7076 } else {
7077 ctxt->errNo = XML_ERR_ENCODING_NAME;
7078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7079 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7080 ctxt->wellFormed = 0;
7081 ctxt->disableSAX = 1;
7082 }
7083 return(buf);
7084}
7085
7086/**
7087 * xmlParseEncodingDecl:
7088 * @ctxt: an XML parser context
7089 *
7090 * parse the XML encoding declaration
7091 *
7092 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7093 *
7094 * this setups the conversion filters.
7095 *
7096 * Returns the encoding value or NULL
7097 */
7098
7099xmlChar *
7100xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7101 xmlChar *encoding = NULL;
7102 const xmlChar *q;
7103
7104 SKIP_BLANKS;
7105 if ((RAW == 'e') && (NXT(1) == 'n') &&
7106 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7107 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7108 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7109 SKIP(8);
7110 SKIP_BLANKS;
7111 if (RAW != '=') {
7112 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7114 ctxt->sax->error(ctxt->userData,
7115 "xmlParseEncodingDecl : expected '='\n");
7116 ctxt->wellFormed = 0;
7117 ctxt->disableSAX = 1;
7118 return(NULL);
7119 }
7120 NEXT;
7121 SKIP_BLANKS;
7122 if (RAW == '"') {
7123 NEXT;
7124 q = CUR_PTR;
7125 encoding = xmlParseEncName(ctxt);
7126 if (RAW != '"') {
7127 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7129 ctxt->sax->error(ctxt->userData,
7130 "String not closed\n%.50s\n", q);
7131 ctxt->wellFormed = 0;
7132 ctxt->disableSAX = 1;
7133 } else
7134 NEXT;
7135 } else if (RAW == '\''){
7136 NEXT;
7137 q = CUR_PTR;
7138 encoding = xmlParseEncName(ctxt);
7139 if (RAW != '\'') {
7140 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7142 ctxt->sax->error(ctxt->userData,
7143 "String not closed\n%.50s\n", q);
7144 ctxt->wellFormed = 0;
7145 ctxt->disableSAX = 1;
7146 } else
7147 NEXT;
7148 } else if (RAW == '"'){
7149 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7151 ctxt->sax->error(ctxt->userData,
7152 "xmlParseEncodingDecl : expected ' or \"\n");
7153 ctxt->wellFormed = 0;
7154 ctxt->disableSAX = 1;
7155 }
7156 if (encoding != NULL) {
7157 xmlCharEncoding enc;
7158 xmlCharEncodingHandlerPtr handler;
7159
7160 if (ctxt->input->encoding != NULL)
7161 xmlFree((xmlChar *) ctxt->input->encoding);
7162 ctxt->input->encoding = encoding;
7163
7164 enc = xmlParseCharEncoding((const char *) encoding);
7165 /*
7166 * registered set of known encodings
7167 */
7168 if (enc != XML_CHAR_ENCODING_ERROR) {
7169 xmlSwitchEncoding(ctxt, enc);
7170 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7171 xmlFree(encoding);
7172 return(NULL);
7173 }
7174 } else {
7175 /*
7176 * fallback for unknown encodings
7177 */
7178 handler = xmlFindCharEncodingHandler((const char *) encoding);
7179 if (handler != NULL) {
7180 xmlSwitchToEncoding(ctxt, handler);
7181 } else {
7182 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7184 ctxt->sax->error(ctxt->userData,
7185 "Unsupported encoding %s\n", encoding);
7186 return(NULL);
7187 }
7188 }
7189 }
7190 }
7191 return(encoding);
7192}
7193
7194/**
7195 * xmlParseSDDecl:
7196 * @ctxt: an XML parser context
7197 *
7198 * parse the XML standalone declaration
7199 *
7200 * [32] SDDecl ::= S 'standalone' Eq
7201 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7202 *
7203 * [ VC: Standalone Document Declaration ]
7204 * TODO The standalone document declaration must have the value "no"
7205 * if any external markup declarations contain declarations of:
7206 * - attributes with default values, if elements to which these
7207 * attributes apply appear in the document without specifications
7208 * of values for these attributes, or
7209 * - entities (other than amp, lt, gt, apos, quot), if references
7210 * to those entities appear in the document, or
7211 * - attributes with values subject to normalization, where the
7212 * attribute appears in the document with a value which will change
7213 * as a result of normalization, or
7214 * - element types with element content, if white space occurs directly
7215 * within any instance of those types.
7216 *
7217 * Returns 1 if standalone, 0 otherwise
7218 */
7219
7220int
7221xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7222 int standalone = -1;
7223
7224 SKIP_BLANKS;
7225 if ((RAW == 's') && (NXT(1) == 't') &&
7226 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7227 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7228 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7229 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7230 SKIP(10);
7231 SKIP_BLANKS;
7232 if (RAW != '=') {
7233 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7235 ctxt->sax->error(ctxt->userData,
7236 "XML standalone declaration : expected '='\n");
7237 ctxt->wellFormed = 0;
7238 ctxt->disableSAX = 1;
7239 return(standalone);
7240 }
7241 NEXT;
7242 SKIP_BLANKS;
7243 if (RAW == '\''){
7244 NEXT;
7245 if ((RAW == 'n') && (NXT(1) == 'o')) {
7246 standalone = 0;
7247 SKIP(2);
7248 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7249 (NXT(2) == 's')) {
7250 standalone = 1;
7251 SKIP(3);
7252 } else {
7253 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7255 ctxt->sax->error(ctxt->userData,
7256 "standalone accepts only 'yes' or 'no'\n");
7257 ctxt->wellFormed = 0;
7258 ctxt->disableSAX = 1;
7259 }
7260 if (RAW != '\'') {
7261 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7263 ctxt->sax->error(ctxt->userData, "String not closed\n");
7264 ctxt->wellFormed = 0;
7265 ctxt->disableSAX = 1;
7266 } else
7267 NEXT;
7268 } else if (RAW == '"'){
7269 NEXT;
7270 if ((RAW == 'n') && (NXT(1) == 'o')) {
7271 standalone = 0;
7272 SKIP(2);
7273 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7274 (NXT(2) == 's')) {
7275 standalone = 1;
7276 SKIP(3);
7277 } else {
7278 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7280 ctxt->sax->error(ctxt->userData,
7281 "standalone accepts only 'yes' or 'no'\n");
7282 ctxt->wellFormed = 0;
7283 ctxt->disableSAX = 1;
7284 }
7285 if (RAW != '"') {
7286 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7288 ctxt->sax->error(ctxt->userData, "String not closed\n");
7289 ctxt->wellFormed = 0;
7290 ctxt->disableSAX = 1;
7291 } else
7292 NEXT;
7293 } else {
7294 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7296 ctxt->sax->error(ctxt->userData,
7297 "Standalone value not found\n");
7298 ctxt->wellFormed = 0;
7299 ctxt->disableSAX = 1;
7300 }
7301 }
7302 return(standalone);
7303}
7304
7305/**
7306 * xmlParseXMLDecl:
7307 * @ctxt: an XML parser context
7308 *
7309 * parse an XML declaration header
7310 *
7311 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7312 */
7313
7314void
7315xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7316 xmlChar *version;
7317
7318 /*
7319 * We know that '<?xml' is here.
7320 */
7321 SKIP(5);
7322
7323 if (!IS_BLANK(RAW)) {
7324 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7326 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7327 ctxt->wellFormed = 0;
7328 ctxt->disableSAX = 1;
7329 }
7330 SKIP_BLANKS;
7331
7332 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007333 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007334 */
7335 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007336 if (version == NULL) {
7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7338 ctxt->sax->error(ctxt->userData,
7339 "Malformed declaration expecting version\n");
7340 ctxt->wellFormed = 0;
7341 ctxt->disableSAX = 1;
7342 } else {
7343 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7344 /*
7345 * TODO: Blueberry should be detected here
7346 */
7347 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7348 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7349 version);
7350 }
7351 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007352 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007353 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007354 }
Owen Taylor3473f882001-02-23 17:55:21 +00007355
7356 /*
7357 * We may have the encoding declaration
7358 */
7359 if (!IS_BLANK(RAW)) {
7360 if ((RAW == '?') && (NXT(1) == '>')) {
7361 SKIP(2);
7362 return;
7363 }
7364 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7366 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7367 ctxt->wellFormed = 0;
7368 ctxt->disableSAX = 1;
7369 }
7370 xmlParseEncodingDecl(ctxt);
7371 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7372 /*
7373 * The XML REC instructs us to stop parsing right here
7374 */
7375 return;
7376 }
7377
7378 /*
7379 * We may have the standalone status.
7380 */
7381 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7382 if ((RAW == '?') && (NXT(1) == '>')) {
7383 SKIP(2);
7384 return;
7385 }
7386 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7388 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7389 ctxt->wellFormed = 0;
7390 ctxt->disableSAX = 1;
7391 }
7392 SKIP_BLANKS;
7393 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7394
7395 SKIP_BLANKS;
7396 if ((RAW == '?') && (NXT(1) == '>')) {
7397 SKIP(2);
7398 } else if (RAW == '>') {
7399 /* Deprecated old WD ... */
7400 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7402 ctxt->sax->error(ctxt->userData,
7403 "XML declaration must end-up with '?>'\n");
7404 ctxt->wellFormed = 0;
7405 ctxt->disableSAX = 1;
7406 NEXT;
7407 } else {
7408 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7410 ctxt->sax->error(ctxt->userData,
7411 "parsing XML declaration: '?>' expected\n");
7412 ctxt->wellFormed = 0;
7413 ctxt->disableSAX = 1;
7414 MOVETO_ENDTAG(CUR_PTR);
7415 NEXT;
7416 }
7417}
7418
7419/**
7420 * xmlParseMisc:
7421 * @ctxt: an XML parser context
7422 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007423 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007424 *
7425 * [27] Misc ::= Comment | PI | S
7426 */
7427
7428void
7429xmlParseMisc(xmlParserCtxtPtr ctxt) {
7430 while (((RAW == '<') && (NXT(1) == '?')) ||
7431 ((RAW == '<') && (NXT(1) == '!') &&
7432 (NXT(2) == '-') && (NXT(3) == '-')) ||
7433 IS_BLANK(CUR)) {
7434 if ((RAW == '<') && (NXT(1) == '?')) {
7435 xmlParsePI(ctxt);
7436 } else if (IS_BLANK(CUR)) {
7437 NEXT;
7438 } else
7439 xmlParseComment(ctxt);
7440 }
7441}
7442
7443/**
7444 * xmlParseDocument:
7445 * @ctxt: an XML parser context
7446 *
7447 * parse an XML document (and build a tree if using the standard SAX
7448 * interface).
7449 *
7450 * [1] document ::= prolog element Misc*
7451 *
7452 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7453 *
7454 * Returns 0, -1 in case of error. the parser context is augmented
7455 * as a result of the parsing.
7456 */
7457
7458int
7459xmlParseDocument(xmlParserCtxtPtr ctxt) {
7460 xmlChar start[4];
7461 xmlCharEncoding enc;
7462
7463 xmlInitParser();
7464
7465 GROW;
7466
7467 /*
7468 * SAX: beginning of the document processing.
7469 */
7470 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7471 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7472
Daniel Veillard50f34372001-08-03 12:06:36 +00007473 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007474 /*
7475 * Get the 4 first bytes and decode the charset
7476 * if enc != XML_CHAR_ENCODING_NONE
7477 * plug some encoding conversion routines.
7478 */
7479 start[0] = RAW;
7480 start[1] = NXT(1);
7481 start[2] = NXT(2);
7482 start[3] = NXT(3);
7483 enc = xmlDetectCharEncoding(start, 4);
7484 if (enc != XML_CHAR_ENCODING_NONE) {
7485 xmlSwitchEncoding(ctxt, enc);
7486 }
Owen Taylor3473f882001-02-23 17:55:21 +00007487 }
7488
7489
7490 if (CUR == 0) {
7491 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7493 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7494 ctxt->wellFormed = 0;
7495 ctxt->disableSAX = 1;
7496 }
7497
7498 /*
7499 * Check for the XMLDecl in the Prolog.
7500 */
7501 GROW;
7502 if ((RAW == '<') && (NXT(1) == '?') &&
7503 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7504 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7505
7506 /*
7507 * Note that we will switch encoding on the fly.
7508 */
7509 xmlParseXMLDecl(ctxt);
7510 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7511 /*
7512 * The XML REC instructs us to stop parsing right here
7513 */
7514 return(-1);
7515 }
7516 ctxt->standalone = ctxt->input->standalone;
7517 SKIP_BLANKS;
7518 } else {
7519 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7520 }
7521 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7522 ctxt->sax->startDocument(ctxt->userData);
7523
7524 /*
7525 * The Misc part of the Prolog
7526 */
7527 GROW;
7528 xmlParseMisc(ctxt);
7529
7530 /*
7531 * Then possibly doc type declaration(s) and more Misc
7532 * (doctypedecl Misc*)?
7533 */
7534 GROW;
7535 if ((RAW == '<') && (NXT(1) == '!') &&
7536 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7537 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7538 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7539 (NXT(8) == 'E')) {
7540
7541 ctxt->inSubset = 1;
7542 xmlParseDocTypeDecl(ctxt);
7543 if (RAW == '[') {
7544 ctxt->instate = XML_PARSER_DTD;
7545 xmlParseInternalSubset(ctxt);
7546 }
7547
7548 /*
7549 * Create and update the external subset.
7550 */
7551 ctxt->inSubset = 2;
7552 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7553 (!ctxt->disableSAX))
7554 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7555 ctxt->extSubSystem, ctxt->extSubURI);
7556 ctxt->inSubset = 0;
7557
7558
7559 ctxt->instate = XML_PARSER_PROLOG;
7560 xmlParseMisc(ctxt);
7561 }
7562
7563 /*
7564 * Time to start parsing the tree itself
7565 */
7566 GROW;
7567 if (RAW != '<') {
7568 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7570 ctxt->sax->error(ctxt->userData,
7571 "Start tag expected, '<' not found\n");
7572 ctxt->wellFormed = 0;
7573 ctxt->disableSAX = 1;
7574 ctxt->instate = XML_PARSER_EOF;
7575 } else {
7576 ctxt->instate = XML_PARSER_CONTENT;
7577 xmlParseElement(ctxt);
7578 ctxt->instate = XML_PARSER_EPILOG;
7579
7580
7581 /*
7582 * The Misc part at the end
7583 */
7584 xmlParseMisc(ctxt);
7585
7586 if (RAW != 0) {
7587 ctxt->errNo = XML_ERR_DOCUMENT_END;
7588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7589 ctxt->sax->error(ctxt->userData,
7590 "Extra content at the end of the document\n");
7591 ctxt->wellFormed = 0;
7592 ctxt->disableSAX = 1;
7593 }
7594 ctxt->instate = XML_PARSER_EOF;
7595 }
7596
7597 /*
7598 * SAX: end of the document processing.
7599 */
7600 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7601 (!ctxt->disableSAX))
7602 ctxt->sax->endDocument(ctxt->userData);
7603
7604 if (! ctxt->wellFormed) return(-1);
7605 return(0);
7606}
7607
7608/**
7609 * xmlParseExtParsedEnt:
7610 * @ctxt: an XML parser context
7611 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007612 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007613 * An external general parsed entity is well-formed if it matches the
7614 * production labeled extParsedEnt.
7615 *
7616 * [78] extParsedEnt ::= TextDecl? content
7617 *
7618 * Returns 0, -1 in case of error. the parser context is augmented
7619 * as a result of the parsing.
7620 */
7621
7622int
7623xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7624 xmlChar start[4];
7625 xmlCharEncoding enc;
7626
7627 xmlDefaultSAXHandlerInit();
7628
7629 GROW;
7630
7631 /*
7632 * SAX: beginning of the document processing.
7633 */
7634 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7635 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7636
7637 /*
7638 * Get the 4 first bytes and decode the charset
7639 * if enc != XML_CHAR_ENCODING_NONE
7640 * plug some encoding conversion routines.
7641 */
7642 start[0] = RAW;
7643 start[1] = NXT(1);
7644 start[2] = NXT(2);
7645 start[3] = NXT(3);
7646 enc = xmlDetectCharEncoding(start, 4);
7647 if (enc != XML_CHAR_ENCODING_NONE) {
7648 xmlSwitchEncoding(ctxt, enc);
7649 }
7650
7651
7652 if (CUR == 0) {
7653 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7655 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7656 ctxt->wellFormed = 0;
7657 ctxt->disableSAX = 1;
7658 }
7659
7660 /*
7661 * Check for the XMLDecl in the Prolog.
7662 */
7663 GROW;
7664 if ((RAW == '<') && (NXT(1) == '?') &&
7665 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7666 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7667
7668 /*
7669 * Note that we will switch encoding on the fly.
7670 */
7671 xmlParseXMLDecl(ctxt);
7672 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7673 /*
7674 * The XML REC instructs us to stop parsing right here
7675 */
7676 return(-1);
7677 }
7678 SKIP_BLANKS;
7679 } else {
7680 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7681 }
7682 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7683 ctxt->sax->startDocument(ctxt->userData);
7684
7685 /*
7686 * Doing validity checking on chunk doesn't make sense
7687 */
7688 ctxt->instate = XML_PARSER_CONTENT;
7689 ctxt->validate = 0;
7690 ctxt->loadsubset = 0;
7691 ctxt->depth = 0;
7692
7693 xmlParseContent(ctxt);
7694
7695 if ((RAW == '<') && (NXT(1) == '/')) {
7696 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7698 ctxt->sax->error(ctxt->userData,
7699 "chunk is not well balanced\n");
7700 ctxt->wellFormed = 0;
7701 ctxt->disableSAX = 1;
7702 } else if (RAW != 0) {
7703 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData,
7706 "extra content at the end of well balanced chunk\n");
7707 ctxt->wellFormed = 0;
7708 ctxt->disableSAX = 1;
7709 }
7710
7711 /*
7712 * SAX: end of the document processing.
7713 */
7714 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7715 (!ctxt->disableSAX))
7716 ctxt->sax->endDocument(ctxt->userData);
7717
7718 if (! ctxt->wellFormed) return(-1);
7719 return(0);
7720}
7721
7722/************************************************************************
7723 * *
7724 * Progressive parsing interfaces *
7725 * *
7726 ************************************************************************/
7727
7728/**
7729 * xmlParseLookupSequence:
7730 * @ctxt: an XML parser context
7731 * @first: the first char to lookup
7732 * @next: the next char to lookup or zero
7733 * @third: the next char to lookup or zero
7734 *
7735 * Try to find if a sequence (first, next, third) or just (first next) or
7736 * (first) is available in the input stream.
7737 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7738 * to avoid rescanning sequences of bytes, it DOES change the state of the
7739 * parser, do not use liberally.
7740 *
7741 * Returns the index to the current parsing point if the full sequence
7742 * is available, -1 otherwise.
7743 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007744static int
Owen Taylor3473f882001-02-23 17:55:21 +00007745xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7746 xmlChar next, xmlChar third) {
7747 int base, len;
7748 xmlParserInputPtr in;
7749 const xmlChar *buf;
7750
7751 in = ctxt->input;
7752 if (in == NULL) return(-1);
7753 base = in->cur - in->base;
7754 if (base < 0) return(-1);
7755 if (ctxt->checkIndex > base)
7756 base = ctxt->checkIndex;
7757 if (in->buf == NULL) {
7758 buf = in->base;
7759 len = in->length;
7760 } else {
7761 buf = in->buf->buffer->content;
7762 len = in->buf->buffer->use;
7763 }
7764 /* take into account the sequence length */
7765 if (third) len -= 2;
7766 else if (next) len --;
7767 for (;base < len;base++) {
7768 if (buf[base] == first) {
7769 if (third != 0) {
7770 if ((buf[base + 1] != next) ||
7771 (buf[base + 2] != third)) continue;
7772 } else if (next != 0) {
7773 if (buf[base + 1] != next) continue;
7774 }
7775 ctxt->checkIndex = 0;
7776#ifdef DEBUG_PUSH
7777 if (next == 0)
7778 xmlGenericError(xmlGenericErrorContext,
7779 "PP: lookup '%c' found at %d\n",
7780 first, base);
7781 else if (third == 0)
7782 xmlGenericError(xmlGenericErrorContext,
7783 "PP: lookup '%c%c' found at %d\n",
7784 first, next, base);
7785 else
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: lookup '%c%c%c' found at %d\n",
7788 first, next, third, base);
7789#endif
7790 return(base - (in->cur - in->base));
7791 }
7792 }
7793 ctxt->checkIndex = base;
7794#ifdef DEBUG_PUSH
7795 if (next == 0)
7796 xmlGenericError(xmlGenericErrorContext,
7797 "PP: lookup '%c' failed\n", first);
7798 else if (third == 0)
7799 xmlGenericError(xmlGenericErrorContext,
7800 "PP: lookup '%c%c' failed\n", first, next);
7801 else
7802 xmlGenericError(xmlGenericErrorContext,
7803 "PP: lookup '%c%c%c' failed\n", first, next, third);
7804#endif
7805 return(-1);
7806}
7807
7808/**
7809 * xmlParseTryOrFinish:
7810 * @ctxt: an XML parser context
7811 * @terminate: last chunk indicator
7812 *
7813 * Try to progress on parsing
7814 *
7815 * Returns zero if no parsing was possible
7816 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007817static int
Owen Taylor3473f882001-02-23 17:55:21 +00007818xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7819 int ret = 0;
7820 int avail;
7821 xmlChar cur, next;
7822
7823#ifdef DEBUG_PUSH
7824 switch (ctxt->instate) {
7825 case XML_PARSER_EOF:
7826 xmlGenericError(xmlGenericErrorContext,
7827 "PP: try EOF\n"); break;
7828 case XML_PARSER_START:
7829 xmlGenericError(xmlGenericErrorContext,
7830 "PP: try START\n"); break;
7831 case XML_PARSER_MISC:
7832 xmlGenericError(xmlGenericErrorContext,
7833 "PP: try MISC\n");break;
7834 case XML_PARSER_COMMENT:
7835 xmlGenericError(xmlGenericErrorContext,
7836 "PP: try COMMENT\n");break;
7837 case XML_PARSER_PROLOG:
7838 xmlGenericError(xmlGenericErrorContext,
7839 "PP: try PROLOG\n");break;
7840 case XML_PARSER_START_TAG:
7841 xmlGenericError(xmlGenericErrorContext,
7842 "PP: try START_TAG\n");break;
7843 case XML_PARSER_CONTENT:
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PP: try CONTENT\n");break;
7846 case XML_PARSER_CDATA_SECTION:
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PP: try CDATA_SECTION\n");break;
7849 case XML_PARSER_END_TAG:
7850 xmlGenericError(xmlGenericErrorContext,
7851 "PP: try END_TAG\n");break;
7852 case XML_PARSER_ENTITY_DECL:
7853 xmlGenericError(xmlGenericErrorContext,
7854 "PP: try ENTITY_DECL\n");break;
7855 case XML_PARSER_ENTITY_VALUE:
7856 xmlGenericError(xmlGenericErrorContext,
7857 "PP: try ENTITY_VALUE\n");break;
7858 case XML_PARSER_ATTRIBUTE_VALUE:
7859 xmlGenericError(xmlGenericErrorContext,
7860 "PP: try ATTRIBUTE_VALUE\n");break;
7861 case XML_PARSER_DTD:
7862 xmlGenericError(xmlGenericErrorContext,
7863 "PP: try DTD\n");break;
7864 case XML_PARSER_EPILOG:
7865 xmlGenericError(xmlGenericErrorContext,
7866 "PP: try EPILOG\n");break;
7867 case XML_PARSER_PI:
7868 xmlGenericError(xmlGenericErrorContext,
7869 "PP: try PI\n");break;
7870 case XML_PARSER_IGNORE:
7871 xmlGenericError(xmlGenericErrorContext,
7872 "PP: try IGNORE\n");break;
7873 }
7874#endif
7875
7876 while (1) {
7877 /*
7878 * Pop-up of finished entities.
7879 */
7880 while ((RAW == 0) && (ctxt->inputNr > 1))
7881 xmlPopInput(ctxt);
7882
7883 if (ctxt->input ==NULL) break;
7884 if (ctxt->input->buf == NULL)
7885 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7886 else
7887 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7888 if (avail < 1)
7889 goto done;
7890 switch (ctxt->instate) {
7891 case XML_PARSER_EOF:
7892 /*
7893 * Document parsing is done !
7894 */
7895 goto done;
7896 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007897 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7898 xmlChar start[4];
7899 xmlCharEncoding enc;
7900
7901 /*
7902 * Very first chars read from the document flow.
7903 */
7904 if (avail < 4)
7905 goto done;
7906
7907 /*
7908 * Get the 4 first bytes and decode the charset
7909 * if enc != XML_CHAR_ENCODING_NONE
7910 * plug some encoding conversion routines.
7911 */
7912 start[0] = RAW;
7913 start[1] = NXT(1);
7914 start[2] = NXT(2);
7915 start[3] = NXT(3);
7916 enc = xmlDetectCharEncoding(start, 4);
7917 if (enc != XML_CHAR_ENCODING_NONE) {
7918 xmlSwitchEncoding(ctxt, enc);
7919 }
7920 break;
7921 }
Owen Taylor3473f882001-02-23 17:55:21 +00007922
7923 cur = ctxt->input->cur[0];
7924 next = ctxt->input->cur[1];
7925 if (cur == 0) {
7926 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7927 ctxt->sax->setDocumentLocator(ctxt->userData,
7928 &xmlDefaultSAXLocator);
7929 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7931 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7932 ctxt->wellFormed = 0;
7933 ctxt->disableSAX = 1;
7934 ctxt->instate = XML_PARSER_EOF;
7935#ifdef DEBUG_PUSH
7936 xmlGenericError(xmlGenericErrorContext,
7937 "PP: entering EOF\n");
7938#endif
7939 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7940 ctxt->sax->endDocument(ctxt->userData);
7941 goto done;
7942 }
7943 if ((cur == '<') && (next == '?')) {
7944 /* PI or XML decl */
7945 if (avail < 5) return(ret);
7946 if ((!terminate) &&
7947 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7948 return(ret);
7949 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7950 ctxt->sax->setDocumentLocator(ctxt->userData,
7951 &xmlDefaultSAXLocator);
7952 if ((ctxt->input->cur[2] == 'x') &&
7953 (ctxt->input->cur[3] == 'm') &&
7954 (ctxt->input->cur[4] == 'l') &&
7955 (IS_BLANK(ctxt->input->cur[5]))) {
7956 ret += 5;
7957#ifdef DEBUG_PUSH
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: Parsing XML Decl\n");
7960#endif
7961 xmlParseXMLDecl(ctxt);
7962 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7963 /*
7964 * The XML REC instructs us to stop parsing right
7965 * here
7966 */
7967 ctxt->instate = XML_PARSER_EOF;
7968 return(0);
7969 }
7970 ctxt->standalone = ctxt->input->standalone;
7971 if ((ctxt->encoding == NULL) &&
7972 (ctxt->input->encoding != NULL))
7973 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7974 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7975 (!ctxt->disableSAX))
7976 ctxt->sax->startDocument(ctxt->userData);
7977 ctxt->instate = XML_PARSER_MISC;
7978#ifdef DEBUG_PUSH
7979 xmlGenericError(xmlGenericErrorContext,
7980 "PP: entering MISC\n");
7981#endif
7982 } else {
7983 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7984 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7985 (!ctxt->disableSAX))
7986 ctxt->sax->startDocument(ctxt->userData);
7987 ctxt->instate = XML_PARSER_MISC;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: entering MISC\n");
7991#endif
7992 }
7993 } else {
7994 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7995 ctxt->sax->setDocumentLocator(ctxt->userData,
7996 &xmlDefaultSAXLocator);
7997 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7998 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7999 (!ctxt->disableSAX))
8000 ctxt->sax->startDocument(ctxt->userData);
8001 ctxt->instate = XML_PARSER_MISC;
8002#ifdef DEBUG_PUSH
8003 xmlGenericError(xmlGenericErrorContext,
8004 "PP: entering MISC\n");
8005#endif
8006 }
8007 break;
8008 case XML_PARSER_MISC:
8009 SKIP_BLANKS;
8010 if (ctxt->input->buf == NULL)
8011 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8012 else
8013 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8014 if (avail < 2)
8015 goto done;
8016 cur = ctxt->input->cur[0];
8017 next = ctxt->input->cur[1];
8018 if ((cur == '<') && (next == '?')) {
8019 if ((!terminate) &&
8020 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8021 goto done;
8022#ifdef DEBUG_PUSH
8023 xmlGenericError(xmlGenericErrorContext,
8024 "PP: Parsing PI\n");
8025#endif
8026 xmlParsePI(ctxt);
8027 } else if ((cur == '<') && (next == '!') &&
8028 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8029 if ((!terminate) &&
8030 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8031 goto done;
8032#ifdef DEBUG_PUSH
8033 xmlGenericError(xmlGenericErrorContext,
8034 "PP: Parsing Comment\n");
8035#endif
8036 xmlParseComment(ctxt);
8037 ctxt->instate = XML_PARSER_MISC;
8038 } else if ((cur == '<') && (next == '!') &&
8039 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8040 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8041 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8042 (ctxt->input->cur[8] == 'E')) {
8043 if ((!terminate) &&
8044 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8045 goto done;
8046#ifdef DEBUG_PUSH
8047 xmlGenericError(xmlGenericErrorContext,
8048 "PP: Parsing internal subset\n");
8049#endif
8050 ctxt->inSubset = 1;
8051 xmlParseDocTypeDecl(ctxt);
8052 if (RAW == '[') {
8053 ctxt->instate = XML_PARSER_DTD;
8054#ifdef DEBUG_PUSH
8055 xmlGenericError(xmlGenericErrorContext,
8056 "PP: entering DTD\n");
8057#endif
8058 } else {
8059 /*
8060 * Create and update the external subset.
8061 */
8062 ctxt->inSubset = 2;
8063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8064 (ctxt->sax->externalSubset != NULL))
8065 ctxt->sax->externalSubset(ctxt->userData,
8066 ctxt->intSubName, ctxt->extSubSystem,
8067 ctxt->extSubURI);
8068 ctxt->inSubset = 0;
8069 ctxt->instate = XML_PARSER_PROLOG;
8070#ifdef DEBUG_PUSH
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: entering PROLOG\n");
8073#endif
8074 }
8075 } else if ((cur == '<') && (next == '!') &&
8076 (avail < 9)) {
8077 goto done;
8078 } else {
8079 ctxt->instate = XML_PARSER_START_TAG;
8080#ifdef DEBUG_PUSH
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: entering START_TAG\n");
8083#endif
8084 }
8085 break;
8086 case XML_PARSER_IGNORE:
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: internal error, state == IGNORE");
8089 ctxt->instate = XML_PARSER_DTD;
8090#ifdef DEBUG_PUSH
8091 xmlGenericError(xmlGenericErrorContext,
8092 "PP: entering DTD\n");
8093#endif
8094 break;
8095 case XML_PARSER_PROLOG:
8096 SKIP_BLANKS;
8097 if (ctxt->input->buf == NULL)
8098 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8099 else
8100 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8101 if (avail < 2)
8102 goto done;
8103 cur = ctxt->input->cur[0];
8104 next = ctxt->input->cur[1];
8105 if ((cur == '<') && (next == '?')) {
8106 if ((!terminate) &&
8107 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8108 goto done;
8109#ifdef DEBUG_PUSH
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: Parsing PI\n");
8112#endif
8113 xmlParsePI(ctxt);
8114 } else if ((cur == '<') && (next == '!') &&
8115 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8116 if ((!terminate) &&
8117 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8118 goto done;
8119#ifdef DEBUG_PUSH
8120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: Parsing Comment\n");
8122#endif
8123 xmlParseComment(ctxt);
8124 ctxt->instate = XML_PARSER_PROLOG;
8125 } else if ((cur == '<') && (next == '!') &&
8126 (avail < 4)) {
8127 goto done;
8128 } else {
8129 ctxt->instate = XML_PARSER_START_TAG;
8130#ifdef DEBUG_PUSH
8131 xmlGenericError(xmlGenericErrorContext,
8132 "PP: entering START_TAG\n");
8133#endif
8134 }
8135 break;
8136 case XML_PARSER_EPILOG:
8137 SKIP_BLANKS;
8138 if (ctxt->input->buf == NULL)
8139 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8140 else
8141 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8142 if (avail < 2)
8143 goto done;
8144 cur = ctxt->input->cur[0];
8145 next = ctxt->input->cur[1];
8146 if ((cur == '<') && (next == '?')) {
8147 if ((!terminate) &&
8148 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8149 goto done;
8150#ifdef DEBUG_PUSH
8151 xmlGenericError(xmlGenericErrorContext,
8152 "PP: Parsing PI\n");
8153#endif
8154 xmlParsePI(ctxt);
8155 ctxt->instate = XML_PARSER_EPILOG;
8156 } else if ((cur == '<') && (next == '!') &&
8157 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8158 if ((!terminate) &&
8159 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8160 goto done;
8161#ifdef DEBUG_PUSH
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: Parsing Comment\n");
8164#endif
8165 xmlParseComment(ctxt);
8166 ctxt->instate = XML_PARSER_EPILOG;
8167 } else if ((cur == '<') && (next == '!') &&
8168 (avail < 4)) {
8169 goto done;
8170 } else {
8171 ctxt->errNo = XML_ERR_DOCUMENT_END;
8172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8173 ctxt->sax->error(ctxt->userData,
8174 "Extra content at the end of the document\n");
8175 ctxt->wellFormed = 0;
8176 ctxt->disableSAX = 1;
8177 ctxt->instate = XML_PARSER_EOF;
8178#ifdef DEBUG_PUSH
8179 xmlGenericError(xmlGenericErrorContext,
8180 "PP: entering EOF\n");
8181#endif
8182 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8183 (!ctxt->disableSAX))
8184 ctxt->sax->endDocument(ctxt->userData);
8185 goto done;
8186 }
8187 break;
8188 case XML_PARSER_START_TAG: {
8189 xmlChar *name, *oldname;
8190
8191 if ((avail < 2) && (ctxt->inputNr == 1))
8192 goto done;
8193 cur = ctxt->input->cur[0];
8194 if (cur != '<') {
8195 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8197 ctxt->sax->error(ctxt->userData,
8198 "Start tag expect, '<' not found\n");
8199 ctxt->wellFormed = 0;
8200 ctxt->disableSAX = 1;
8201 ctxt->instate = XML_PARSER_EOF;
8202#ifdef DEBUG_PUSH
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: entering EOF\n");
8205#endif
8206 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8207 (!ctxt->disableSAX))
8208 ctxt->sax->endDocument(ctxt->userData);
8209 goto done;
8210 }
8211 if ((!terminate) &&
8212 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8213 goto done;
8214 if (ctxt->spaceNr == 0)
8215 spacePush(ctxt, -1);
8216 else
8217 spacePush(ctxt, *ctxt->space);
8218 name = xmlParseStartTag(ctxt);
8219 if (name == NULL) {
8220 spacePop(ctxt);
8221 ctxt->instate = XML_PARSER_EOF;
8222#ifdef DEBUG_PUSH
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: entering EOF\n");
8225#endif
8226 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8227 (!ctxt->disableSAX))
8228 ctxt->sax->endDocument(ctxt->userData);
8229 goto done;
8230 }
8231 namePush(ctxt, xmlStrdup(name));
8232
8233 /*
8234 * [ VC: Root Element Type ]
8235 * The Name in the document type declaration must match
8236 * the element type of the root element.
8237 */
8238 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8239 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8240 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8241
8242 /*
8243 * Check for an Empty Element.
8244 */
8245 if ((RAW == '/') && (NXT(1) == '>')) {
8246 SKIP(2);
8247 if ((ctxt->sax != NULL) &&
8248 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8249 ctxt->sax->endElement(ctxt->userData, name);
8250 xmlFree(name);
8251 oldname = namePop(ctxt);
8252 spacePop(ctxt);
8253 if (oldname != NULL) {
8254#ifdef DEBUG_STACK
8255 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8256#endif
8257 xmlFree(oldname);
8258 }
8259 if (ctxt->name == NULL) {
8260 ctxt->instate = XML_PARSER_EPILOG;
8261#ifdef DEBUG_PUSH
8262 xmlGenericError(xmlGenericErrorContext,
8263 "PP: entering EPILOG\n");
8264#endif
8265 } else {
8266 ctxt->instate = XML_PARSER_CONTENT;
8267#ifdef DEBUG_PUSH
8268 xmlGenericError(xmlGenericErrorContext,
8269 "PP: entering CONTENT\n");
8270#endif
8271 }
8272 break;
8273 }
8274 if (RAW == '>') {
8275 NEXT;
8276 } else {
8277 ctxt->errNo = XML_ERR_GT_REQUIRED;
8278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8279 ctxt->sax->error(ctxt->userData,
8280 "Couldn't find end of Start Tag %s\n",
8281 name);
8282 ctxt->wellFormed = 0;
8283 ctxt->disableSAX = 1;
8284
8285 /*
8286 * end of parsing of this node.
8287 */
8288 nodePop(ctxt);
8289 oldname = namePop(ctxt);
8290 spacePop(ctxt);
8291 if (oldname != NULL) {
8292#ifdef DEBUG_STACK
8293 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8294#endif
8295 xmlFree(oldname);
8296 }
8297 }
8298 xmlFree(name);
8299 ctxt->instate = XML_PARSER_CONTENT;
8300#ifdef DEBUG_PUSH
8301 xmlGenericError(xmlGenericErrorContext,
8302 "PP: entering CONTENT\n");
8303#endif
8304 break;
8305 }
8306 case XML_PARSER_CONTENT: {
8307 const xmlChar *test;
8308 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008309 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008310
8311 /*
8312 * Handle preparsed entities and charRef
8313 */
8314 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008315 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008316
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008317 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008318 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8319 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008320 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008321 ctxt->token = 0;
8322 }
8323 if ((avail < 2) && (ctxt->inputNr == 1))
8324 goto done;
8325 cur = ctxt->input->cur[0];
8326 next = ctxt->input->cur[1];
8327
8328 test = CUR_PTR;
8329 cons = ctxt->input->consumed;
8330 tok = ctxt->token;
8331 if ((cur == '<') && (next == '?')) {
8332 if ((!terminate) &&
8333 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8334 goto done;
8335#ifdef DEBUG_PUSH
8336 xmlGenericError(xmlGenericErrorContext,
8337 "PP: Parsing PI\n");
8338#endif
8339 xmlParsePI(ctxt);
8340 } else if ((cur == '<') && (next == '!') &&
8341 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8342 if ((!terminate) &&
8343 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8344 goto done;
8345#ifdef DEBUG_PUSH
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: Parsing Comment\n");
8348#endif
8349 xmlParseComment(ctxt);
8350 ctxt->instate = XML_PARSER_CONTENT;
8351 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8352 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8353 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8354 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8355 (ctxt->input->cur[8] == '[')) {
8356 SKIP(9);
8357 ctxt->instate = XML_PARSER_CDATA_SECTION;
8358#ifdef DEBUG_PUSH
8359 xmlGenericError(xmlGenericErrorContext,
8360 "PP: entering CDATA_SECTION\n");
8361#endif
8362 break;
8363 } else if ((cur == '<') && (next == '!') &&
8364 (avail < 9)) {
8365 goto done;
8366 } else if ((cur == '<') && (next == '/')) {
8367 ctxt->instate = XML_PARSER_END_TAG;
8368#ifdef DEBUG_PUSH
8369 xmlGenericError(xmlGenericErrorContext,
8370 "PP: entering END_TAG\n");
8371#endif
8372 break;
8373 } else if (cur == '<') {
8374 ctxt->instate = XML_PARSER_START_TAG;
8375#ifdef DEBUG_PUSH
8376 xmlGenericError(xmlGenericErrorContext,
8377 "PP: entering START_TAG\n");
8378#endif
8379 break;
8380 } else if (cur == '&') {
8381 if ((!terminate) &&
8382 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8383 goto done;
8384#ifdef DEBUG_PUSH
8385 xmlGenericError(xmlGenericErrorContext,
8386 "PP: Parsing Reference\n");
8387#endif
8388 xmlParseReference(ctxt);
8389 } else {
8390 /* TODO Avoid the extra copy, handle directly !!! */
8391 /*
8392 * Goal of the following test is:
8393 * - minimize calls to the SAX 'character' callback
8394 * when they are mergeable
8395 * - handle an problem for isBlank when we only parse
8396 * a sequence of blank chars and the next one is
8397 * not available to check against '<' presence.
8398 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008399 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008400 * of the parser.
8401 */
8402 if ((ctxt->inputNr == 1) &&
8403 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8404 if ((!terminate) &&
8405 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8406 goto done;
8407 }
8408 ctxt->checkIndex = 0;
8409#ifdef DEBUG_PUSH
8410 xmlGenericError(xmlGenericErrorContext,
8411 "PP: Parsing char data\n");
8412#endif
8413 xmlParseCharData(ctxt, 0);
8414 }
8415 /*
8416 * Pop-up of finished entities.
8417 */
8418 while ((RAW == 0) && (ctxt->inputNr > 1))
8419 xmlPopInput(ctxt);
8420 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8421 (tok == ctxt->token)) {
8422 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8424 ctxt->sax->error(ctxt->userData,
8425 "detected an error in element content\n");
8426 ctxt->wellFormed = 0;
8427 ctxt->disableSAX = 1;
8428 ctxt->instate = XML_PARSER_EOF;
8429 break;
8430 }
8431 break;
8432 }
8433 case XML_PARSER_CDATA_SECTION: {
8434 /*
8435 * The Push mode need to have the SAX callback for
8436 * cdataBlock merge back contiguous callbacks.
8437 */
8438 int base;
8439
8440 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8441 if (base < 0) {
8442 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8443 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8444 if (ctxt->sax->cdataBlock != NULL)
8445 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8446 XML_PARSER_BIG_BUFFER_SIZE);
8447 }
8448 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8449 ctxt->checkIndex = 0;
8450 }
8451 goto done;
8452 } else {
8453 if ((ctxt->sax != NULL) && (base > 0) &&
8454 (!ctxt->disableSAX)) {
8455 if (ctxt->sax->cdataBlock != NULL)
8456 ctxt->sax->cdataBlock(ctxt->userData,
8457 ctxt->input->cur, base);
8458 }
8459 SKIP(base + 3);
8460 ctxt->checkIndex = 0;
8461 ctxt->instate = XML_PARSER_CONTENT;
8462#ifdef DEBUG_PUSH
8463 xmlGenericError(xmlGenericErrorContext,
8464 "PP: entering CONTENT\n");
8465#endif
8466 }
8467 break;
8468 }
8469 case XML_PARSER_END_TAG:
8470 if (avail < 2)
8471 goto done;
8472 if ((!terminate) &&
8473 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8474 goto done;
8475 xmlParseEndTag(ctxt);
8476 if (ctxt->name == NULL) {
8477 ctxt->instate = XML_PARSER_EPILOG;
8478#ifdef DEBUG_PUSH
8479 xmlGenericError(xmlGenericErrorContext,
8480 "PP: entering EPILOG\n");
8481#endif
8482 } else {
8483 ctxt->instate = XML_PARSER_CONTENT;
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: entering CONTENT\n");
8487#endif
8488 }
8489 break;
8490 case XML_PARSER_DTD: {
8491 /*
8492 * Sorry but progressive parsing of the internal subset
8493 * is not expected to be supported. We first check that
8494 * the full content of the internal subset is available and
8495 * the parsing is launched only at that point.
8496 * Internal subset ends up with "']' S? '>'" in an unescaped
8497 * section and not in a ']]>' sequence which are conditional
8498 * sections (whoever argued to keep that crap in XML deserve
8499 * a place in hell !).
8500 */
8501 int base, i;
8502 xmlChar *buf;
8503 xmlChar quote = 0;
8504
8505 base = ctxt->input->cur - ctxt->input->base;
8506 if (base < 0) return(0);
8507 if (ctxt->checkIndex > base)
8508 base = ctxt->checkIndex;
8509 buf = ctxt->input->buf->buffer->content;
8510 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8511 base++) {
8512 if (quote != 0) {
8513 if (buf[base] == quote)
8514 quote = 0;
8515 continue;
8516 }
8517 if (buf[base] == '"') {
8518 quote = '"';
8519 continue;
8520 }
8521 if (buf[base] == '\'') {
8522 quote = '\'';
8523 continue;
8524 }
8525 if (buf[base] == ']') {
8526 if ((unsigned int) base +1 >=
8527 ctxt->input->buf->buffer->use)
8528 break;
8529 if (buf[base + 1] == ']') {
8530 /* conditional crap, skip both ']' ! */
8531 base++;
8532 continue;
8533 }
8534 for (i = 0;
8535 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8536 i++) {
8537 if (buf[base + i] == '>')
8538 goto found_end_int_subset;
8539 }
8540 break;
8541 }
8542 }
8543 /*
8544 * We didn't found the end of the Internal subset
8545 */
8546 if (quote == 0)
8547 ctxt->checkIndex = base;
8548#ifdef DEBUG_PUSH
8549 if (next == 0)
8550 xmlGenericError(xmlGenericErrorContext,
8551 "PP: lookup of int subset end filed\n");
8552#endif
8553 goto done;
8554
8555found_end_int_subset:
8556 xmlParseInternalSubset(ctxt);
8557 ctxt->inSubset = 2;
8558 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8559 (ctxt->sax->externalSubset != NULL))
8560 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8561 ctxt->extSubSystem, ctxt->extSubURI);
8562 ctxt->inSubset = 0;
8563 ctxt->instate = XML_PARSER_PROLOG;
8564 ctxt->checkIndex = 0;
8565#ifdef DEBUG_PUSH
8566 xmlGenericError(xmlGenericErrorContext,
8567 "PP: entering PROLOG\n");
8568#endif
8569 break;
8570 }
8571 case XML_PARSER_COMMENT:
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: internal error, state == COMMENT\n");
8574 ctxt->instate = XML_PARSER_CONTENT;
8575#ifdef DEBUG_PUSH
8576 xmlGenericError(xmlGenericErrorContext,
8577 "PP: entering CONTENT\n");
8578#endif
8579 break;
8580 case XML_PARSER_PI:
8581 xmlGenericError(xmlGenericErrorContext,
8582 "PP: internal error, state == PI\n");
8583 ctxt->instate = XML_PARSER_CONTENT;
8584#ifdef DEBUG_PUSH
8585 xmlGenericError(xmlGenericErrorContext,
8586 "PP: entering CONTENT\n");
8587#endif
8588 break;
8589 case XML_PARSER_ENTITY_DECL:
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: internal error, state == ENTITY_DECL\n");
8592 ctxt->instate = XML_PARSER_DTD;
8593#ifdef DEBUG_PUSH
8594 xmlGenericError(xmlGenericErrorContext,
8595 "PP: entering DTD\n");
8596#endif
8597 break;
8598 case XML_PARSER_ENTITY_VALUE:
8599 xmlGenericError(xmlGenericErrorContext,
8600 "PP: internal error, state == ENTITY_VALUE\n");
8601 ctxt->instate = XML_PARSER_CONTENT;
8602#ifdef DEBUG_PUSH
8603 xmlGenericError(xmlGenericErrorContext,
8604 "PP: entering DTD\n");
8605#endif
8606 break;
8607 case XML_PARSER_ATTRIBUTE_VALUE:
8608 xmlGenericError(xmlGenericErrorContext,
8609 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8610 ctxt->instate = XML_PARSER_START_TAG;
8611#ifdef DEBUG_PUSH
8612 xmlGenericError(xmlGenericErrorContext,
8613 "PP: entering START_TAG\n");
8614#endif
8615 break;
8616 case XML_PARSER_SYSTEM_LITERAL:
8617 xmlGenericError(xmlGenericErrorContext,
8618 "PP: internal error, state == SYSTEM_LITERAL\n");
8619 ctxt->instate = XML_PARSER_START_TAG;
8620#ifdef DEBUG_PUSH
8621 xmlGenericError(xmlGenericErrorContext,
8622 "PP: entering START_TAG\n");
8623#endif
8624 break;
8625 }
8626 }
8627done:
8628#ifdef DEBUG_PUSH
8629 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8630#endif
8631 return(ret);
8632}
8633
8634/**
Owen Taylor3473f882001-02-23 17:55:21 +00008635 * xmlParseChunk:
8636 * @ctxt: an XML parser context
8637 * @chunk: an char array
8638 * @size: the size in byte of the chunk
8639 * @terminate: last chunk indicator
8640 *
8641 * Parse a Chunk of memory
8642 *
8643 * Returns zero if no error, the xmlParserErrors otherwise.
8644 */
8645int
8646xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8647 int terminate) {
8648 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8649 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8650 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8651 int cur = ctxt->input->cur - ctxt->input->base;
8652
8653 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8654 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8655 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008656 ctxt->input->end =
8657 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008658#ifdef DEBUG_PUSH
8659 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8660#endif
8661
8662 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8663 xmlParseTryOrFinish(ctxt, terminate);
8664 } else if (ctxt->instate != XML_PARSER_EOF) {
8665 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8666 xmlParserInputBufferPtr in = ctxt->input->buf;
8667 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8668 (in->raw != NULL)) {
8669 int nbchars;
8670
8671 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8672 if (nbchars < 0) {
8673 xmlGenericError(xmlGenericErrorContext,
8674 "xmlParseChunk: encoder error\n");
8675 return(XML_ERR_INVALID_ENCODING);
8676 }
8677 }
8678 }
8679 }
8680 xmlParseTryOrFinish(ctxt, terminate);
8681 if (terminate) {
8682 /*
8683 * Check for termination
8684 */
8685 if ((ctxt->instate != XML_PARSER_EOF) &&
8686 (ctxt->instate != XML_PARSER_EPILOG)) {
8687 ctxt->errNo = XML_ERR_DOCUMENT_END;
8688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8689 ctxt->sax->error(ctxt->userData,
8690 "Extra content at the end of the document\n");
8691 ctxt->wellFormed = 0;
8692 ctxt->disableSAX = 1;
8693 }
8694 if (ctxt->instate != XML_PARSER_EOF) {
8695 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8696 (!ctxt->disableSAX))
8697 ctxt->sax->endDocument(ctxt->userData);
8698 }
8699 ctxt->instate = XML_PARSER_EOF;
8700 }
8701 return((xmlParserErrors) ctxt->errNo);
8702}
8703
8704/************************************************************************
8705 * *
8706 * I/O front end functions to the parser *
8707 * *
8708 ************************************************************************/
8709
8710/**
8711 * xmlStopParser:
8712 * @ctxt: an XML parser context
8713 *
8714 * Blocks further parser processing
8715 */
8716void
8717xmlStopParser(xmlParserCtxtPtr ctxt) {
8718 ctxt->instate = XML_PARSER_EOF;
8719 if (ctxt->input != NULL)
8720 ctxt->input->cur = BAD_CAST"";
8721}
8722
8723/**
8724 * xmlCreatePushParserCtxt:
8725 * @sax: a SAX handler
8726 * @user_data: The user data returned on SAX callbacks
8727 * @chunk: a pointer to an array of chars
8728 * @size: number of chars in the array
8729 * @filename: an optional file name or URI
8730 *
8731 * Create a parser context for using the XML parser in push mode
8732 * To allow content encoding detection, @size should be >= 4
8733 * The value of @filename is used for fetching external entities
8734 * and error/warning reports.
8735 *
8736 * Returns the new parser context or NULL
8737 */
8738xmlParserCtxtPtr
8739xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8740 const char *chunk, int size, const char *filename) {
8741 xmlParserCtxtPtr ctxt;
8742 xmlParserInputPtr inputStream;
8743 xmlParserInputBufferPtr buf;
8744 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8745
8746 /*
8747 * plug some encoding conversion routines
8748 */
8749 if ((chunk != NULL) && (size >= 4))
8750 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8751
8752 buf = xmlAllocParserInputBuffer(enc);
8753 if (buf == NULL) return(NULL);
8754
8755 ctxt = xmlNewParserCtxt();
8756 if (ctxt == NULL) {
8757 xmlFree(buf);
8758 return(NULL);
8759 }
8760 if (sax != NULL) {
8761 if (ctxt->sax != &xmlDefaultSAXHandler)
8762 xmlFree(ctxt->sax);
8763 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8764 if (ctxt->sax == NULL) {
8765 xmlFree(buf);
8766 xmlFree(ctxt);
8767 return(NULL);
8768 }
8769 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8770 if (user_data != NULL)
8771 ctxt->userData = user_data;
8772 }
8773 if (filename == NULL) {
8774 ctxt->directory = NULL;
8775 } else {
8776 ctxt->directory = xmlParserGetDirectory(filename);
8777 }
8778
8779 inputStream = xmlNewInputStream(ctxt);
8780 if (inputStream == NULL) {
8781 xmlFreeParserCtxt(ctxt);
8782 return(NULL);
8783 }
8784
8785 if (filename == NULL)
8786 inputStream->filename = NULL;
8787 else
8788 inputStream->filename = xmlMemStrdup(filename);
8789 inputStream->buf = buf;
8790 inputStream->base = inputStream->buf->buffer->content;
8791 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008792 inputStream->end =
8793 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008794
8795 inputPush(ctxt, inputStream);
8796
8797 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8798 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008799 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8800 int cur = ctxt->input->cur - ctxt->input->base;
8801
Owen Taylor3473f882001-02-23 17:55:21 +00008802 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008803
8804 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8805 ctxt->input->cur = ctxt->input->base + cur;
8806 ctxt->input->end =
8807 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008808#ifdef DEBUG_PUSH
8809 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8810#endif
8811 }
8812
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008813 if (enc != XML_CHAR_ENCODING_NONE) {
8814 xmlSwitchEncoding(ctxt, enc);
8815 }
8816
Owen Taylor3473f882001-02-23 17:55:21 +00008817 return(ctxt);
8818}
8819
8820/**
8821 * xmlCreateIOParserCtxt:
8822 * @sax: a SAX handler
8823 * @user_data: The user data returned on SAX callbacks
8824 * @ioread: an I/O read function
8825 * @ioclose: an I/O close function
8826 * @ioctx: an I/O handler
8827 * @enc: the charset encoding if known
8828 *
8829 * Create a parser context for using the XML parser with an existing
8830 * I/O stream
8831 *
8832 * Returns the new parser context or NULL
8833 */
8834xmlParserCtxtPtr
8835xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8836 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8837 void *ioctx, xmlCharEncoding enc) {
8838 xmlParserCtxtPtr ctxt;
8839 xmlParserInputPtr inputStream;
8840 xmlParserInputBufferPtr buf;
8841
8842 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8843 if (buf == NULL) return(NULL);
8844
8845 ctxt = xmlNewParserCtxt();
8846 if (ctxt == NULL) {
8847 xmlFree(buf);
8848 return(NULL);
8849 }
8850 if (sax != NULL) {
8851 if (ctxt->sax != &xmlDefaultSAXHandler)
8852 xmlFree(ctxt->sax);
8853 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8854 if (ctxt->sax == NULL) {
8855 xmlFree(buf);
8856 xmlFree(ctxt);
8857 return(NULL);
8858 }
8859 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8860 if (user_data != NULL)
8861 ctxt->userData = user_data;
8862 }
8863
8864 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8865 if (inputStream == NULL) {
8866 xmlFreeParserCtxt(ctxt);
8867 return(NULL);
8868 }
8869 inputPush(ctxt, inputStream);
8870
8871 return(ctxt);
8872}
8873
8874/************************************************************************
8875 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008876 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008877 * *
8878 ************************************************************************/
8879
8880/**
8881 * xmlIOParseDTD:
8882 * @sax: the SAX handler block or NULL
8883 * @input: an Input Buffer
8884 * @enc: the charset encoding if known
8885 *
8886 * Load and parse a DTD
8887 *
8888 * Returns the resulting xmlDtdPtr or NULL in case of error.
8889 * @input will be freed at parsing end.
8890 */
8891
8892xmlDtdPtr
8893xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8894 xmlCharEncoding enc) {
8895 xmlDtdPtr ret = NULL;
8896 xmlParserCtxtPtr ctxt;
8897 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008898 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008899
8900 if (input == NULL)
8901 return(NULL);
8902
8903 ctxt = xmlNewParserCtxt();
8904 if (ctxt == NULL) {
8905 return(NULL);
8906 }
8907
8908 /*
8909 * Set-up the SAX context
8910 */
8911 if (sax != NULL) {
8912 if (ctxt->sax != NULL)
8913 xmlFree(ctxt->sax);
8914 ctxt->sax = sax;
8915 ctxt->userData = NULL;
8916 }
8917
8918 /*
8919 * generate a parser input from the I/O handler
8920 */
8921
8922 pinput = xmlNewIOInputStream(ctxt, input, enc);
8923 if (pinput == NULL) {
8924 if (sax != NULL) ctxt->sax = NULL;
8925 xmlFreeParserCtxt(ctxt);
8926 return(NULL);
8927 }
8928
8929 /*
8930 * plug some encoding conversion routines here.
8931 */
8932 xmlPushInput(ctxt, pinput);
8933
8934 pinput->filename = NULL;
8935 pinput->line = 1;
8936 pinput->col = 1;
8937 pinput->base = ctxt->input->cur;
8938 pinput->cur = ctxt->input->cur;
8939 pinput->free = NULL;
8940
8941 /*
8942 * let's parse that entity knowing it's an external subset.
8943 */
8944 ctxt->inSubset = 2;
8945 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8946 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8947 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008948
8949 if (enc == XML_CHAR_ENCODING_NONE) {
8950 /*
8951 * Get the 4 first bytes and decode the charset
8952 * if enc != XML_CHAR_ENCODING_NONE
8953 * plug some encoding conversion routines.
8954 */
8955 start[0] = RAW;
8956 start[1] = NXT(1);
8957 start[2] = NXT(2);
8958 start[3] = NXT(3);
8959 enc = xmlDetectCharEncoding(start, 4);
8960 if (enc != XML_CHAR_ENCODING_NONE) {
8961 xmlSwitchEncoding(ctxt, enc);
8962 }
8963 }
8964
Owen Taylor3473f882001-02-23 17:55:21 +00008965 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8966
8967 if (ctxt->myDoc != NULL) {
8968 if (ctxt->wellFormed) {
8969 ret = ctxt->myDoc->extSubset;
8970 ctxt->myDoc->extSubset = NULL;
8971 } else {
8972 ret = NULL;
8973 }
8974 xmlFreeDoc(ctxt->myDoc);
8975 ctxt->myDoc = NULL;
8976 }
8977 if (sax != NULL) ctxt->sax = NULL;
8978 xmlFreeParserCtxt(ctxt);
8979
8980 return(ret);
8981}
8982
8983/**
8984 * xmlSAXParseDTD:
8985 * @sax: the SAX handler block
8986 * @ExternalID: a NAME* containing the External ID of the DTD
8987 * @SystemID: a NAME* containing the URL to the DTD
8988 *
8989 * Load and parse an external subset.
8990 *
8991 * Returns the resulting xmlDtdPtr or NULL in case of error.
8992 */
8993
8994xmlDtdPtr
8995xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8996 const xmlChar *SystemID) {
8997 xmlDtdPtr ret = NULL;
8998 xmlParserCtxtPtr ctxt;
8999 xmlParserInputPtr input = NULL;
9000 xmlCharEncoding enc;
9001
9002 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9003
9004 ctxt = xmlNewParserCtxt();
9005 if (ctxt == NULL) {
9006 return(NULL);
9007 }
9008
9009 /*
9010 * Set-up the SAX context
9011 */
9012 if (sax != NULL) {
9013 if (ctxt->sax != NULL)
9014 xmlFree(ctxt->sax);
9015 ctxt->sax = sax;
9016 ctxt->userData = NULL;
9017 }
9018
9019 /*
9020 * Ask the Entity resolver to load the damn thing
9021 */
9022
9023 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9024 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9025 if (input == NULL) {
9026 if (sax != NULL) ctxt->sax = NULL;
9027 xmlFreeParserCtxt(ctxt);
9028 return(NULL);
9029 }
9030
9031 /*
9032 * plug some encoding conversion routines here.
9033 */
9034 xmlPushInput(ctxt, input);
9035 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9036 xmlSwitchEncoding(ctxt, enc);
9037
9038 if (input->filename == NULL)
9039 input->filename = (char *) xmlStrdup(SystemID);
9040 input->line = 1;
9041 input->col = 1;
9042 input->base = ctxt->input->cur;
9043 input->cur = ctxt->input->cur;
9044 input->free = NULL;
9045
9046 /*
9047 * let's parse that entity knowing it's an external subset.
9048 */
9049 ctxt->inSubset = 2;
9050 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9051 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9052 ExternalID, SystemID);
9053 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9054
9055 if (ctxt->myDoc != NULL) {
9056 if (ctxt->wellFormed) {
9057 ret = ctxt->myDoc->extSubset;
9058 ctxt->myDoc->extSubset = NULL;
9059 } else {
9060 ret = NULL;
9061 }
9062 xmlFreeDoc(ctxt->myDoc);
9063 ctxt->myDoc = NULL;
9064 }
9065 if (sax != NULL) ctxt->sax = NULL;
9066 xmlFreeParserCtxt(ctxt);
9067
9068 return(ret);
9069}
9070
9071/**
9072 * xmlParseDTD:
9073 * @ExternalID: a NAME* containing the External ID of the DTD
9074 * @SystemID: a NAME* containing the URL to the DTD
9075 *
9076 * Load and parse an external subset.
9077 *
9078 * Returns the resulting xmlDtdPtr or NULL in case of error.
9079 */
9080
9081xmlDtdPtr
9082xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9083 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9084}
9085
9086/************************************************************************
9087 * *
9088 * Front ends when parsing an Entity *
9089 * *
9090 ************************************************************************/
9091
9092/**
Owen Taylor3473f882001-02-23 17:55:21 +00009093 * xmlParseCtxtExternalEntity:
9094 * @ctx: the existing parsing context
9095 * @URL: the URL for the entity to load
9096 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009097 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009098 *
9099 * Parse an external general entity within an existing parsing context
9100 * An external general parsed entity is well-formed if it matches the
9101 * production labeled extParsedEnt.
9102 *
9103 * [78] extParsedEnt ::= TextDecl? content
9104 *
9105 * Returns 0 if the entity is well formed, -1 in case of args problem and
9106 * the parser error code otherwise
9107 */
9108
9109int
9110xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009111 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009112 xmlParserCtxtPtr ctxt;
9113 xmlDocPtr newDoc;
9114 xmlSAXHandlerPtr oldsax = NULL;
9115 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009116 xmlChar start[4];
9117 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009118
9119 if (ctx->depth > 40) {
9120 return(XML_ERR_ENTITY_LOOP);
9121 }
9122
Daniel Veillardcda96922001-08-21 10:56:31 +00009123 if (lst != NULL)
9124 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009125 if ((URL == NULL) && (ID == NULL))
9126 return(-1);
9127 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9128 return(-1);
9129
9130
9131 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9132 if (ctxt == NULL) return(-1);
9133 ctxt->userData = ctxt;
9134 oldsax = ctxt->sax;
9135 ctxt->sax = ctx->sax;
9136 newDoc = xmlNewDoc(BAD_CAST "1.0");
9137 if (newDoc == NULL) {
9138 xmlFreeParserCtxt(ctxt);
9139 return(-1);
9140 }
9141 if (ctx->myDoc != NULL) {
9142 newDoc->intSubset = ctx->myDoc->intSubset;
9143 newDoc->extSubset = ctx->myDoc->extSubset;
9144 }
9145 if (ctx->myDoc->URL != NULL) {
9146 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9147 }
9148 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9149 if (newDoc->children == NULL) {
9150 ctxt->sax = oldsax;
9151 xmlFreeParserCtxt(ctxt);
9152 newDoc->intSubset = NULL;
9153 newDoc->extSubset = NULL;
9154 xmlFreeDoc(newDoc);
9155 return(-1);
9156 }
9157 nodePush(ctxt, newDoc->children);
9158 if (ctx->myDoc == NULL) {
9159 ctxt->myDoc = newDoc;
9160 } else {
9161 ctxt->myDoc = ctx->myDoc;
9162 newDoc->children->doc = ctx->myDoc;
9163 }
9164
Daniel Veillard87a764e2001-06-20 17:41:10 +00009165 /*
9166 * Get the 4 first bytes and decode the charset
9167 * if enc != XML_CHAR_ENCODING_NONE
9168 * plug some encoding conversion routines.
9169 */
9170 GROW
9171 start[0] = RAW;
9172 start[1] = NXT(1);
9173 start[2] = NXT(2);
9174 start[3] = NXT(3);
9175 enc = xmlDetectCharEncoding(start, 4);
9176 if (enc != XML_CHAR_ENCODING_NONE) {
9177 xmlSwitchEncoding(ctxt, enc);
9178 }
9179
Owen Taylor3473f882001-02-23 17:55:21 +00009180 /*
9181 * Parse a possible text declaration first
9182 */
Owen Taylor3473f882001-02-23 17:55:21 +00009183 if ((RAW == '<') && (NXT(1) == '?') &&
9184 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9185 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9186 xmlParseTextDecl(ctxt);
9187 }
9188
9189 /*
9190 * Doing validity checking on chunk doesn't make sense
9191 */
9192 ctxt->instate = XML_PARSER_CONTENT;
9193 ctxt->validate = ctx->validate;
9194 ctxt->loadsubset = ctx->loadsubset;
9195 ctxt->depth = ctx->depth + 1;
9196 ctxt->replaceEntities = ctx->replaceEntities;
9197 if (ctxt->validate) {
9198 ctxt->vctxt.error = ctx->vctxt.error;
9199 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009200 } else {
9201 ctxt->vctxt.error = NULL;
9202 ctxt->vctxt.warning = NULL;
9203 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009204 ctxt->vctxt.nodeTab = NULL;
9205 ctxt->vctxt.nodeNr = 0;
9206 ctxt->vctxt.nodeMax = 0;
9207 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009208
9209 xmlParseContent(ctxt);
9210
9211 if ((RAW == '<') && (NXT(1) == '/')) {
9212 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9214 ctxt->sax->error(ctxt->userData,
9215 "chunk is not well balanced\n");
9216 ctxt->wellFormed = 0;
9217 ctxt->disableSAX = 1;
9218 } else if (RAW != 0) {
9219 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9221 ctxt->sax->error(ctxt->userData,
9222 "extra content at the end of well balanced chunk\n");
9223 ctxt->wellFormed = 0;
9224 ctxt->disableSAX = 1;
9225 }
9226 if (ctxt->node != newDoc->children) {
9227 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9229 ctxt->sax->error(ctxt->userData,
9230 "chunk is not well balanced\n");
9231 ctxt->wellFormed = 0;
9232 ctxt->disableSAX = 1;
9233 }
9234
9235 if (!ctxt->wellFormed) {
9236 if (ctxt->errNo == 0)
9237 ret = 1;
9238 else
9239 ret = ctxt->errNo;
9240 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009241 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009242 xmlNodePtr cur;
9243
9244 /*
9245 * Return the newly created nodeset after unlinking it from
9246 * they pseudo parent.
9247 */
9248 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009249 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009250 while (cur != NULL) {
9251 cur->parent = NULL;
9252 cur = cur->next;
9253 }
9254 newDoc->children->children = NULL;
9255 }
9256 ret = 0;
9257 }
9258 ctxt->sax = oldsax;
9259 xmlFreeParserCtxt(ctxt);
9260 newDoc->intSubset = NULL;
9261 newDoc->extSubset = NULL;
9262 xmlFreeDoc(newDoc);
9263
9264 return(ret);
9265}
9266
9267/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009268 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009269 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009270 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009271 * @sax: the SAX handler bloc (possibly NULL)
9272 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9273 * @depth: Used for loop detection, use 0
9274 * @URL: the URL for the entity to load
9275 * @ID: the System ID for the entity to load
9276 * @list: the return value for the set of parsed nodes
9277 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009278 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009279 *
9280 * Returns 0 if the entity is well formed, -1 in case of args problem and
9281 * the parser error code otherwise
9282 */
9283
Daniel Veillard257d9102001-05-08 10:41:44 +00009284static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009285xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9286 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009287 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009288 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009289 xmlParserCtxtPtr ctxt;
9290 xmlDocPtr newDoc;
9291 xmlSAXHandlerPtr oldsax = NULL;
9292 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009293 xmlChar start[4];
9294 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009295
9296 if (depth > 40) {
9297 return(XML_ERR_ENTITY_LOOP);
9298 }
9299
9300
9301
9302 if (list != NULL)
9303 *list = NULL;
9304 if ((URL == NULL) && (ID == NULL))
9305 return(-1);
9306 if (doc == NULL) /* @@ relax but check for dereferences */
9307 return(-1);
9308
9309
9310 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9311 if (ctxt == NULL) return(-1);
9312 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009313 if (oldctxt != NULL) {
9314 ctxt->_private = oldctxt->_private;
9315 ctxt->loadsubset = oldctxt->loadsubset;
9316 ctxt->validate = oldctxt->validate;
9317 ctxt->external = oldctxt->external;
9318 } else {
9319 /*
9320 * Doing validity checking on chunk without context
9321 * doesn't make sense
9322 */
9323 ctxt->_private = NULL;
9324 ctxt->validate = 0;
9325 ctxt->external = 2;
9326 ctxt->loadsubset = 0;
9327 }
Owen Taylor3473f882001-02-23 17:55:21 +00009328 if (sax != NULL) {
9329 oldsax = ctxt->sax;
9330 ctxt->sax = sax;
9331 if (user_data != NULL)
9332 ctxt->userData = user_data;
9333 }
9334 newDoc = xmlNewDoc(BAD_CAST "1.0");
9335 if (newDoc == NULL) {
9336 xmlFreeParserCtxt(ctxt);
9337 return(-1);
9338 }
9339 if (doc != NULL) {
9340 newDoc->intSubset = doc->intSubset;
9341 newDoc->extSubset = doc->extSubset;
9342 }
9343 if (doc->URL != NULL) {
9344 newDoc->URL = xmlStrdup(doc->URL);
9345 }
9346 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9347 if (newDoc->children == NULL) {
9348 if (sax != NULL)
9349 ctxt->sax = oldsax;
9350 xmlFreeParserCtxt(ctxt);
9351 newDoc->intSubset = NULL;
9352 newDoc->extSubset = NULL;
9353 xmlFreeDoc(newDoc);
9354 return(-1);
9355 }
9356 nodePush(ctxt, newDoc->children);
9357 if (doc == NULL) {
9358 ctxt->myDoc = newDoc;
9359 } else {
9360 ctxt->myDoc = doc;
9361 newDoc->children->doc = doc;
9362 }
9363
Daniel Veillard87a764e2001-06-20 17:41:10 +00009364 /*
9365 * Get the 4 first bytes and decode the charset
9366 * if enc != XML_CHAR_ENCODING_NONE
9367 * plug some encoding conversion routines.
9368 */
9369 GROW;
9370 start[0] = RAW;
9371 start[1] = NXT(1);
9372 start[2] = NXT(2);
9373 start[3] = NXT(3);
9374 enc = xmlDetectCharEncoding(start, 4);
9375 if (enc != XML_CHAR_ENCODING_NONE) {
9376 xmlSwitchEncoding(ctxt, enc);
9377 }
9378
Owen Taylor3473f882001-02-23 17:55:21 +00009379 /*
9380 * Parse a possible text declaration first
9381 */
Owen Taylor3473f882001-02-23 17:55:21 +00009382 if ((RAW == '<') && (NXT(1) == '?') &&
9383 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9384 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9385 xmlParseTextDecl(ctxt);
9386 }
9387
Owen Taylor3473f882001-02-23 17:55:21 +00009388 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009389 ctxt->depth = depth;
9390
9391 xmlParseContent(ctxt);
9392
9393 if ((RAW == '<') && (NXT(1) == '/')) {
9394 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9396 ctxt->sax->error(ctxt->userData,
9397 "chunk is not well balanced\n");
9398 ctxt->wellFormed = 0;
9399 ctxt->disableSAX = 1;
9400 } else if (RAW != 0) {
9401 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9403 ctxt->sax->error(ctxt->userData,
9404 "extra content at the end of well balanced chunk\n");
9405 ctxt->wellFormed = 0;
9406 ctxt->disableSAX = 1;
9407 }
9408 if (ctxt->node != newDoc->children) {
9409 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9411 ctxt->sax->error(ctxt->userData,
9412 "chunk is not well balanced\n");
9413 ctxt->wellFormed = 0;
9414 ctxt->disableSAX = 1;
9415 }
9416
9417 if (!ctxt->wellFormed) {
9418 if (ctxt->errNo == 0)
9419 ret = 1;
9420 else
9421 ret = ctxt->errNo;
9422 } else {
9423 if (list != NULL) {
9424 xmlNodePtr cur;
9425
9426 /*
9427 * Return the newly created nodeset after unlinking it from
9428 * they pseudo parent.
9429 */
9430 cur = newDoc->children->children;
9431 *list = cur;
9432 while (cur != NULL) {
9433 cur->parent = NULL;
9434 cur = cur->next;
9435 }
9436 newDoc->children->children = NULL;
9437 }
9438 ret = 0;
9439 }
9440 if (sax != NULL)
9441 ctxt->sax = oldsax;
9442 xmlFreeParserCtxt(ctxt);
9443 newDoc->intSubset = NULL;
9444 newDoc->extSubset = NULL;
9445 xmlFreeDoc(newDoc);
9446
9447 return(ret);
9448}
9449
9450/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009451 * xmlParseExternalEntity:
9452 * @doc: the document the chunk pertains to
9453 * @sax: the SAX handler bloc (possibly NULL)
9454 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9455 * @depth: Used for loop detection, use 0
9456 * @URL: the URL for the entity to load
9457 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009458 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009459 *
9460 * Parse an external general entity
9461 * An external general parsed entity is well-formed if it matches the
9462 * production labeled extParsedEnt.
9463 *
9464 * [78] extParsedEnt ::= TextDecl? content
9465 *
9466 * Returns 0 if the entity is well formed, -1 in case of args problem and
9467 * the parser error code otherwise
9468 */
9469
9470int
9471xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009472 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009473 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009474 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009475}
9476
9477/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009478 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009479 * @doc: the document the chunk pertains to
9480 * @sax: the SAX handler bloc (possibly NULL)
9481 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9482 * @depth: Used for loop detection, use 0
9483 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009484 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009485 *
9486 * Parse a well-balanced chunk of an XML document
9487 * called by the parser
9488 * The allowed sequence for the Well Balanced Chunk is the one defined by
9489 * the content production in the XML grammar:
9490 *
9491 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9492 *
9493 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9494 * the parser error code otherwise
9495 */
9496
9497int
9498xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009499 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009500 xmlParserCtxtPtr ctxt;
9501 xmlDocPtr newDoc;
9502 xmlSAXHandlerPtr oldsax = NULL;
9503 int size;
9504 int ret = 0;
9505
9506 if (depth > 40) {
9507 return(XML_ERR_ENTITY_LOOP);
9508 }
9509
9510
Daniel Veillardcda96922001-08-21 10:56:31 +00009511 if (lst != NULL)
9512 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009513 if (string == NULL)
9514 return(-1);
9515
9516 size = xmlStrlen(string);
9517
9518 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9519 if (ctxt == NULL) return(-1);
9520 ctxt->userData = ctxt;
9521 if (sax != NULL) {
9522 oldsax = ctxt->sax;
9523 ctxt->sax = sax;
9524 if (user_data != NULL)
9525 ctxt->userData = user_data;
9526 }
9527 newDoc = xmlNewDoc(BAD_CAST "1.0");
9528 if (newDoc == NULL) {
9529 xmlFreeParserCtxt(ctxt);
9530 return(-1);
9531 }
9532 if (doc != NULL) {
9533 newDoc->intSubset = doc->intSubset;
9534 newDoc->extSubset = doc->extSubset;
9535 }
9536 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9537 if (newDoc->children == NULL) {
9538 if (sax != NULL)
9539 ctxt->sax = oldsax;
9540 xmlFreeParserCtxt(ctxt);
9541 newDoc->intSubset = NULL;
9542 newDoc->extSubset = NULL;
9543 xmlFreeDoc(newDoc);
9544 return(-1);
9545 }
9546 nodePush(ctxt, newDoc->children);
9547 if (doc == NULL) {
9548 ctxt->myDoc = newDoc;
9549 } else {
9550 ctxt->myDoc = doc;
9551 newDoc->children->doc = doc;
9552 }
9553 ctxt->instate = XML_PARSER_CONTENT;
9554 ctxt->depth = depth;
9555
9556 /*
9557 * Doing validity checking on chunk doesn't make sense
9558 */
9559 ctxt->validate = 0;
9560 ctxt->loadsubset = 0;
9561
9562 xmlParseContent(ctxt);
9563
9564 if ((RAW == '<') && (NXT(1) == '/')) {
9565 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9567 ctxt->sax->error(ctxt->userData,
9568 "chunk is not well balanced\n");
9569 ctxt->wellFormed = 0;
9570 ctxt->disableSAX = 1;
9571 } else if (RAW != 0) {
9572 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9574 ctxt->sax->error(ctxt->userData,
9575 "extra content at the end of well balanced chunk\n");
9576 ctxt->wellFormed = 0;
9577 ctxt->disableSAX = 1;
9578 }
9579 if (ctxt->node != newDoc->children) {
9580 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9582 ctxt->sax->error(ctxt->userData,
9583 "chunk is not well balanced\n");
9584 ctxt->wellFormed = 0;
9585 ctxt->disableSAX = 1;
9586 }
9587
9588 if (!ctxt->wellFormed) {
9589 if (ctxt->errNo == 0)
9590 ret = 1;
9591 else
9592 ret = ctxt->errNo;
9593 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009594 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009595 xmlNodePtr cur;
9596
9597 /*
9598 * Return the newly created nodeset after unlinking it from
9599 * they pseudo parent.
9600 */
9601 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009602 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009603 while (cur != NULL) {
9604 cur->parent = NULL;
9605 cur = cur->next;
9606 }
9607 newDoc->children->children = NULL;
9608 }
9609 ret = 0;
9610 }
9611 if (sax != NULL)
9612 ctxt->sax = oldsax;
9613 xmlFreeParserCtxt(ctxt);
9614 newDoc->intSubset = NULL;
9615 newDoc->extSubset = NULL;
9616 xmlFreeDoc(newDoc);
9617
9618 return(ret);
9619}
9620
9621/**
9622 * xmlSAXParseEntity:
9623 * @sax: the SAX handler block
9624 * @filename: the filename
9625 *
9626 * parse an XML external entity out of context and build a tree.
9627 * It use the given SAX function block to handle the parsing callback.
9628 * If sax is NULL, fallback to the default DOM tree building routines.
9629 *
9630 * [78] extParsedEnt ::= TextDecl? content
9631 *
9632 * This correspond to a "Well Balanced" chunk
9633 *
9634 * Returns the resulting document tree
9635 */
9636
9637xmlDocPtr
9638xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9639 xmlDocPtr ret;
9640 xmlParserCtxtPtr ctxt;
9641 char *directory = NULL;
9642
9643 ctxt = xmlCreateFileParserCtxt(filename);
9644 if (ctxt == NULL) {
9645 return(NULL);
9646 }
9647 if (sax != NULL) {
9648 if (ctxt->sax != NULL)
9649 xmlFree(ctxt->sax);
9650 ctxt->sax = sax;
9651 ctxt->userData = NULL;
9652 }
9653
9654 if ((ctxt->directory == NULL) && (directory == NULL))
9655 directory = xmlParserGetDirectory(filename);
9656
9657 xmlParseExtParsedEnt(ctxt);
9658
9659 if (ctxt->wellFormed)
9660 ret = ctxt->myDoc;
9661 else {
9662 ret = NULL;
9663 xmlFreeDoc(ctxt->myDoc);
9664 ctxt->myDoc = NULL;
9665 }
9666 if (sax != NULL)
9667 ctxt->sax = NULL;
9668 xmlFreeParserCtxt(ctxt);
9669
9670 return(ret);
9671}
9672
9673/**
9674 * xmlParseEntity:
9675 * @filename: the filename
9676 *
9677 * parse an XML external entity out of context and build a tree.
9678 *
9679 * [78] extParsedEnt ::= TextDecl? content
9680 *
9681 * This correspond to a "Well Balanced" chunk
9682 *
9683 * Returns the resulting document tree
9684 */
9685
9686xmlDocPtr
9687xmlParseEntity(const char *filename) {
9688 return(xmlSAXParseEntity(NULL, filename));
9689}
9690
9691/**
9692 * xmlCreateEntityParserCtxt:
9693 * @URL: the entity URL
9694 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009695 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009696 *
9697 * Create a parser context for an external entity
9698 * Automatic support for ZLIB/Compress compressed document is provided
9699 * by default if found at compile-time.
9700 *
9701 * Returns the new parser context or NULL
9702 */
9703xmlParserCtxtPtr
9704xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9705 const xmlChar *base) {
9706 xmlParserCtxtPtr ctxt;
9707 xmlParserInputPtr inputStream;
9708 char *directory = NULL;
9709 xmlChar *uri;
9710
9711 ctxt = xmlNewParserCtxt();
9712 if (ctxt == NULL) {
9713 return(NULL);
9714 }
9715
9716 uri = xmlBuildURI(URL, base);
9717
9718 if (uri == NULL) {
9719 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9720 if (inputStream == NULL) {
9721 xmlFreeParserCtxt(ctxt);
9722 return(NULL);
9723 }
9724
9725 inputPush(ctxt, inputStream);
9726
9727 if ((ctxt->directory == NULL) && (directory == NULL))
9728 directory = xmlParserGetDirectory((char *)URL);
9729 if ((ctxt->directory == NULL) && (directory != NULL))
9730 ctxt->directory = directory;
9731 } else {
9732 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9733 if (inputStream == NULL) {
9734 xmlFree(uri);
9735 xmlFreeParserCtxt(ctxt);
9736 return(NULL);
9737 }
9738
9739 inputPush(ctxt, inputStream);
9740
9741 if ((ctxt->directory == NULL) && (directory == NULL))
9742 directory = xmlParserGetDirectory((char *)uri);
9743 if ((ctxt->directory == NULL) && (directory != NULL))
9744 ctxt->directory = directory;
9745 xmlFree(uri);
9746 }
9747
9748 return(ctxt);
9749}
9750
9751/************************************************************************
9752 * *
9753 * Front ends when parsing from a file *
9754 * *
9755 ************************************************************************/
9756
9757/**
9758 * xmlCreateFileParserCtxt:
9759 * @filename: the filename
9760 *
9761 * Create a parser context for a file content.
9762 * Automatic support for ZLIB/Compress compressed document is provided
9763 * by default if found at compile-time.
9764 *
9765 * Returns the new parser context or NULL
9766 */
9767xmlParserCtxtPtr
9768xmlCreateFileParserCtxt(const char *filename)
9769{
9770 xmlParserCtxtPtr ctxt;
9771 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009772 char *directory = NULL;
9773
Owen Taylor3473f882001-02-23 17:55:21 +00009774 ctxt = xmlNewParserCtxt();
9775 if (ctxt == NULL) {
9776 if (xmlDefaultSAXHandler.error != NULL) {
9777 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9778 }
9779 return(NULL);
9780 }
9781
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009782 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009783 if (inputStream == NULL) {
9784 xmlFreeParserCtxt(ctxt);
9785 return(NULL);
9786 }
9787
Owen Taylor3473f882001-02-23 17:55:21 +00009788 inputPush(ctxt, inputStream);
9789 if ((ctxt->directory == NULL) && (directory == NULL))
9790 directory = xmlParserGetDirectory(filename);
9791 if ((ctxt->directory == NULL) && (directory != NULL))
9792 ctxt->directory = directory;
9793
9794 return(ctxt);
9795}
9796
9797/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009798 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009799 * @sax: the SAX handler block
9800 * @filename: the filename
9801 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9802 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009803 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009804 *
9805 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9806 * compressed document is provided by default if found at compile-time.
9807 * It use the given SAX function block to handle the parsing callback.
9808 * If sax is NULL, fallback to the default DOM tree building routines.
9809 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009810 * User data (void *) is stored within the parser context, so it is
9811 * available nearly everywhere in libxml.
9812 *
Owen Taylor3473f882001-02-23 17:55:21 +00009813 * Returns the resulting document tree
9814 */
9815
9816xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009817xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9818 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009819 xmlDocPtr ret;
9820 xmlParserCtxtPtr ctxt;
9821 char *directory = NULL;
9822
Daniel Veillard635ef722001-10-29 11:48:19 +00009823 xmlInitParser();
9824
Owen Taylor3473f882001-02-23 17:55:21 +00009825 ctxt = xmlCreateFileParserCtxt(filename);
9826 if (ctxt == NULL) {
9827 return(NULL);
9828 }
9829 if (sax != NULL) {
9830 if (ctxt->sax != NULL)
9831 xmlFree(ctxt->sax);
9832 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009833 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009834 if (data!=NULL) {
9835 ctxt->_private=data;
9836 }
Owen Taylor3473f882001-02-23 17:55:21 +00009837
9838 if ((ctxt->directory == NULL) && (directory == NULL))
9839 directory = xmlParserGetDirectory(filename);
9840 if ((ctxt->directory == NULL) && (directory != NULL))
9841 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9842
9843 xmlParseDocument(ctxt);
9844
9845 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9846 else {
9847 ret = NULL;
9848 xmlFreeDoc(ctxt->myDoc);
9849 ctxt->myDoc = NULL;
9850 }
9851 if (sax != NULL)
9852 ctxt->sax = NULL;
9853 xmlFreeParserCtxt(ctxt);
9854
9855 return(ret);
9856}
9857
9858/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009859 * xmlSAXParseFile:
9860 * @sax: the SAX handler block
9861 * @filename: the filename
9862 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9863 * documents
9864 *
9865 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9866 * compressed document is provided by default if found at compile-time.
9867 * It use the given SAX function block to handle the parsing callback.
9868 * If sax is NULL, fallback to the default DOM tree building routines.
9869 *
9870 * Returns the resulting document tree
9871 */
9872
9873xmlDocPtr
9874xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9875 int recovery) {
9876 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9877}
9878
9879/**
Owen Taylor3473f882001-02-23 17:55:21 +00009880 * xmlRecoverDoc:
9881 * @cur: a pointer to an array of xmlChar
9882 *
9883 * parse an XML in-memory document and build a tree.
9884 * In the case the document is not Well Formed, a tree is built anyway
9885 *
9886 * Returns the resulting document tree
9887 */
9888
9889xmlDocPtr
9890xmlRecoverDoc(xmlChar *cur) {
9891 return(xmlSAXParseDoc(NULL, cur, 1));
9892}
9893
9894/**
9895 * xmlParseFile:
9896 * @filename: the filename
9897 *
9898 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9899 * compressed document is provided by default if found at compile-time.
9900 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009901 * Returns the resulting document tree if the file was wellformed,
9902 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009903 */
9904
9905xmlDocPtr
9906xmlParseFile(const char *filename) {
9907 return(xmlSAXParseFile(NULL, filename, 0));
9908}
9909
9910/**
9911 * xmlRecoverFile:
9912 * @filename: the filename
9913 *
9914 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9915 * compressed document is provided by default if found at compile-time.
9916 * In the case the document is not Well Formed, a tree is built anyway
9917 *
9918 * Returns the resulting document tree
9919 */
9920
9921xmlDocPtr
9922xmlRecoverFile(const char *filename) {
9923 return(xmlSAXParseFile(NULL, filename, 1));
9924}
9925
9926
9927/**
9928 * xmlSetupParserForBuffer:
9929 * @ctxt: an XML parser context
9930 * @buffer: a xmlChar * buffer
9931 * @filename: a file name
9932 *
9933 * Setup the parser context to parse a new buffer; Clears any prior
9934 * contents from the parser context. The buffer parameter must not be
9935 * NULL, but the filename parameter can be
9936 */
9937void
9938xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9939 const char* filename)
9940{
9941 xmlParserInputPtr input;
9942
9943 input = xmlNewInputStream(ctxt);
9944 if (input == NULL) {
9945 perror("malloc");
9946 xmlFree(ctxt);
9947 return;
9948 }
9949
9950 xmlClearParserCtxt(ctxt);
9951 if (filename != NULL)
9952 input->filename = xmlMemStrdup(filename);
9953 input->base = buffer;
9954 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009955 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009956 inputPush(ctxt, input);
9957}
9958
9959/**
9960 * xmlSAXUserParseFile:
9961 * @sax: a SAX handler
9962 * @user_data: The user data returned on SAX callbacks
9963 * @filename: a file name
9964 *
9965 * parse an XML file and call the given SAX handler routines.
9966 * Automatic support for ZLIB/Compress compressed document is provided
9967 *
9968 * Returns 0 in case of success or a error number otherwise
9969 */
9970int
9971xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9972 const char *filename) {
9973 int ret = 0;
9974 xmlParserCtxtPtr ctxt;
9975
9976 ctxt = xmlCreateFileParserCtxt(filename);
9977 if (ctxt == NULL) return -1;
9978 if (ctxt->sax != &xmlDefaultSAXHandler)
9979 xmlFree(ctxt->sax);
9980 ctxt->sax = sax;
9981 if (user_data != NULL)
9982 ctxt->userData = user_data;
9983
9984 xmlParseDocument(ctxt);
9985
9986 if (ctxt->wellFormed)
9987 ret = 0;
9988 else {
9989 if (ctxt->errNo != 0)
9990 ret = ctxt->errNo;
9991 else
9992 ret = -1;
9993 }
9994 if (sax != NULL)
9995 ctxt->sax = NULL;
9996 xmlFreeParserCtxt(ctxt);
9997
9998 return ret;
9999}
10000
10001/************************************************************************
10002 * *
10003 * Front ends when parsing from memory *
10004 * *
10005 ************************************************************************/
10006
10007/**
10008 * xmlCreateMemoryParserCtxt:
10009 * @buffer: a pointer to a char array
10010 * @size: the size of the array
10011 *
10012 * Create a parser context for an XML in-memory document.
10013 *
10014 * Returns the new parser context or NULL
10015 */
10016xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010017xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010018 xmlParserCtxtPtr ctxt;
10019 xmlParserInputPtr input;
10020 xmlParserInputBufferPtr buf;
10021
10022 if (buffer == NULL)
10023 return(NULL);
10024 if (size <= 0)
10025 return(NULL);
10026
10027 ctxt = xmlNewParserCtxt();
10028 if (ctxt == NULL)
10029 return(NULL);
10030
10031 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10032 if (buf == NULL) return(NULL);
10033
10034 input = xmlNewInputStream(ctxt);
10035 if (input == NULL) {
10036 xmlFreeParserCtxt(ctxt);
10037 return(NULL);
10038 }
10039
10040 input->filename = NULL;
10041 input->buf = buf;
10042 input->base = input->buf->buffer->content;
10043 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010044 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010045
10046 inputPush(ctxt, input);
10047 return(ctxt);
10048}
10049
10050/**
10051 * xmlSAXParseMemory:
10052 * @sax: the SAX handler block
10053 * @buffer: an pointer to a char array
10054 * @size: the size of the array
10055 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10056 * documents
10057 *
10058 * parse an XML in-memory block and use the given SAX function block
10059 * to handle the parsing callback. If sax is NULL, fallback to the default
10060 * DOM tree building routines.
10061 *
10062 * Returns the resulting document tree
10063 */
10064xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010065xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10066 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010067 xmlDocPtr ret;
10068 xmlParserCtxtPtr ctxt;
10069
10070 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10071 if (ctxt == NULL) return(NULL);
10072 if (sax != NULL) {
10073 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010074 }
10075
10076 xmlParseDocument(ctxt);
10077
10078 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10079 else {
10080 ret = NULL;
10081 xmlFreeDoc(ctxt->myDoc);
10082 ctxt->myDoc = NULL;
10083 }
10084 if (sax != NULL)
10085 ctxt->sax = NULL;
10086 xmlFreeParserCtxt(ctxt);
10087
10088 return(ret);
10089}
10090
10091/**
10092 * xmlParseMemory:
10093 * @buffer: an pointer to a char array
10094 * @size: the size of the array
10095 *
10096 * parse an XML in-memory block and build a tree.
10097 *
10098 * Returns the resulting document tree
10099 */
10100
Daniel Veillard50822cb2001-07-26 20:05:51 +000010101xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010102 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10103}
10104
10105/**
10106 * xmlRecoverMemory:
10107 * @buffer: an pointer to a char array
10108 * @size: the size of the array
10109 *
10110 * parse an XML in-memory block and build a tree.
10111 * In the case the document is not Well Formed, a tree is built anyway
10112 *
10113 * Returns the resulting document tree
10114 */
10115
Daniel Veillard50822cb2001-07-26 20:05:51 +000010116xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010117 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10118}
10119
10120/**
10121 * xmlSAXUserParseMemory:
10122 * @sax: a SAX handler
10123 * @user_data: The user data returned on SAX callbacks
10124 * @buffer: an in-memory XML document input
10125 * @size: the length of the XML document in bytes
10126 *
10127 * A better SAX parsing routine.
10128 * parse an XML in-memory buffer and call the given SAX handler routines.
10129 *
10130 * Returns 0 in case of success or a error number otherwise
10131 */
10132int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010133 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010134 int ret = 0;
10135 xmlParserCtxtPtr ctxt;
10136 xmlSAXHandlerPtr oldsax = NULL;
10137
10138 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10139 if (ctxt == NULL) return -1;
10140 if (sax != NULL) {
10141 oldsax = ctxt->sax;
10142 ctxt->sax = sax;
10143 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010144 if (user_data != NULL)
10145 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010146
10147 xmlParseDocument(ctxt);
10148
10149 if (ctxt->wellFormed)
10150 ret = 0;
10151 else {
10152 if (ctxt->errNo != 0)
10153 ret = ctxt->errNo;
10154 else
10155 ret = -1;
10156 }
10157 if (sax != NULL) {
10158 ctxt->sax = oldsax;
10159 }
10160 xmlFreeParserCtxt(ctxt);
10161
10162 return ret;
10163}
10164
10165/**
10166 * xmlCreateDocParserCtxt:
10167 * @cur: a pointer to an array of xmlChar
10168 *
10169 * Creates a parser context for an XML in-memory document.
10170 *
10171 * Returns the new parser context or NULL
10172 */
10173xmlParserCtxtPtr
10174xmlCreateDocParserCtxt(xmlChar *cur) {
10175 int len;
10176
10177 if (cur == NULL)
10178 return(NULL);
10179 len = xmlStrlen(cur);
10180 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10181}
10182
10183/**
10184 * xmlSAXParseDoc:
10185 * @sax: the SAX handler block
10186 * @cur: a pointer to an array of xmlChar
10187 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10188 * documents
10189 *
10190 * parse an XML in-memory document and build a tree.
10191 * It use the given SAX function block to handle the parsing callback.
10192 * If sax is NULL, fallback to the default DOM tree building routines.
10193 *
10194 * Returns the resulting document tree
10195 */
10196
10197xmlDocPtr
10198xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10199 xmlDocPtr ret;
10200 xmlParserCtxtPtr ctxt;
10201
10202 if (cur == NULL) return(NULL);
10203
10204
10205 ctxt = xmlCreateDocParserCtxt(cur);
10206 if (ctxt == NULL) return(NULL);
10207 if (sax != NULL) {
10208 ctxt->sax = sax;
10209 ctxt->userData = NULL;
10210 }
10211
10212 xmlParseDocument(ctxt);
10213 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10214 else {
10215 ret = NULL;
10216 xmlFreeDoc(ctxt->myDoc);
10217 ctxt->myDoc = NULL;
10218 }
10219 if (sax != NULL)
10220 ctxt->sax = NULL;
10221 xmlFreeParserCtxt(ctxt);
10222
10223 return(ret);
10224}
10225
10226/**
10227 * xmlParseDoc:
10228 * @cur: a pointer to an array of xmlChar
10229 *
10230 * parse an XML in-memory document and build a tree.
10231 *
10232 * Returns the resulting document tree
10233 */
10234
10235xmlDocPtr
10236xmlParseDoc(xmlChar *cur) {
10237 return(xmlSAXParseDoc(NULL, cur, 0));
10238}
10239
Daniel Veillard8107a222002-01-13 14:10:10 +000010240/************************************************************************
10241 * *
10242 * Specific function to keep track of entities references *
10243 * and used by the XSLT debugger *
10244 * *
10245 ************************************************************************/
10246
10247static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10248
10249/**
10250 * xmlAddEntityReference:
10251 * @ent : A valid entity
10252 * @firstNode : A valid first node for children of entity
10253 * @lastNode : A valid last node of children entity
10254 *
10255 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10256 */
10257static void
10258xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10259 xmlNodePtr lastNode)
10260{
10261 if (xmlEntityRefFunc != NULL) {
10262 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10263 }
10264}
10265
10266
10267/**
10268 * xmlSetEntityReferenceFunc:
10269 * @func : A valid function
10270 *
10271 * Set the function to call call back when a xml reference has been made
10272 */
10273void
10274xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10275{
10276 xmlEntityRefFunc = func;
10277}

/************************************************************************
 *                                                                      *
 *                              Miscellaneous                           *
 *                                                                      *
 ************************************************************************/
10284
10285#ifdef LIBXML_XPATH_ENABLED
10286#include <libxml/xpath.h>
10287#endif
10288
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010289extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010290static int xmlParserInitialized = 0;
10291
10292/**
10293 * xmlInitParser:
10294 *
10295 * Initialization function for the XML parser.
10296 * This is not reentrant. Call once before processing in case of
10297 * use in multithreaded programs.
10298 */
10299
10300void
10301xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010302 if (xmlParserInitialized != 0)
10303 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010304
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010305 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10306 (xmlGenericError == NULL))
10307 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010308 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010309 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010310 xmlInitCharEncodingHandlers();
10311 xmlInitializePredefinedEntities();
10312 xmlDefaultSAXHandlerInit();
10313 xmlRegisterDefaultInputCallbacks();
10314 xmlRegisterDefaultOutputCallbacks();
10315#ifdef LIBXML_HTML_ENABLED
10316 htmlInitAutoClose();
10317 htmlDefaultSAXHandlerInit();
10318#endif
10319#ifdef LIBXML_XPATH_ENABLED
10320 xmlXPathInit();
10321#endif
10322 xmlParserInitialized = 1;
10323}
10324
10325/**
10326 * xmlCleanupParser:
10327 *
10328 * Cleanup function for the XML parser. It tries to reclaim all
10329 * parsing related global memory allocated for the parser processing.
10330 * It doesn't deallocate any document related memory. Calling this
10331 * function should not prevent reusing the parser.
10332 */
10333
10334void
10335xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010336 xmlCleanupCharEncodingHandlers();
10337 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010338#ifdef LIBXML_CATALOG_ENABLED
10339 xmlCatalogCleanup();
10340#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010341 xmlCleanupThreads();
10342 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010343}