blob: 601430cbb3229bbeceea21ee6c2172beafb61547 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
/*
 * Scratch buffer sizing, counted in xmlChar units.
 * XML_PARSER_BUFFER_SIZE is the base size; it is also the headroom that the
 * entity decoding code keeps free before enlarging its buffer.
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of larger translation
 * buffers.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300
81
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets that are allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
102/************************************************************************
103 * *
104 * Parser stacks related functions and macros *
105 * *
106 ************************************************************************/
107
108xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
109 const xmlChar ** str);
110
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions working on the
 * array ctxt->name##Tab (name##Max slots allocated, name##Nr slots in use)
 * and on ctxt->name, which always mirrors the entry on top of the stack:
 *
 *  - name##Push() stores @value on top of the stack, doubling the array
 *    first when it is full, and returns the index the value was stored at.
 *    When the array cannot be enlarged the failure is reported through
 *    xmlGenericError, 0 is returned and the stack is left exactly as it
 *    was (the previous table stays valid and owned by the context).
 *
 *  - name##Pop() removes the top entry and returns it, or returns 0 when
 *    the stack is empty. ctxt->name is reset to the new top entry, or to
 *    NULL when nothing is left. The vacated slot is zeroed.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        /* grow through a temporary: on failure the old table, */       \
        /* its size and its content must all remain usable      */      \
        int newMax = ctxt->name##Max * 2;                               \
        type *grown = (type *) xmlRealloc(ctxt->name##Tab,              \
                         newMax * sizeof(ctxt->name##Tab[0]));          \
        if (grown == NULL) {                                            \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = grown;                                        \
        ctxt->name##Max = newMax;                                       \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
143
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000144/**
145 * inputPop:
146 * @ctxt: an XML parser context
147 *
148 * Pops the top parser input from the input stack
149 *
150 * Returns the input just removed
151 */
152/**
153 * inputPush:
154 * @ctxt: an XML parser context
155 * @input: the parser input
156 *
157 * Pushes a new parser input on top of the input stack
158 */
159/**
160 * namePop:
161 * @ctxt: an XML parser context
162 *
163 * Pops the top element name from the name stack
164 *
165 * Returns the name just removed
166 */
167/**
168 * namePush:
169 * @ctxt: an XML parser context
170 * @name: the element name
171 *
172 * Pushes a new element name on top of the name stack
173 */
174/**
175 * nodePop:
176 * @ctxt: an XML parser context
177 *
178 * Pops the top element node from the node stack
179 *
180 * Returns the node just removed
181 */
182/**
183 * nodePush:
184 * @ctxt: an XML parser context
185 * @node: the element node
186 *
187 * Pushes a new element node on top of the node stack
188 */
Owen Taylor3473f882001-02-23 17:55:21 +0000189/*
190 * Those macros actually generate the functions
191 */
192PUSH_AND_POP(extern, xmlParserInputPtr, input)
193PUSH_AND_POP(extern, xmlNodePtr, node)
194PUSH_AND_POP(extern, xmlChar*, name)
195
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000196static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000197 if (ctxt->spaceNr >= ctxt->spaceMax) {
198 ctxt->spaceMax *= 2;
199 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
200 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
201 if (ctxt->spaceTab == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "realloc failed !\n");
204 return(0);
205 }
206 }
207 ctxt->spaceTab[ctxt->spaceNr] = val;
208 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
209 return(ctxt->spaceNr++);
210}
211
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000212static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000213 int ret;
214 if (ctxt->spaceNr <= 0) return(0);
215 ctxt->spaceNr--;
216 if (ctxt->spaceNr > 0)
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
218 else
219 ctxt->space = NULL;
220 ret = ctxt->spaceTab[ctxt->spaceNr];
221 ctxt->spaceTab[ctxt->spaceNr] = -1;
222 return(ret);
223}
224
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
258
/*
 * Raw accessors on the current input. All of them expect a variable named
 * ctxt to be in scope.
 *  CUR_PTR  the current position in the input buffer
 *  NXT(val) the xmlChar found val positions after the current one
 *  RAW      the xmlChar at the current position, or -1 while a token is
 *           pending in ctxt->token
 *  CUR      the pending token if there is one, else the xmlChar at the
 *           current position
 */
#define CUR_PTR ctxt->input->cur
#define NXT(val) ctxt->input->cur[(val)]
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))

/*
 * SKIP(val) advances the input position and the character counter by val,
 * hands a '%' found at the new position to xmlParserHandlePEReference(),
 * and pops the input when its end is reached and it cannot be refilled.
 * val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
271
/*
 * SHRINK discards already consumed data once more than INPUT_CHUNK
 * xmlChars lie between the start of the buffer and the current position.
 * GROW asks for more data once fewer than INPUT_CHUNK xmlChars remain
 * ahead of the current position. Both pop the input when its end is
 * reached and nothing more can be read.
 *
 * Each one expands to a bare "if (...) { ... }" statement and some call
 * sites in this file use them without a trailing semicolon, so they must
 * not be placed directly in front of an "else".
 */
#define SHRINK                                                          \
    if ((ctxt->input->cur - ctxt->input->base) > INPUT_CHUNK) {         \
        xmlParserInputShrink(ctxt->input);                              \
        if ((*ctxt->input->cur == 0) &&                                 \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))        \
            xmlPopInput(ctxt);                                          \
    }

#define GROW                                                            \
    if ((ctxt->input->end - ctxt->input->cur) < INPUT_CHUNK) {          \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
        if ((*ctxt->input->cur == 0) &&                                 \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))        \
            xmlPopInput(ctxt);                                          \
    }
Owen Taylor3473f882001-02-23 17:55:21 +0000285
/* Shorthands for the blank skipping and character stepping routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 steps over exactly one xmlChar and bumps the character counter,
 * asking for more input when the end of the buffered data is reached.
 * Unlike NEXT it does no decoding and no line/column bookkeeping.
 */
#define NEXT1 {                                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }
296
/*
 * NEXTL(l) updates the line/column position from the current xmlChar,
 * clears any pending token, advances the input by l xmlChars and hands a
 * '%' found at the new position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/*
 * CUR_CHAR(l) and CUR_SCHAR(s, l) fetch the current character from the
 * context or from the string s; l must be an lvalue, it receives the
 * length reported by the underlying routine.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v) appends the character v to buffer b at index i and
 * advances i: a plain one-xmlChar store when l is 1, a call to
 * xmlCopyCharMultiByte() otherwise.
 * Every parameter is parenthesized so that expression arguments expand
 * with the intended precedence (the cast and the subscript used to bind
 * to the first operand only). The macro is still an unbraced if/else
 * statement: do not use it as the direct body of another "if".
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
Owen Taylor3473f882001-02-23 17:55:21 +0000311
312/**
313 * xmlSkipBlankChars:
314 * @ctxt: the XML parser context
315 *
316 * skip all blanks character found at that point in the input streams.
317 * It pops up finished entities in the process if allowable at that point.
318 *
319 * Returns the number of space chars skipped
320 */
321
322int
323xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000324 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000325
Daniel Veillard02141ea2001-04-30 11:46:40 +0000326 if (ctxt->token != 0) {
327 if (!IS_BLANK(ctxt->token))
328 return(0);
329 ctxt->token = 0;
330 res++;
331 }
Owen Taylor3473f882001-02-23 17:55:21 +0000332 /*
333 * It's Okay to use CUR/NEXT here since all the blanks are on
334 * the ASCII range.
335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
337 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000338 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000340 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000341 cur = ctxt->input->cur;
342 while (IS_BLANK(*cur)) {
343 if (*cur == '\n') {
344 ctxt->input->line++; ctxt->input->col = 1;
345 }
346 cur++;
347 res++;
348 if (*cur == 0) {
349 ctxt->input->cur = cur;
350 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
351 cur = ctxt->input->cur;
352 }
353 }
354 ctxt->input->cur = cur;
355 } else {
356 int cur;
357 do {
358 cur = CUR;
359 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
360 NEXT;
361 cur = CUR;
362 res++;
363 }
364 while ((cur == 0) && (ctxt->inputNr > 1) &&
365 (ctxt->instate != XML_PARSER_COMMENT)) {
366 xmlPopInput(ctxt);
367 cur = CUR;
368 }
369 /*
370 * Need to handle support of entities branching here
371 */
372 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
373 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
374 }
Owen Taylor3473f882001-02-23 17:55:21 +0000375 return(res);
376}
377
378/************************************************************************
379 * *
380 * Commodity functions to handle entities *
381 * *
382 ************************************************************************/
383
384/**
385 * xmlPopInput:
386 * @ctxt: an XML parser context
387 *
388 * xmlPopInput: the current input pointed by ctxt->input came to an end
389 * pop it and return the next char.
390 *
391 * Returns the current xmlChar in the parser context
392 */
393xmlChar
394xmlPopInput(xmlParserCtxtPtr ctxt) {
395 if (ctxt->inputNr == 1) return(0); /* End of main Input */
396 if (xmlParserDebugEntities)
397 xmlGenericError(xmlGenericErrorContext,
398 "Popping input %d\n", ctxt->inputNr);
399 xmlFreeInputStream(inputPop(ctxt));
400 if ((*ctxt->input->cur == 0) &&
401 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
402 return(xmlPopInput(ctxt));
403 return(CUR);
404}
405
406/**
407 * xmlPushInput:
408 * @ctxt: an XML parser context
409 * @input: an XML parser input fragment (entity, XML fragment ...).
410 *
411 * xmlPushInput: switch to a new input stream which is stacked on top
412 * of the previous one(s).
413 */
414void
415xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
416 if (input == NULL) return;
417
418 if (xmlParserDebugEntities) {
419 if ((ctxt->input != NULL) && (ctxt->input->filename))
420 xmlGenericError(xmlGenericErrorContext,
421 "%s(%d): ", ctxt->input->filename,
422 ctxt->input->line);
423 xmlGenericError(xmlGenericErrorContext,
424 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
425 }
426 inputPush(ctxt, input);
427 GROW;
428}
429
430/**
431 * xmlParseCharRef:
432 * @ctxt: an XML parser context
433 *
434 * parse Reference declarations
435 *
436 * [66] CharRef ::= '&#' [0-9]+ ';' |
437 * '&#x' [0-9a-fA-F]+ ';'
438 *
439 * [ WFC: Legal Character ]
440 * Characters referred to using character references must match the
441 * production for Char.
442 *
443 * Returns the value parsed (as an int), 0 in case of error
444 */
445int
446xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000447 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000448 int count = 0;
449
450 if (ctxt->token != 0) {
451 val = ctxt->token;
452 ctxt->token = 0;
453 return(val);
454 }
455 /*
456 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
457 */
458 if ((RAW == '&') && (NXT(1) == '#') &&
459 (NXT(2) == 'x')) {
460 SKIP(3);
461 GROW;
462 while (RAW != ';') { /* loop blocked by count */
463 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
464 val = val * 16 + (CUR - '0');
465 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
466 val = val * 16 + (CUR - 'a') + 10;
467 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
468 val = val * 16 + (CUR - 'A') + 10;
469 else {
470 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
472 ctxt->sax->error(ctxt->userData,
473 "xmlParseCharRef: invalid hexadecimal value\n");
474 ctxt->wellFormed = 0;
475 ctxt->disableSAX = 1;
476 val = 0;
477 break;
478 }
479 NEXT;
480 count++;
481 }
482 if (RAW == ';') {
483 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
484 ctxt->nbChars ++;
485 ctxt->input->cur++;
486 }
487 } else if ((RAW == '&') && (NXT(1) == '#')) {
488 SKIP(2);
489 GROW;
490 while (RAW != ';') { /* loop blocked by count */
491 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
492 val = val * 10 + (CUR - '0');
493 else {
494 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
496 ctxt->sax->error(ctxt->userData,
497 "xmlParseCharRef: invalid decimal value\n");
498 ctxt->wellFormed = 0;
499 ctxt->disableSAX = 1;
500 val = 0;
501 break;
502 }
503 NEXT;
504 count++;
505 }
506 if (RAW == ';') {
507 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
508 ctxt->nbChars ++;
509 ctxt->input->cur++;
510 }
511 } else {
512 ctxt->errNo = XML_ERR_INVALID_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid value\n");
516 ctxt->wellFormed = 0;
517 ctxt->disableSAX = 1;
518 }
519
520 /*
521 * [ WFC: Legal Character ]
522 * Characters referred to using character references must match the
523 * production for Char.
524 */
525 if (IS_CHAR(val)) {
526 return(val);
527 } else {
528 ctxt->errNo = XML_ERR_INVALID_CHAR;
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000530 ctxt->sax->error(ctxt->userData,
531 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000532 val);
533 ctxt->wellFormed = 0;
534 ctxt->disableSAX = 1;
535 }
536 return(0);
537}
538
539/**
540 * xmlParseStringCharRef:
541 * @ctxt: an XML parser context
542 * @str: a pointer to an index in the string
543 *
544 * parse Reference declarations, variant parsing from a string rather
545 * than an an input flow.
546 *
547 * [66] CharRef ::= '&#' [0-9]+ ';' |
548 * '&#x' [0-9a-fA-F]+ ';'
549 *
550 * [ WFC: Legal Character ]
551 * Characters referred to using character references must match the
552 * production for Char.
553 *
554 * Returns the value parsed (as an int), 0 in case of error, str will be
555 * updated to the current value of the index
556 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000557static int
Owen Taylor3473f882001-02-23 17:55:21 +0000558xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
559 const xmlChar *ptr;
560 xmlChar cur;
561 int val = 0;
562
563 if ((str == NULL) || (*str == NULL)) return(0);
564 ptr = *str;
565 cur = *ptr;
566 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
567 ptr += 3;
568 cur = *ptr;
569 while (cur != ';') { /* Non input consuming loop */
570 if ((cur >= '0') && (cur <= '9'))
571 val = val * 16 + (cur - '0');
572 else if ((cur >= 'a') && (cur <= 'f'))
573 val = val * 16 + (cur - 'a') + 10;
574 else if ((cur >= 'A') && (cur <= 'F'))
575 val = val * 16 + (cur - 'A') + 10;
576 else {
577 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
579 ctxt->sax->error(ctxt->userData,
580 "xmlParseStringCharRef: invalid hexadecimal value\n");
581 ctxt->wellFormed = 0;
582 ctxt->disableSAX = 1;
583 val = 0;
584 break;
585 }
586 ptr++;
587 cur = *ptr;
588 }
589 if (cur == ';')
590 ptr++;
591 } else if ((cur == '&') && (ptr[1] == '#')){
592 ptr += 2;
593 cur = *ptr;
594 while (cur != ';') { /* Non input consuming loops */
595 if ((cur >= '0') && (cur <= '9'))
596 val = val * 10 + (cur - '0');
597 else {
598 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
600 ctxt->sax->error(ctxt->userData,
601 "xmlParseStringCharRef: invalid decimal value\n");
602 ctxt->wellFormed = 0;
603 ctxt->disableSAX = 1;
604 val = 0;
605 break;
606 }
607 ptr++;
608 cur = *ptr;
609 }
610 if (cur == ';')
611 ptr++;
612 } else {
613 ctxt->errNo = XML_ERR_INVALID_CHARREF;
614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
615 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000616 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000617 ctxt->wellFormed = 0;
618 ctxt->disableSAX = 1;
619 return(0);
620 }
621 *str = ptr;
622
623 /*
624 * [ WFC: Legal Character ]
625 * Characters referred to using character references must match the
626 * production for Char.
627 */
628 if (IS_CHAR(val)) {
629 return(val);
630 } else {
631 ctxt->errNo = XML_ERR_INVALID_CHAR;
632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000634 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 }
638 return(0);
639}
640
641/**
642 * xmlParserHandlePEReference:
643 * @ctxt: the parser context
644 *
645 * [69] PEReference ::= '%' Name ';'
646 *
647 * [ WFC: No Recursion ]
648 * A parsed entity must not contain a recursive
649 * reference to itself, either directly or indirectly.
650 *
651 * [ WFC: Entity Declared ]
652 * In a document without any DTD, a document with only an internal DTD
653 * subset which contains no parameter entity references, or a document
654 * with "standalone='yes'", ... ... The declaration of a parameter
655 * entity must precede any reference to it...
656 *
657 * [ VC: Entity Declared ]
658 * In a document with an external subset or external parameter entities
659 * with "standalone='no'", ... ... The declaration of a parameter entity
660 * must precede any reference to it...
661 *
662 * [ WFC: In DTD ]
663 * Parameter-entity references may only appear in the DTD.
664 * NOTE: misleading but this is handled.
665 *
666 * A PEReference may have been detected in the current input stream
667 * the handling is done accordingly to
668 * http://www.w3.org/TR/REC-xml#entproc
669 * i.e.
670 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000671 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000672 */
673void
674xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
675 xmlChar *name;
676 xmlEntityPtr entity = NULL;
677 xmlParserInputPtr input;
678
679 if (ctxt->token != 0) {
680 return;
681 }
682 if (RAW != '%') return;
683 switch(ctxt->instate) {
684 case XML_PARSER_CDATA_SECTION:
685 return;
686 case XML_PARSER_COMMENT:
687 return;
688 case XML_PARSER_START_TAG:
689 return;
690 case XML_PARSER_END_TAG:
691 return;
692 case XML_PARSER_EOF:
693 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
695 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
696 ctxt->wellFormed = 0;
697 ctxt->disableSAX = 1;
698 return;
699 case XML_PARSER_PROLOG:
700 case XML_PARSER_START:
701 case XML_PARSER_MISC:
702 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
704 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
705 ctxt->wellFormed = 0;
706 ctxt->disableSAX = 1;
707 return;
708 case XML_PARSER_ENTITY_DECL:
709 case XML_PARSER_CONTENT:
710 case XML_PARSER_ATTRIBUTE_VALUE:
711 case XML_PARSER_PI:
712 case XML_PARSER_SYSTEM_LITERAL:
713 /* we just ignore it there */
714 return;
715 case XML_PARSER_EPILOG:
716 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
718 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
719 ctxt->wellFormed = 0;
720 ctxt->disableSAX = 1;
721 return;
722 case XML_PARSER_ENTITY_VALUE:
723 /*
724 * NOTE: in the case of entity values, we don't do the
725 * substitution here since we need the literal
726 * entity value to be able to save the internal
727 * subset of the document.
728 * This will be handled by xmlStringDecodeEntities
729 */
730 return;
731 case XML_PARSER_DTD:
732 /*
733 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
734 * In the internal DTD subset, parameter-entity references
735 * can occur only where markup declarations can occur, not
736 * within markup declarations.
737 * In that case this is handled in xmlParseMarkupDecl
738 */
739 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
740 return;
741 break;
742 case XML_PARSER_IGNORE:
743 return;
744 }
745
746 NEXT;
747 name = xmlParseName(ctxt);
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000750 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000751 if (name == NULL) {
752 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000754 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000755 ctxt->wellFormed = 0;
756 ctxt->disableSAX = 1;
757 } else {
758 if (RAW == ';') {
759 NEXT;
760 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
761 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
762 if (entity == NULL) {
763
764 /*
765 * [ WFC: Entity Declared ]
766 * In a document without any DTD, a document with only an
767 * internal DTD subset which contains no parameter entity
768 * references, or a document with "standalone='yes'", ...
769 * ... The declaration of a parameter entity must precede
770 * any reference to it...
771 */
772 if ((ctxt->standalone == 1) ||
773 ((ctxt->hasExternalSubset == 0) &&
774 (ctxt->hasPErefs == 0))) {
775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
776 ctxt->sax->error(ctxt->userData,
777 "PEReference: %%%s; not found\n", name);
778 ctxt->wellFormed = 0;
779 ctxt->disableSAX = 1;
780 } else {
781 /*
782 * [ VC: Entity Declared ]
783 * In a document with an external subset or external
784 * parameter entities with "standalone='no'", ...
785 * ... The declaration of a parameter entity must precede
786 * any reference to it...
787 */
788 if ((!ctxt->disableSAX) &&
789 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
790 ctxt->vctxt.error(ctxt->vctxt.userData,
791 "PEReference: %%%s; not found\n", name);
792 } else if ((!ctxt->disableSAX) &&
793 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
794 ctxt->sax->warning(ctxt->userData,
795 "PEReference: %%%s; not found\n", name);
796 ctxt->valid = 0;
797 }
798 } else {
799 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
800 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000801 xmlChar start[4];
802 xmlCharEncoding enc;
803
Owen Taylor3473f882001-02-23 17:55:21 +0000804 /*
805 * handle the extra spaces added before and after
806 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000807 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000808 */
809 input = xmlNewEntityInputStream(ctxt, entity);
810 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000811
812 /*
813 * Get the 4 first bytes and decode the charset
814 * if enc != XML_CHAR_ENCODING_NONE
815 * plug some encoding conversion routines.
816 */
817 GROW
818 start[0] = RAW;
819 start[1] = NXT(1);
820 start[2] = NXT(2);
821 start[3] = NXT(3);
822 enc = xmlDetectCharEncoding(start, 4);
823 if (enc != XML_CHAR_ENCODING_NONE) {
824 xmlSwitchEncoding(ctxt, enc);
825 }
826
Owen Taylor3473f882001-02-23 17:55:21 +0000827 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
828 (RAW == '<') && (NXT(1) == '?') &&
829 (NXT(2) == 'x') && (NXT(3) == 'm') &&
830 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
831 xmlParseTextDecl(ctxt);
832 }
833 if (ctxt->token == 0)
834 ctxt->token = ' ';
835 } else {
836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
837 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000838 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000839 name);
840 ctxt->wellFormed = 0;
841 ctxt->disableSAX = 1;
842 }
843 }
844 } else {
845 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
847 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000848 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000849 ctxt->wellFormed = 0;
850 ctxt->disableSAX = 1;
851 }
852 xmlFree(name);
853 }
854}
855
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of the xmlChar array named by
 * its argument; the companion variable <argument>_size holds the number of
 * xmlChars allocated and is updated on success. It must be used from a
 * function returning a pointer: when the reallocation fails the problem
 * is reported with perror(), the old buffer is released so that it does
 * not leak, and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown = (xmlChar *)                               \
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));        \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
}
868
869/**
870 * xmlStringDecodeEntities:
871 * @ctxt: the parser context
872 * @str: the input string
873 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
874 * @end: an end marker xmlChar, 0 if none
875 * @end2: an end marker xmlChar, 0 if none
876 * @end3: an end marker xmlChar, 0 if none
877 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000878 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000879 *
880 * [67] Reference ::= EntityRef | CharRef
881 *
882 * [69] PEReference ::= '%' Name ';'
883 *
884 * Returns A newly allocated string with the substitution done. The caller
885 * must deallocate it !
886 */
887xmlChar *
888xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
889 xmlChar end, xmlChar end2, xmlChar end3) {
890 xmlChar *buffer = NULL;
891 int buffer_size = 0;
892
893 xmlChar *current = NULL;
894 xmlEntityPtr ent;
895 int c,l;
896 int nbchars = 0;
897
898 if (str == NULL)
899 return(NULL);
900
901 if (ctxt->depth > 40) {
902 ctxt->errNo = XML_ERR_ENTITY_LOOP;
903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
904 ctxt->sax->error(ctxt->userData,
905 "Detected entity reference loop\n");
906 ctxt->wellFormed = 0;
907 ctxt->disableSAX = 1;
908 return(NULL);
909 }
910
911 /*
912 * allocate a translation buffer.
913 */
914 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
915 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
916 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000917 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000918 return(NULL);
919 }
920
921 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000922 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000923 * we are operating on already parsed values.
924 */
925 c = CUR_SCHAR(str, l);
926 while ((c != 0) && (c != end) && /* non input consuming loop */
927 (c != end2) && (c != end3)) {
928
929 if (c == 0) break;
930 if ((c == '&') && (str[1] == '#')) {
931 int val = xmlParseStringCharRef(ctxt, &str);
932 if (val != 0) {
933 COPY_BUF(0,buffer,nbchars,val);
934 }
935 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
936 if (xmlParserDebugEntities)
937 xmlGenericError(xmlGenericErrorContext,
938 "String decoding Entity Reference: %.30s\n",
939 str);
940 ent = xmlParseStringEntityRef(ctxt, &str);
941 if ((ent != NULL) &&
942 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
943 if (ent->content != NULL) {
944 COPY_BUF(0,buffer,nbchars,ent->content[0]);
945 } else {
946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
947 ctxt->sax->error(ctxt->userData,
948 "internal error entity has no content\n");
949 }
950 } else if ((ent != NULL) && (ent->content != NULL)) {
951 xmlChar *rep;
952
953 ctxt->depth++;
954 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
955 0, 0, 0);
956 ctxt->depth--;
957 if (rep != NULL) {
958 current = rep;
959 while (*current != 0) { /* non input consuming loop */
960 buffer[nbchars++] = *current++;
961 if (nbchars >
962 buffer_size - XML_PARSER_BUFFER_SIZE) {
963 growBuffer(buffer);
964 }
965 }
966 xmlFree(rep);
967 }
968 } else if (ent != NULL) {
969 int i = xmlStrlen(ent->name);
970 const xmlChar *cur = ent->name;
971
972 buffer[nbchars++] = '&';
973 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
974 growBuffer(buffer);
975 }
976 for (;i > 0;i--)
977 buffer[nbchars++] = *cur++;
978 buffer[nbchars++] = ';';
979 }
980 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
981 if (xmlParserDebugEntities)
982 xmlGenericError(xmlGenericErrorContext,
983 "String decoding PE Reference: %.30s\n", str);
984 ent = xmlParseStringPEReference(ctxt, &str);
985 if (ent != NULL) {
986 xmlChar *rep;
987
988 ctxt->depth++;
989 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
990 0, 0, 0);
991 ctxt->depth--;
992 if (rep != NULL) {
993 current = rep;
994 while (*current != 0) { /* non input consuming loop */
995 buffer[nbchars++] = *current++;
996 if (nbchars >
997 buffer_size - XML_PARSER_BUFFER_SIZE) {
998 growBuffer(buffer);
999 }
1000 }
1001 xmlFree(rep);
1002 }
1003 }
1004 } else {
1005 COPY_BUF(l,buffer,nbchars,c);
1006 str += l;
1007 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1008 growBuffer(buffer);
1009 }
1010 }
1011 c = CUR_SCHAR(str, l);
1012 }
1013 buffer[nbchars++] = 0;
1014 return(buffer);
1015}
1016
1017
1018/************************************************************************
1019 * *
1020 * Commodity functions to handle xmlChars *
1021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlStrndup:
1026 * @cur: the input xmlChar *
1027 * @len: the len of @cur
1028 *
1029 * a strndup for array of xmlChar's
1030 *
1031 * Returns a new xmlChar * or NULL
1032 */
1033xmlChar *
1034xmlStrndup(const xmlChar *cur, int len) {
1035 xmlChar *ret;
1036
1037 if ((cur == NULL) || (len < 0)) return(NULL);
1038 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1039 if (ret == NULL) {
1040 xmlGenericError(xmlGenericErrorContext,
1041 "malloc of %ld byte failed\n",
1042 (len + 1) * (long)sizeof(xmlChar));
1043 return(NULL);
1044 }
1045 memcpy(ret, cur, len * sizeof(xmlChar));
1046 ret[len] = 0;
1047 return(ret);
1048}
1049
1050/**
1051 * xmlStrdup:
1052 * @cur: the input xmlChar *
1053 *
1054 * a strdup for array of xmlChar's. Since they are supposed to be
1055 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1056 * a termination mark of '0'.
1057 *
1058 * Returns a new xmlChar * or NULL
1059 */
1060xmlChar *
1061xmlStrdup(const xmlChar *cur) {
1062 const xmlChar *p = cur;
1063
1064 if (cur == NULL) return(NULL);
1065 while (*p != 0) p++; /* non input consuming */
1066 return(xmlStrndup(cur, p - cur));
1067}
1068
1069/**
1070 * xmlCharStrndup:
1071 * @cur: the input char *
1072 * @len: the len of @cur
1073 *
1074 * a strndup for char's to xmlChar's
1075 *
1076 * Returns a new xmlChar * or NULL
1077 */
1078
1079xmlChar *
1080xmlCharStrndup(const char *cur, int len) {
1081 int i;
1082 xmlChar *ret;
1083
1084 if ((cur == NULL) || (len < 0)) return(NULL);
1085 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1086 if (ret == NULL) {
1087 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1088 (len + 1) * (long)sizeof(xmlChar));
1089 return(NULL);
1090 }
1091 for (i = 0;i < len;i++)
1092 ret[i] = (xmlChar) cur[i];
1093 ret[len] = 0;
1094 return(ret);
1095}
1096
1097/**
1098 * xmlCharStrdup:
1099 * @cur: the input char *
1100 * @len: the len of @cur
1101 *
1102 * a strdup for char's to xmlChar's
1103 *
1104 * Returns a new xmlChar * or NULL
1105 */
1106
1107xmlChar *
1108xmlCharStrdup(const char *cur) {
1109 const char *p = cur;
1110
1111 if (cur == NULL) return(NULL);
1112 while (*p != '\0') p++; /* non input consuming */
1113 return(xmlCharStrndup(cur, p - cur));
1114}
1115
1116/**
1117 * xmlStrcmp:
1118 * @str1: the first xmlChar *
1119 * @str2: the second xmlChar *
1120 *
1121 * a strcmp for xmlChar's
1122 *
1123 * Returns the integer result of the comparison
1124 */
1125
1126int
1127xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1128 register int tmp;
1129
1130 if (str1 == str2) return(0);
1131 if (str1 == NULL) return(-1);
1132 if (str2 == NULL) return(1);
1133 do {
1134 tmp = *str1++ - *str2;
1135 if (tmp != 0) return(tmp);
1136 } while (*str2++ != 0);
1137 return 0;
1138}
1139
1140/**
1141 * xmlStrEqual:
1142 * @str1: the first xmlChar *
1143 * @str2: the second xmlChar *
1144 *
1145 * Check if both string are equal of have same content
1146 * Should be a bit more readable and faster than xmlStrEqual()
1147 *
1148 * Returns 1 if they are equal, 0 if they are different
1149 */
1150
1151int
1152xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1153 if (str1 == str2) return(1);
1154 if (str1 == NULL) return(0);
1155 if (str2 == NULL) return(0);
1156 do {
1157 if (*str1++ != *str2) return(0);
1158 } while (*str2++);
1159 return(1);
1160}
1161
1162/**
1163 * xmlStrncmp:
1164 * @str1: the first xmlChar *
1165 * @str2: the second xmlChar *
1166 * @len: the max comparison length
1167 *
1168 * a strncmp for xmlChar's
1169 *
1170 * Returns the integer result of the comparison
1171 */
1172
1173int
1174xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1175 register int tmp;
1176
1177 if (len <= 0) return(0);
1178 if (str1 == str2) return(0);
1179 if (str1 == NULL) return(-1);
1180 if (str2 == NULL) return(1);
1181 do {
1182 tmp = *str1++ - *str2;
1183 if (tmp != 0 || --len == 0) return(tmp);
1184 } while (*str2++ != 0);
1185 return 0;
1186}
1187
Daniel Veillardb44025c2001-10-11 22:55:55 +00001188static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001189 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1190 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1191 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1192 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1193 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1194 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1195 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1196 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1197 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1198 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1199 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1200 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1201 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1202 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1203 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1204 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1205 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1206 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1207 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1208 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1209 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1210 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1211 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1212 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1213 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1214 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1215 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1216 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1217 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1218 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1219 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1220 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1221};
1222
1223/**
1224 * xmlStrcasecmp:
1225 * @str1: the first xmlChar *
1226 * @str2: the second xmlChar *
1227 *
1228 * a strcasecmp for xmlChar's
1229 *
1230 * Returns the integer result of the comparison
1231 */
1232
1233int
1234xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1235 register int tmp;
1236
1237 if (str1 == str2) return(0);
1238 if (str1 == NULL) return(-1);
1239 if (str2 == NULL) return(1);
1240 do {
1241 tmp = casemap[*str1++] - casemap[*str2];
1242 if (tmp != 0) return(tmp);
1243 } while (*str2++ != 0);
1244 return 0;
1245}
1246
1247/**
1248 * xmlStrncasecmp:
1249 * @str1: the first xmlChar *
1250 * @str2: the second xmlChar *
1251 * @len: the max comparison length
1252 *
1253 * a strncasecmp for xmlChar's
1254 *
1255 * Returns the integer result of the comparison
1256 */
1257
1258int
1259xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1260 register int tmp;
1261
1262 if (len <= 0) return(0);
1263 if (str1 == str2) return(0);
1264 if (str1 == NULL) return(-1);
1265 if (str2 == NULL) return(1);
1266 do {
1267 tmp = casemap[*str1++] - casemap[*str2];
1268 if (tmp != 0 || --len == 0) return(tmp);
1269 } while (*str2++ != 0);
1270 return 0;
1271}
1272
1273/**
1274 * xmlStrchr:
1275 * @str: the xmlChar * array
1276 * @val: the xmlChar to search
1277 *
1278 * a strchr for xmlChar's
1279 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001280 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001281 */
1282
1283const xmlChar *
1284xmlStrchr(const xmlChar *str, xmlChar val) {
1285 if (str == NULL) return(NULL);
1286 while (*str != 0) { /* non input consuming */
1287 if (*str == val) return((xmlChar *) str);
1288 str++;
1289 }
1290 return(NULL);
1291}
1292
1293/**
1294 * xmlStrstr:
1295 * @str: the xmlChar * array (haystack)
1296 * @val: the xmlChar to search (needle)
1297 *
1298 * a strstr for xmlChar's
1299 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001300 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001301 */
1302
1303const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001304xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001305 int n;
1306
1307 if (str == NULL) return(NULL);
1308 if (val == NULL) return(NULL);
1309 n = xmlStrlen(val);
1310
1311 if (n == 0) return(str);
1312 while (*str != 0) { /* non input consuming */
1313 if (*str == *val) {
1314 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1315 }
1316 str++;
1317 }
1318 return(NULL);
1319}
1320
1321/**
1322 * xmlStrcasestr:
1323 * @str: the xmlChar * array (haystack)
1324 * @val: the xmlChar to search (needle)
1325 *
1326 * a case-ignoring strstr for xmlChar's
1327 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001328 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001329 */
1330
1331const xmlChar *
1332xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1333 int n;
1334
1335 if (str == NULL) return(NULL);
1336 if (val == NULL) return(NULL);
1337 n = xmlStrlen(val);
1338
1339 if (n == 0) return(str);
1340 while (*str != 0) { /* non input consuming */
1341 if (casemap[*str] == casemap[*val])
1342 if (!xmlStrncasecmp(str, val, n)) return(str);
1343 str++;
1344 }
1345 return(NULL);
1346}
1347
1348/**
1349 * xmlStrsub:
1350 * @str: the xmlChar * array (haystack)
1351 * @start: the index of the first char (zero based)
1352 * @len: the length of the substring
1353 *
1354 * Extract a substring of a given string
1355 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001356 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001357 */
1358
1359xmlChar *
1360xmlStrsub(const xmlChar *str, int start, int len) {
1361 int i;
1362
1363 if (str == NULL) return(NULL);
1364 if (start < 0) return(NULL);
1365 if (len < 0) return(NULL);
1366
1367 for (i = 0;i < start;i++) {
1368 if (*str == 0) return(NULL);
1369 str++;
1370 }
1371 if (*str == 0) return(NULL);
1372 return(xmlStrndup(str, len));
1373}
1374
1375/**
1376 * xmlStrlen:
1377 * @str: the xmlChar * array
1378 *
1379 * length of a xmlChar's string
1380 *
1381 * Returns the number of xmlChar contained in the ARRAY.
1382 */
1383
1384int
1385xmlStrlen(const xmlChar *str) {
1386 int len = 0;
1387
1388 if (str == NULL) return(0);
1389 while (*str != 0) { /* non input consuming */
1390 str++;
1391 len++;
1392 }
1393 return(len);
1394}
1395
1396/**
1397 * xmlStrncat:
1398 * @cur: the original xmlChar * array
1399 * @add: the xmlChar * array added
1400 * @len: the length of @add
1401 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001402 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001403 * first bytes of @add.
1404 *
1405 * Returns a new xmlChar *, the original @cur is reallocated if needed
1406 * and should not be freed
1407 */
1408
1409xmlChar *
1410xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1411 int size;
1412 xmlChar *ret;
1413
1414 if ((add == NULL) || (len == 0))
1415 return(cur);
1416 if (cur == NULL)
1417 return(xmlStrndup(add, len));
1418
1419 size = xmlStrlen(cur);
1420 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1421 if (ret == NULL) {
1422 xmlGenericError(xmlGenericErrorContext,
1423 "xmlStrncat: realloc of %ld byte failed\n",
1424 (size + len + 1) * (long)sizeof(xmlChar));
1425 return(cur);
1426 }
1427 memcpy(&ret[size], add, len * sizeof(xmlChar));
1428 ret[size + len] = 0;
1429 return(ret);
1430}
1431
1432/**
1433 * xmlStrcat:
1434 * @cur: the original xmlChar * array
1435 * @add: the xmlChar * array added
1436 *
1437 * a strcat for array of xmlChar's. Since they are supposed to be
1438 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1439 * a termination mark of '0'.
1440 *
1441 * Returns a new xmlChar * containing the concatenated string.
1442 */
1443xmlChar *
1444xmlStrcat(xmlChar *cur, const xmlChar *add) {
1445 const xmlChar *p = add;
1446
1447 if (add == NULL) return(cur);
1448 if (cur == NULL)
1449 return(xmlStrdup(add));
1450
1451 while (*p != 0) p++; /* non input consuming */
1452 return(xmlStrncat(cur, add, p - add));
1453}
1454
1455/************************************************************************
1456 * *
1457 * Commodity functions, cleanup needed ? *
1458 * *
1459 ************************************************************************/
1460
1461/**
1462 * areBlanks:
1463 * @ctxt: an XML parser context
1464 * @str: a xmlChar *
1465 * @len: the size of @str
1466 *
1467 * Is this a sequence of blank chars that one can ignore ?
1468 *
1469 * Returns 1 if ignorable 0 otherwise.
1470 */
1471
1472static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1473 int i, ret;
1474 xmlNodePtr lastChild;
1475
Daniel Veillard05c13a22001-09-09 08:38:09 +00001476 /*
1477 * Don't spend time trying to differentiate them, the same callback is
1478 * used !
1479 */
1480 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001481 return(0);
1482
Owen Taylor3473f882001-02-23 17:55:21 +00001483 /*
1484 * Check for xml:space value.
1485 */
1486 if (*(ctxt->space) == 1)
1487 return(0);
1488
1489 /*
1490 * Check that the string is made of blanks
1491 */
1492 for (i = 0;i < len;i++)
1493 if (!(IS_BLANK(str[i]))) return(0);
1494
1495 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001496 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001497 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001498 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001499 if (ctxt->myDoc != NULL) {
1500 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1501 if (ret == 0) return(1);
1502 if (ret == 1) return(0);
1503 }
1504
1505 /*
1506 * Otherwise, heuristic :-\
1507 */
Owen Taylor3473f882001-02-23 17:55:21 +00001508 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001509 if ((ctxt->node->children == NULL) &&
1510 (RAW == '<') && (NXT(1) == '/')) return(0);
1511
1512 lastChild = xmlGetLastChild(ctxt->node);
1513 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001514 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1515 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 } else if (xmlNodeIsText(lastChild))
1517 return(0);
1518 else if ((ctxt->node->children != NULL) &&
1519 (xmlNodeIsText(ctxt->node->children)))
1520 return(0);
1521 return(1);
1522}
1523
1524/*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001525 * Forward definition for recursive behavior.
Owen Taylor3473f882001-02-23 17:55:21 +00001526 */
1527void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1528void xmlParseReference(xmlParserCtxtPtr ctxt);
1529
1530/************************************************************************
1531 * *
1532 * Extra stuff for namespace support *
1533 * Relates to http://www.w3.org/TR/WD-xml-names *
1534 * *
1535 ************************************************************************/
1536
1537/**
1538 * xmlSplitQName:
1539 * @ctxt: an XML parser context
1540 * @name: an XML parser context
1541 * @prefix: a xmlChar **
1542 *
1543 * parse an UTF8 encoded XML qualified name string
1544 *
1545 * [NS 5] QName ::= (Prefix ':')? LocalPart
1546 *
1547 * [NS 6] Prefix ::= NCName
1548 *
1549 * [NS 7] LocalPart ::= NCName
1550 *
1551 * Returns the local part, and prefix is updated
1552 * to get the Prefix if any.
1553 */
1554
1555xmlChar *
1556xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1557 xmlChar buf[XML_MAX_NAMELEN + 5];
1558 xmlChar *buffer = NULL;
1559 int len = 0;
1560 int max = XML_MAX_NAMELEN;
1561 xmlChar *ret = NULL;
1562 const xmlChar *cur = name;
1563 int c;
1564
1565 *prefix = NULL;
1566
1567#ifndef XML_XML_NAMESPACE
1568 /* xml: prefix is not really a namespace */
1569 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1570 (cur[2] == 'l') && (cur[3] == ':'))
1571 return(xmlStrdup(name));
1572#endif
1573
1574 /* nasty but valid */
1575 if (cur[0] == ':')
1576 return(xmlStrdup(name));
1577
1578 c = *cur++;
1579 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1580 buf[len++] = c;
1581 c = *cur++;
1582 }
1583 if (len >= max) {
1584 /*
1585 * Okay someone managed to make a huge name, so he's ready to pay
1586 * for the processing speed.
1587 */
1588 max = len * 2;
1589
1590 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1591 if (buffer == NULL) {
1592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1593 ctxt->sax->error(ctxt->userData,
1594 "xmlSplitQName: out of memory\n");
1595 return(NULL);
1596 }
1597 memcpy(buffer, buf, len);
1598 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1599 if (len + 10 > max) {
1600 max *= 2;
1601 buffer = (xmlChar *) xmlRealloc(buffer,
1602 max * sizeof(xmlChar));
1603 if (buffer == NULL) {
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "xmlSplitQName: out of memory\n");
1607 return(NULL);
1608 }
1609 }
1610 buffer[len++] = c;
1611 c = *cur++;
1612 }
1613 buffer[len] = 0;
1614 }
1615
1616 if (buffer == NULL)
1617 ret = xmlStrndup(buf, len);
1618 else {
1619 ret = buffer;
1620 buffer = NULL;
1621 max = XML_MAX_NAMELEN;
1622 }
1623
1624
1625 if (c == ':') {
1626 c = *cur++;
1627 if (c == 0) return(ret);
1628 *prefix = ret;
1629 len = 0;
1630
1631 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1632 buf[len++] = c;
1633 c = *cur++;
1634 }
1635 if (len >= max) {
1636 /*
1637 * Okay someone managed to make a huge name, so he's ready to pay
1638 * for the processing speed.
1639 */
1640 max = len * 2;
1641
1642 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1643 if (buffer == NULL) {
1644 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645 ctxt->sax->error(ctxt->userData,
1646 "xmlSplitQName: out of memory\n");
1647 return(NULL);
1648 }
1649 memcpy(buffer, buf, len);
1650 while (c != 0) { /* tested bigname2.xml */
1651 if (len + 10 > max) {
1652 max *= 2;
1653 buffer = (xmlChar *) xmlRealloc(buffer,
1654 max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 }
1662 buffer[len++] = c;
1663 c = *cur++;
1664 }
1665 buffer[len] = 0;
1666 }
1667
1668 if (buffer == NULL)
1669 ret = xmlStrndup(buf, len);
1670 else {
1671 ret = buffer;
1672 }
1673 }
1674
1675 return(ret);
1676}
1677
1678/************************************************************************
1679 * *
1680 * The parser itself *
1681 * Relates to http://www.w3.org/TR/REC-xml *
1682 * *
1683 ************************************************************************/
1684
Daniel Veillard76d66f42001-05-16 21:05:17 +00001685static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001686/**
1687 * xmlParseName:
1688 * @ctxt: an XML parser context
1689 *
1690 * parse an XML name.
1691 *
1692 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1693 * CombiningChar | Extender
1694 *
1695 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1696 *
1697 * [6] Names ::= Name (S Name)*
1698 *
1699 * Returns the Name parsed or NULL
1700 */
1701
1702xmlChar *
1703xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001704 const xmlChar *in;
1705 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001706 int count = 0;
1707
1708 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001709
1710 /*
1711 * Accelerator for simple ASCII names
1712 */
1713 in = ctxt->input->cur;
1714 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1715 ((*in >= 0x41) && (*in <= 0x5A)) ||
1716 (*in == '_') || (*in == ':')) {
1717 in++;
1718 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1719 ((*in >= 0x41) && (*in <= 0x5A)) ||
1720 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001721 (*in == '_') || (*in == '-') ||
1722 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001723 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001724 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001725 count = in - ctxt->input->cur;
1726 ret = xmlStrndup(ctxt->input->cur, count);
1727 ctxt->input->cur = in;
1728 return(ret);
1729 }
1730 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001731 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001732}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001733
Daniel Veillard76d66f42001-05-16 21:05:17 +00001734static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001735xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1736 xmlChar buf[XML_MAX_NAMELEN + 5];
1737 int len = 0, l;
1738 int c;
1739 int count = 0;
1740
1741 /*
1742 * Handler for more complex cases
1743 */
1744 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001745 c = CUR_CHAR(l);
1746 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1747 (!IS_LETTER(c) && (c != '_') &&
1748 (c != ':'))) {
1749 return(NULL);
1750 }
1751
1752 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1753 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1754 (c == '.') || (c == '-') ||
1755 (c == '_') || (c == ':') ||
1756 (IS_COMBINING(c)) ||
1757 (IS_EXTENDER(c)))) {
1758 if (count++ > 100) {
1759 count = 0;
1760 GROW;
1761 }
1762 COPY_BUF(l,buf,len,c);
1763 NEXTL(l);
1764 c = CUR_CHAR(l);
1765 if (len >= XML_MAX_NAMELEN) {
1766 /*
1767 * Okay someone managed to make a huge name, so he's ready to pay
1768 * for the processing speed.
1769 */
1770 xmlChar *buffer;
1771 int max = len * 2;
1772
1773 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1774 if (buffer == NULL) {
1775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001777 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001778 return(NULL);
1779 }
1780 memcpy(buffer, buf, len);
1781 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1782 (c == '.') || (c == '-') ||
1783 (c == '_') || (c == ':') ||
1784 (IS_COMBINING(c)) ||
1785 (IS_EXTENDER(c))) {
1786 if (count++ > 100) {
1787 count = 0;
1788 GROW;
1789 }
1790 if (len + 10 > max) {
1791 max *= 2;
1792 buffer = (xmlChar *) xmlRealloc(buffer,
1793 max * sizeof(xmlChar));
1794 if (buffer == NULL) {
1795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1796 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001797 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001798 return(NULL);
1799 }
1800 }
1801 COPY_BUF(l,buffer,len,c);
1802 NEXTL(l);
1803 c = CUR_CHAR(l);
1804 }
1805 buffer[len] = 0;
1806 return(buffer);
1807 }
1808 }
1809 return(xmlStrndup(buf, len));
1810}
1811
1812/**
1813 * xmlParseStringName:
1814 * @ctxt: an XML parser context
1815 * @str: a pointer to the string pointer (IN/OUT)
1816 *
1817 * parse an XML name.
1818 *
1819 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1820 * CombiningChar | Extender
1821 *
1822 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1823 *
1824 * [6] Names ::= Name (S Name)*
1825 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001826 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001827 * is updated to the current location in the string.
1828 */
1829
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001830static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001831xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1832 xmlChar buf[XML_MAX_NAMELEN + 5];
1833 const xmlChar *cur = *str;
1834 int len = 0, l;
1835 int c;
1836
1837 c = CUR_SCHAR(cur, l);
1838 if (!IS_LETTER(c) && (c != '_') &&
1839 (c != ':')) {
1840 return(NULL);
1841 }
1842
1843 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1844 (c == '.') || (c == '-') ||
1845 (c == '_') || (c == ':') ||
1846 (IS_COMBINING(c)) ||
1847 (IS_EXTENDER(c))) {
1848 COPY_BUF(l,buf,len,c);
1849 cur += l;
1850 c = CUR_SCHAR(cur, l);
1851 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1852 /*
1853 * Okay someone managed to make a huge name, so he's ready to pay
1854 * for the processing speed.
1855 */
1856 xmlChar *buffer;
1857 int max = len * 2;
1858
1859 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1860 if (buffer == NULL) {
1861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1862 ctxt->sax->error(ctxt->userData,
1863 "xmlParseStringName: out of memory\n");
1864 return(NULL);
1865 }
1866 memcpy(buffer, buf, len);
1867 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1868 (c == '.') || (c == '-') ||
1869 (c == '_') || (c == ':') ||
1870 (IS_COMBINING(c)) ||
1871 (IS_EXTENDER(c))) {
1872 if (len + 10 > max) {
1873 max *= 2;
1874 buffer = (xmlChar *) xmlRealloc(buffer,
1875 max * sizeof(xmlChar));
1876 if (buffer == NULL) {
1877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1878 ctxt->sax->error(ctxt->userData,
1879 "xmlParseStringName: out of memory\n");
1880 return(NULL);
1881 }
1882 }
1883 COPY_BUF(l,buffer,len,c);
1884 cur += l;
1885 c = CUR_SCHAR(cur, l);
1886 }
1887 buffer[len] = 0;
1888 *str = cur;
1889 return(buffer);
1890 }
1891 }
1892 *str = cur;
1893 return(xmlStrndup(buf, len));
1894}
1895
1896/**
1897 * xmlParseNmtoken:
1898 * @ctxt: an XML parser context
1899 *
1900 * parse an XML Nmtoken.
1901 *
1902 * [7] Nmtoken ::= (NameChar)+
1903 *
1904 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1905 *
1906 * Returns the Nmtoken parsed or NULL
1907 */
1908
1909xmlChar *
1910xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1911 xmlChar buf[XML_MAX_NAMELEN + 5];
1912 int len = 0, l;
1913 int c;
1914 int count = 0;
1915
1916 GROW;
1917 c = CUR_CHAR(l);
1918
1919 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1920 (c == '.') || (c == '-') ||
1921 (c == '_') || (c == ':') ||
1922 (IS_COMBINING(c)) ||
1923 (IS_EXTENDER(c))) {
1924 if (count++ > 100) {
1925 count = 0;
1926 GROW;
1927 }
1928 COPY_BUF(l,buf,len,c);
1929 NEXTL(l);
1930 c = CUR_CHAR(l);
1931 if (len >= XML_MAX_NAMELEN) {
1932 /*
1933 * Okay someone managed to make a huge token, so he's ready to pay
1934 * for the processing speed.
1935 */
1936 xmlChar *buffer;
1937 int max = len * 2;
1938
1939 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1940 if (buffer == NULL) {
1941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1942 ctxt->sax->error(ctxt->userData,
1943 "xmlParseNmtoken: out of memory\n");
1944 return(NULL);
1945 }
1946 memcpy(buffer, buf, len);
1947 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1948 (c == '.') || (c == '-') ||
1949 (c == '_') || (c == ':') ||
1950 (IS_COMBINING(c)) ||
1951 (IS_EXTENDER(c))) {
1952 if (count++ > 100) {
1953 count = 0;
1954 GROW;
1955 }
1956 if (len + 10 > max) {
1957 max *= 2;
1958 buffer = (xmlChar *) xmlRealloc(buffer,
1959 max * sizeof(xmlChar));
1960 if (buffer == NULL) {
1961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1962 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001963 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001964 return(NULL);
1965 }
1966 }
1967 COPY_BUF(l,buffer,len,c);
1968 NEXTL(l);
1969 c = CUR_CHAR(l);
1970 }
1971 buffer[len] = 0;
1972 return(buffer);
1973 }
1974 }
1975 if (len == 0)
1976 return(NULL);
1977 return(xmlStrndup(buf, len));
1978}
1979
1980/**
1981 * xmlParseEntityValue:
1982 * @ctxt: an XML parser context
1983 * @orig: if non-NULL store a copy of the original entity value
1984 *
1985 * parse a value for ENTITY declarations
1986 *
1987 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1988 * "'" ([^%&'] | PEReference | Reference)* "'"
1989 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001990 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00001991 */
1992
1993xmlChar *
1994xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1995 xmlChar *buf = NULL;
1996 int len = 0;
1997 int size = XML_PARSER_BUFFER_SIZE;
1998 int c, l;
1999 xmlChar stop;
2000 xmlChar *ret = NULL;
2001 const xmlChar *cur = NULL;
2002 xmlParserInputPtr input;
2003
2004 if (RAW == '"') stop = '"';
2005 else if (RAW == '\'') stop = '\'';
2006 else {
2007 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2009 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2010 ctxt->wellFormed = 0;
2011 ctxt->disableSAX = 1;
2012 return(NULL);
2013 }
2014 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2015 if (buf == NULL) {
2016 xmlGenericError(xmlGenericErrorContext,
2017 "malloc of %d byte failed\n", size);
2018 return(NULL);
2019 }
2020
2021 /*
2022 * The content of the entity definition is copied in a buffer.
2023 */
2024
2025 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2026 input = ctxt->input;
2027 GROW;
2028 NEXT;
2029 c = CUR_CHAR(l);
2030 /*
2031 * NOTE: 4.4.5 Included in Literal
2032 * When a parameter entity reference appears in a literal entity
2033 * value, ... a single or double quote character in the replacement
2034 * text is always treated as a normal data character and will not
2035 * terminate the literal.
2036 * In practice it means we stop the loop only when back at parsing
2037 * the initial entity and the quote is found
2038 */
2039 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2040 (ctxt->input != input))) {
2041 if (len + 5 >= size) {
2042 size *= 2;
2043 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2044 if (buf == NULL) {
2045 xmlGenericError(xmlGenericErrorContext,
2046 "realloc of %d byte failed\n", size);
2047 return(NULL);
2048 }
2049 }
2050 COPY_BUF(l,buf,len,c);
2051 NEXTL(l);
2052 /*
2053 * Pop-up of finished entities.
2054 */
2055 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2056 xmlPopInput(ctxt);
2057
2058 GROW;
2059 c = CUR_CHAR(l);
2060 if (c == 0) {
2061 GROW;
2062 c = CUR_CHAR(l);
2063 }
2064 }
2065 buf[len] = 0;
2066
2067 /*
2068 * Raise problem w.r.t. '&' and '%' being used in non-entities
2069 * reference constructs. Note Charref will be handled in
2070 * xmlStringDecodeEntities()
2071 */
2072 cur = buf;
2073 while (*cur != 0) { /* non input consuming */
2074 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2075 xmlChar *name;
2076 xmlChar tmp = *cur;
2077
2078 cur++;
2079 name = xmlParseStringName(ctxt, &cur);
2080 if ((name == NULL) || (*cur != ';')) {
2081 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2082 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2083 ctxt->sax->error(ctxt->userData,
2084 "EntityValue: '%c' forbidden except for entities references\n",
2085 tmp);
2086 ctxt->wellFormed = 0;
2087 ctxt->disableSAX = 1;
2088 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002089 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2090 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002091 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2092 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2093 ctxt->sax->error(ctxt->userData,
2094 "EntityValue: PEReferences forbidden in internal subset\n",
2095 tmp);
2096 ctxt->wellFormed = 0;
2097 ctxt->disableSAX = 1;
2098 }
2099 if (name != NULL)
2100 xmlFree(name);
2101 }
2102 cur++;
2103 }
2104
2105 /*
2106 * Then PEReference entities are substituted.
2107 */
2108 if (c != stop) {
2109 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2111 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2112 ctxt->wellFormed = 0;
2113 ctxt->disableSAX = 1;
2114 xmlFree(buf);
2115 } else {
2116 NEXT;
2117 /*
2118 * NOTE: 4.4.7 Bypassed
2119 * When a general entity reference appears in the EntityValue in
2120 * an entity declaration, it is bypassed and left as is.
2121 * so XML_SUBSTITUTE_REF is not set here.
2122 */
2123 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2124 0, 0, 0);
2125 if (orig != NULL)
2126 *orig = buf;
2127 else
2128 xmlFree(buf);
2129 }
2130
2131 return(ret);
2132}
2133
2134/**
2135 * xmlParseAttValue:
2136 * @ctxt: an XML parser context
2137 *
2138 * parse a value for an attribute
2139 * Note: the parser won't do substitution of entities here, this
2140 * will be handled later in xmlStringGetNodeList
2141 *
2142 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2143 * "'" ([^<&'] | Reference)* "'"
2144 *
2145 * 3.3.3 Attribute-Value Normalization:
2146 * Before the value of an attribute is passed to the application or
2147 * checked for validity, the XML processor must normalize it as follows:
2148 * - a character reference is processed by appending the referenced
2149 * character to the attribute value
2150 * - an entity reference is processed by recursively processing the
2151 * replacement text of the entity
2152 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2153 * appending #x20 to the normalized value, except that only a single
2154 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2155 * parsed entity or the literal entity value of an internal parsed entity
2156 * - other characters are processed by appending them to the normalized value
2157 * If the declared value is not CDATA, then the XML processor must further
2158 * process the normalized attribute value by discarding any leading and
2159 * trailing space (#x20) characters, and by replacing sequences of space
2160 * (#x20) characters by a single space (#x20) character.
2161 * All attributes for which no declaration has been read should be treated
2162 * by a non-validating parser as if declared CDATA.
2163 *
2164 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2165 */
2166
2167xmlChar *
2168xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2169 xmlChar limit = 0;
2170 xmlChar *buf = NULL;
2171 int len = 0;
2172 int buf_size = 0;
2173 int c, l;
2174 xmlChar *current = NULL;
2175 xmlEntityPtr ent;
2176
2177
2178 SHRINK;
2179 if (NXT(0) == '"') {
2180 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2181 limit = '"';
2182 NEXT;
2183 } else if (NXT(0) == '\'') {
2184 limit = '\'';
2185 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2186 NEXT;
2187 } else {
2188 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2190 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2191 ctxt->wellFormed = 0;
2192 ctxt->disableSAX = 1;
2193 return(NULL);
2194 }
2195
2196 /*
2197 * allocate a translation buffer.
2198 */
2199 buf_size = XML_PARSER_BUFFER_SIZE;
2200 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2201 if (buf == NULL) {
2202 perror("xmlParseAttValue: malloc failed");
2203 return(NULL);
2204 }
2205
2206 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002207 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002208 */
2209 c = CUR_CHAR(l);
2210 while (((NXT(0) != limit) && /* checked */
2211 (c != '<')) || (ctxt->token != 0)) {
2212 if (c == 0) break;
2213 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002214 if (ctxt->replaceEntities) {
2215 if (len > buf_size - 10) {
2216 growBuffer(buf);
2217 }
2218 buf[len++] = '&';
2219 } else {
2220 /*
2221 * The reparsing will be done in xmlStringGetNodeList()
2222 * called by the attribute() function in SAX.c
2223 */
2224 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002225
Daniel Veillard319a7422001-09-11 09:27:09 +00002226 if (len > buf_size - 10) {
2227 growBuffer(buf);
2228 }
2229 current = &buffer[0];
2230 while (*current != 0) { /* non input consuming */
2231 buf[len++] = *current++;
2232 }
2233 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002234 }
Owen Taylor3473f882001-02-23 17:55:21 +00002235 } else if (c == '&') {
2236 if (NXT(1) == '#') {
2237 int val = xmlParseCharRef(ctxt);
2238 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002239 if (ctxt->replaceEntities) {
2240 if (len > buf_size - 10) {
2241 growBuffer(buf);
2242 }
2243 buf[len++] = '&';
2244 } else {
2245 /*
2246 * The reparsing will be done in xmlStringGetNodeList()
2247 * called by the attribute() function in SAX.c
2248 */
2249 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002250
Daniel Veillard319a7422001-09-11 09:27:09 +00002251 if (len > buf_size - 10) {
2252 growBuffer(buf);
2253 }
2254 current = &buffer[0];
2255 while (*current != 0) { /* non input consuming */
2256 buf[len++] = *current++;
2257 }
Owen Taylor3473f882001-02-23 17:55:21 +00002258 }
2259 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002260 if (len > buf_size - 10) {
2261 growBuffer(buf);
2262 }
Owen Taylor3473f882001-02-23 17:55:21 +00002263 len += xmlCopyChar(0, &buf[len], val);
2264 }
2265 } else {
2266 ent = xmlParseEntityRef(ctxt);
2267 if ((ent != NULL) &&
2268 (ctxt->replaceEntities != 0)) {
2269 xmlChar *rep;
2270
2271 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2272 rep = xmlStringDecodeEntities(ctxt, ent->content,
2273 XML_SUBSTITUTE_REF, 0, 0, 0);
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming */
2277 buf[len++] = *current++;
2278 if (len > buf_size - 10) {
2279 growBuffer(buf);
2280 }
2281 }
2282 xmlFree(rep);
2283 }
2284 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002285 if (len > buf_size - 10) {
2286 growBuffer(buf);
2287 }
Owen Taylor3473f882001-02-23 17:55:21 +00002288 if (ent->content != NULL)
2289 buf[len++] = ent->content[0];
2290 }
2291 } else if (ent != NULL) {
2292 int i = xmlStrlen(ent->name);
2293 const xmlChar *cur = ent->name;
2294
2295 /*
2296 * This may look absurd but is needed to detect
2297 * entities problems
2298 */
2299 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2300 (ent->content != NULL)) {
2301 xmlChar *rep;
2302 rep = xmlStringDecodeEntities(ctxt, ent->content,
2303 XML_SUBSTITUTE_REF, 0, 0, 0);
2304 if (rep != NULL)
2305 xmlFree(rep);
2306 }
2307
2308 /*
2309 * Just output the reference
2310 */
2311 buf[len++] = '&';
2312 if (len > buf_size - i - 10) {
2313 growBuffer(buf);
2314 }
2315 for (;i > 0;i--)
2316 buf[len++] = *cur++;
2317 buf[len++] = ';';
2318 }
2319 }
2320 } else {
2321 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2322 COPY_BUF(l,buf,len,0x20);
2323 if (len > buf_size - 10) {
2324 growBuffer(buf);
2325 }
2326 } else {
2327 COPY_BUF(l,buf,len,c);
2328 if (len > buf_size - 10) {
2329 growBuffer(buf);
2330 }
2331 }
2332 NEXTL(l);
2333 }
2334 GROW;
2335 c = CUR_CHAR(l);
2336 }
2337 buf[len++] = 0;
2338 if (RAW == '<') {
2339 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2341 ctxt->sax->error(ctxt->userData,
2342 "Unescaped '<' not allowed in attributes values\n");
2343 ctxt->wellFormed = 0;
2344 ctxt->disableSAX = 1;
2345 } else if (RAW != limit) {
2346 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2348 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2349 ctxt->wellFormed = 0;
2350 ctxt->disableSAX = 1;
2351 } else
2352 NEXT;
2353 return(buf);
2354}
2355
2356/**
2357 * xmlParseSystemLiteral:
2358 * @ctxt: an XML parser context
2359 *
2360 * parse an XML Literal
2361 *
2362 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2363 *
2364 * Returns the SystemLiteral parsed or NULL
2365 */
2366
2367xmlChar *
2368xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2369 xmlChar *buf = NULL;
2370 int len = 0;
2371 int size = XML_PARSER_BUFFER_SIZE;
2372 int cur, l;
2373 xmlChar stop;
2374 int state = ctxt->instate;
2375 int count = 0;
2376
2377 SHRINK;
2378 if (RAW == '"') {
2379 NEXT;
2380 stop = '"';
2381 } else if (RAW == '\'') {
2382 NEXT;
2383 stop = '\'';
2384 } else {
2385 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2387 ctxt->sax->error(ctxt->userData,
2388 "SystemLiteral \" or ' expected\n");
2389 ctxt->wellFormed = 0;
2390 ctxt->disableSAX = 1;
2391 return(NULL);
2392 }
2393
2394 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2395 if (buf == NULL) {
2396 xmlGenericError(xmlGenericErrorContext,
2397 "malloc of %d byte failed\n", size);
2398 return(NULL);
2399 }
2400 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2401 cur = CUR_CHAR(l);
2402 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2403 if (len + 5 >= size) {
2404 size *= 2;
2405 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2406 if (buf == NULL) {
2407 xmlGenericError(xmlGenericErrorContext,
2408 "realloc of %d byte failed\n", size);
2409 ctxt->instate = (xmlParserInputState) state;
2410 return(NULL);
2411 }
2412 }
2413 count++;
2414 if (count > 50) {
2415 GROW;
2416 count = 0;
2417 }
2418 COPY_BUF(l,buf,len,cur);
2419 NEXTL(l);
2420 cur = CUR_CHAR(l);
2421 if (cur == 0) {
2422 GROW;
2423 SHRINK;
2424 cur = CUR_CHAR(l);
2425 }
2426 }
2427 buf[len] = 0;
2428 ctxt->instate = (xmlParserInputState) state;
2429 if (!IS_CHAR(cur)) {
2430 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2432 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2433 ctxt->wellFormed = 0;
2434 ctxt->disableSAX = 1;
2435 } else {
2436 NEXT;
2437 }
2438 return(buf);
2439}
2440
2441/**
2442 * xmlParsePubidLiteral:
2443 * @ctxt: an XML parser context
2444 *
2445 * parse an XML public literal
2446 *
2447 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2448 *
2449 * Returns the PubidLiteral parsed or NULL.
2450 */
2451
2452xmlChar *
2453xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2454 xmlChar *buf = NULL;
2455 int len = 0;
2456 int size = XML_PARSER_BUFFER_SIZE;
2457 xmlChar cur;
2458 xmlChar stop;
2459 int count = 0;
2460
2461 SHRINK;
2462 if (RAW == '"') {
2463 NEXT;
2464 stop = '"';
2465 } else if (RAW == '\'') {
2466 NEXT;
2467 stop = '\'';
2468 } else {
2469 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2471 ctxt->sax->error(ctxt->userData,
2472 "SystemLiteral \" or ' expected\n");
2473 ctxt->wellFormed = 0;
2474 ctxt->disableSAX = 1;
2475 return(NULL);
2476 }
2477 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2478 if (buf == NULL) {
2479 xmlGenericError(xmlGenericErrorContext,
2480 "malloc of %d byte failed\n", size);
2481 return(NULL);
2482 }
2483 cur = CUR;
2484 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2485 if (len + 1 >= size) {
2486 size *= 2;
2487 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2488 if (buf == NULL) {
2489 xmlGenericError(xmlGenericErrorContext,
2490 "realloc of %d byte failed\n", size);
2491 return(NULL);
2492 }
2493 }
2494 buf[len++] = cur;
2495 count++;
2496 if (count > 50) {
2497 GROW;
2498 count = 0;
2499 }
2500 NEXT;
2501 cur = CUR;
2502 if (cur == 0) {
2503 GROW;
2504 SHRINK;
2505 cur = CUR;
2506 }
2507 }
2508 buf[len] = 0;
2509 if (cur != stop) {
2510 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2512 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2513 ctxt->wellFormed = 0;
2514 ctxt->disableSAX = 1;
2515 } else {
2516 NEXT;
2517 }
2518 return(buf);
2519}
2520
Daniel Veillard48b2f892001-02-25 16:11:03 +00002521void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002522/**
2523 * xmlParseCharData:
2524 * @ctxt: an XML parser context
2525 * @cdata: int indicating whether we are within a CDATA section
2526 *
2527 * parse a CharData section.
2528 * if we are within a CDATA section ']]>' marks an end of section.
2529 *
2530 * The right angle bracket (>) may be represented using the string "&gt;",
2531 * and must, for compatibility, be escaped using "&gt;" or a character
2532 * reference when it appears in the string "]]>" in content, when that
2533 * string is not marking the end of a CDATA section.
2534 *
2535 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2536 */
2537
2538void
2539xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002540 const xmlChar *in;
2541 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002542 int line = ctxt->input->line;
2543 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002544
2545 SHRINK;
2546 GROW;
2547 /*
2548 * Accelerated common case where input don't need to be
2549 * modified before passing it to the handler.
2550 */
2551 if ((ctxt->token == 0) && (!cdata)) {
2552 in = ctxt->input->cur;
2553 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002554get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002555 while (((*in >= 0x20) && (*in != '<') &&
2556 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2557 in++;
2558 if (*in == 0xA) {
2559 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002560 in++;
2561 while (*in == 0xA) {
2562 ctxt->input->line++;
2563 in++;
2564 }
2565 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002566 }
2567 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002568 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002569 if (IS_BLANK(*ctxt->input->cur)) {
2570 const xmlChar *tmp = ctxt->input->cur;
2571 ctxt->input->cur = in;
2572 if (areBlanks(ctxt, tmp, nbchar)) {
2573 if (ctxt->sax->ignorableWhitespace != NULL)
2574 ctxt->sax->ignorableWhitespace(ctxt->userData,
2575 tmp, nbchar);
2576 } else {
2577 if (ctxt->sax->characters != NULL)
2578 ctxt->sax->characters(ctxt->userData,
2579 tmp, nbchar);
2580 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002581 line = ctxt->input->line;
2582 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002583 } else {
2584 if (ctxt->sax->characters != NULL)
2585 ctxt->sax->characters(ctxt->userData,
2586 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002587 line = ctxt->input->line;
2588 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002589 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002590 }
2591 ctxt->input->cur = in;
2592 if (*in == 0xD) {
2593 in++;
2594 if (*in == 0xA) {
2595 ctxt->input->cur = in;
2596 in++;
2597 ctxt->input->line++;
2598 continue; /* while */
2599 }
2600 in--;
2601 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002602 if (*in == '<') {
2603 return;
2604 }
2605 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return;
2607 }
2608 SHRINK;
2609 GROW;
2610 in = ctxt->input->cur;
2611 } while ((*in >= 0x20) && (*in <= 0x7F));
2612 nbchar = 0;
2613 }
Daniel Veillard50582112001-03-26 22:52:16 +00002614 ctxt->input->line = line;
2615 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002616 xmlParseCharDataComplex(ctxt, cdata);
2617}
2618
2619void
2620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2622 int nbchar = 0;
2623 int cur, l;
2624 int count = 0;
2625
2626 SHRINK;
2627 GROW;
2628 cur = CUR_CHAR(l);
2629 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2630 ((cur != '&') || (ctxt->token == '&')) &&
2631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2632 if ((cur == ']') && (NXT(1) == ']') &&
2633 (NXT(2) == '>')) {
2634 if (cdata) break;
2635 else {
2636 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2638 ctxt->sax->error(ctxt->userData,
2639 "Sequence ']]>' not allowed in content\n");
2640 /* Should this be relaxed ??? I see a "must here */
2641 ctxt->wellFormed = 0;
2642 ctxt->disableSAX = 1;
2643 }
2644 }
2645 COPY_BUF(l,buf,nbchar,cur);
2646 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2647 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002648 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002649 */
2650 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2651 if (areBlanks(ctxt, buf, nbchar)) {
2652 if (ctxt->sax->ignorableWhitespace != NULL)
2653 ctxt->sax->ignorableWhitespace(ctxt->userData,
2654 buf, nbchar);
2655 } else {
2656 if (ctxt->sax->characters != NULL)
2657 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2658 }
2659 }
2660 nbchar = 0;
2661 }
2662 count++;
2663 if (count > 50) {
2664 GROW;
2665 count = 0;
2666 }
2667 NEXTL(l);
2668 cur = CUR_CHAR(l);
2669 }
2670 if (nbchar != 0) {
2671 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002672 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002673 */
2674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2675 if (areBlanks(ctxt, buf, nbchar)) {
2676 if (ctxt->sax->ignorableWhitespace != NULL)
2677 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2678 } else {
2679 if (ctxt->sax->characters != NULL)
2680 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2681 }
2682 }
2683 }
2684}
2685
2686/**
2687 * xmlParseExternalID:
2688 * @ctxt: an XML parser context
2689 * @publicID: a xmlChar** receiving PubidLiteral
2690 * @strict: indicate whether we should restrict parsing to only
2691 * production [75], see NOTE below
2692 *
2693 * Parse an External ID or a Public ID
2694 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002695 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002696 * 'PUBLIC' S PubidLiteral S SystemLiteral
2697 *
2698 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2699 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2700 *
2701 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2702 *
2703 * Returns the function returns SystemLiteral and in the second
2704 * case publicID receives PubidLiteral, is strict is off
2705 * it is possible to return NULL and have publicID set.
2706 */
2707
2708xmlChar *
2709xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2710 xmlChar *URI = NULL;
2711
2712 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002713
2714 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002715 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2716 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2717 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2718 SKIP(6);
2719 if (!IS_BLANK(CUR)) {
2720 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2722 ctxt->sax->error(ctxt->userData,
2723 "Space required after 'SYSTEM'\n");
2724 ctxt->wellFormed = 0;
2725 ctxt->disableSAX = 1;
2726 }
2727 SKIP_BLANKS;
2728 URI = xmlParseSystemLiteral(ctxt);
2729 if (URI == NULL) {
2730 ctxt->errNo = XML_ERR_URI_REQUIRED;
2731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2732 ctxt->sax->error(ctxt->userData,
2733 "xmlParseExternalID: SYSTEM, no URI\n");
2734 ctxt->wellFormed = 0;
2735 ctxt->disableSAX = 1;
2736 }
2737 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2738 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2739 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2740 SKIP(6);
2741 if (!IS_BLANK(CUR)) {
2742 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2744 ctxt->sax->error(ctxt->userData,
2745 "Space required after 'PUBLIC'\n");
2746 ctxt->wellFormed = 0;
2747 ctxt->disableSAX = 1;
2748 }
2749 SKIP_BLANKS;
2750 *publicID = xmlParsePubidLiteral(ctxt);
2751 if (*publicID == NULL) {
2752 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2756 ctxt->wellFormed = 0;
2757 ctxt->disableSAX = 1;
2758 }
2759 if (strict) {
2760 /*
2761 * We don't handle [83] so "S SystemLiteral" is required.
2762 */
2763 if (!IS_BLANK(CUR)) {
2764 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2766 ctxt->sax->error(ctxt->userData,
2767 "Space required after the Public Identifier\n");
2768 ctxt->wellFormed = 0;
2769 ctxt->disableSAX = 1;
2770 }
2771 } else {
2772 /*
2773 * We handle [83] so we return immediately, if
2774 * "S SystemLiteral" is not detected. From a purely parsing
2775 * point of view that's a nice mess.
2776 */
2777 const xmlChar *ptr;
2778 GROW;
2779
2780 ptr = CUR_PTR;
2781 if (!IS_BLANK(*ptr)) return(NULL);
2782
2783 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2784 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2785 }
2786 SKIP_BLANKS;
2787 URI = xmlParseSystemLiteral(ctxt);
2788 if (URI == NULL) {
2789 ctxt->errNo = XML_ERR_URI_REQUIRED;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "xmlParseExternalID: PUBLIC, no URI\n");
2793 ctxt->wellFormed = 0;
2794 ctxt->disableSAX = 1;
2795 }
2796 }
2797 return(URI);
2798}
2799
2800/**
2801 * xmlParseComment:
2802 * @ctxt: an XML parser context
2803 *
2804 * Skip an XML (SGML) comment <!-- .... -->
2805 * The spec says that "For compatibility, the string "--" (double-hyphen)
2806 * must not occur within comments. "
2807 *
2808 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2809 */
2810void
2811xmlParseComment(xmlParserCtxtPtr ctxt) {
2812 xmlChar *buf = NULL;
2813 int len;
2814 int size = XML_PARSER_BUFFER_SIZE;
2815 int q, ql;
2816 int r, rl;
2817 int cur, l;
2818 xmlParserInputState state;
2819 xmlParserInputPtr input = ctxt->input;
2820 int count = 0;
2821
2822 /*
2823 * Check that there is a comment right here.
2824 */
2825 if ((RAW != '<') || (NXT(1) != '!') ||
2826 (NXT(2) != '-') || (NXT(3) != '-')) return;
2827
2828 state = ctxt->instate;
2829 ctxt->instate = XML_PARSER_COMMENT;
2830 SHRINK;
2831 SKIP(4);
2832 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2833 if (buf == NULL) {
2834 xmlGenericError(xmlGenericErrorContext,
2835 "malloc of %d byte failed\n", size);
2836 ctxt->instate = state;
2837 return;
2838 }
2839 q = CUR_CHAR(ql);
2840 NEXTL(ql);
2841 r = CUR_CHAR(rl);
2842 NEXTL(rl);
2843 cur = CUR_CHAR(l);
2844 len = 0;
2845 while (IS_CHAR(cur) && /* checked */
2846 ((cur != '>') ||
2847 (r != '-') || (q != '-'))) {
2848 if ((r == '-') && (q == '-') && (len > 1)) {
2849 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2851 ctxt->sax->error(ctxt->userData,
2852 "Comment must not contain '--' (double-hyphen)`\n");
2853 ctxt->wellFormed = 0;
2854 ctxt->disableSAX = 1;
2855 }
2856 if (len + 5 >= size) {
2857 size *= 2;
2858 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2859 if (buf == NULL) {
2860 xmlGenericError(xmlGenericErrorContext,
2861 "realloc of %d byte failed\n", size);
2862 ctxt->instate = state;
2863 return;
2864 }
2865 }
2866 COPY_BUF(ql,buf,len,q);
2867 q = r;
2868 ql = rl;
2869 r = cur;
2870 rl = l;
2871
2872 count++;
2873 if (count > 50) {
2874 GROW;
2875 count = 0;
2876 }
2877 NEXTL(l);
2878 cur = CUR_CHAR(l);
2879 if (cur == 0) {
2880 SHRINK;
2881 GROW;
2882 cur = CUR_CHAR(l);
2883 }
2884 }
2885 buf[len] = 0;
2886 if (!IS_CHAR(cur)) {
2887 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2889 ctxt->sax->error(ctxt->userData,
2890 "Comment not terminated \n<!--%.50s\n", buf);
2891 ctxt->wellFormed = 0;
2892 ctxt->disableSAX = 1;
2893 xmlFree(buf);
2894 } else {
2895 if (input != ctxt->input) {
2896 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2898 ctxt->sax->error(ctxt->userData,
2899"Comment doesn't start and stop in the same entity\n");
2900 ctxt->wellFormed = 0;
2901 ctxt->disableSAX = 1;
2902 }
2903 NEXT;
2904 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2905 (!ctxt->disableSAX))
2906 ctxt->sax->comment(ctxt->userData, buf);
2907 xmlFree(buf);
2908 }
2909 ctxt->instate = state;
2910}
2911
2912/**
2913 * xmlParsePITarget:
2914 * @ctxt: an XML parser context
2915 *
2916 * parse the name of a PI
2917 *
2918 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2919 *
2920 * Returns the PITarget name or NULL
2921 */
2922
2923xmlChar *
2924xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2925 xmlChar *name;
2926
2927 name = xmlParseName(ctxt);
2928 if ((name != NULL) &&
2929 ((name[0] == 'x') || (name[0] == 'X')) &&
2930 ((name[1] == 'm') || (name[1] == 'M')) &&
2931 ((name[2] == 'l') || (name[2] == 'L'))) {
2932 int i;
2933 if ((name[0] == 'x') && (name[1] == 'm') &&
2934 (name[2] == 'l') && (name[3] == 0)) {
2935 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2937 ctxt->sax->error(ctxt->userData,
2938 "XML declaration allowed only at the start of the document\n");
2939 ctxt->wellFormed = 0;
2940 ctxt->disableSAX = 1;
2941 return(name);
2942 } else if (name[3] == 0) {
2943 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2945 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2946 ctxt->wellFormed = 0;
2947 ctxt->disableSAX = 1;
2948 return(name);
2949 }
2950 for (i = 0;;i++) {
2951 if (xmlW3CPIs[i] == NULL) break;
2952 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2953 return(name);
2954 }
2955 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2956 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2957 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002958 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002959 }
2960 }
2961 return(name);
2962}
2963
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises a XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing, in which case the PI is silently
 * ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect: S? "catalog" S? '=' S? quoted-URL S? */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* Scan up to the matching quote. */
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3026
Owen Taylor3473f882001-02-23 17:55:21 +00003027/**
3028 * xmlParsePI:
3029 * @ctxt: an XML parser context
3030 *
3031 * parse an XML Processing Instruction.
3032 *
3033 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3034 *
3035 * The processing is transfered to SAX once parsed.
3036 */
3037
3038void
3039xmlParsePI(xmlParserCtxtPtr ctxt) {
3040 xmlChar *buf = NULL;
3041 int len = 0;
3042 int size = XML_PARSER_BUFFER_SIZE;
3043 int cur, l;
3044 xmlChar *target;
3045 xmlParserInputState state;
3046 int count = 0;
3047
3048 if ((RAW == '<') && (NXT(1) == '?')) {
3049 xmlParserInputPtr input = ctxt->input;
3050 state = ctxt->instate;
3051 ctxt->instate = XML_PARSER_PI;
3052 /*
3053 * this is a Processing Instruction.
3054 */
3055 SKIP(2);
3056 SHRINK;
3057
3058 /*
3059 * Parse the target name and check for special support like
3060 * namespace.
3061 */
3062 target = xmlParsePITarget(ctxt);
3063 if (target != NULL) {
3064 if ((RAW == '?') && (NXT(1) == '>')) {
3065 if (input != ctxt->input) {
3066 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3068 ctxt->sax->error(ctxt->userData,
3069 "PI declaration doesn't start and stop in the same entity\n");
3070 ctxt->wellFormed = 0;
3071 ctxt->disableSAX = 1;
3072 }
3073 SKIP(2);
3074
3075 /*
3076 * SAX: PI detected.
3077 */
3078 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3079 (ctxt->sax->processingInstruction != NULL))
3080 ctxt->sax->processingInstruction(ctxt->userData,
3081 target, NULL);
3082 ctxt->instate = state;
3083 xmlFree(target);
3084 return;
3085 }
3086 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3087 if (buf == NULL) {
3088 xmlGenericError(xmlGenericErrorContext,
3089 "malloc of %d byte failed\n", size);
3090 ctxt->instate = state;
3091 return;
3092 }
3093 cur = CUR;
3094 if (!IS_BLANK(cur)) {
3095 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "xmlParsePI: PI %s space expected\n", target);
3099 ctxt->wellFormed = 0;
3100 ctxt->disableSAX = 1;
3101 }
3102 SKIP_BLANKS;
3103 cur = CUR_CHAR(l);
3104 while (IS_CHAR(cur) && /* checked */
3105 ((cur != '?') || (NXT(1) != '>'))) {
3106 if (len + 5 >= size) {
3107 size *= 2;
3108 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3109 if (buf == NULL) {
3110 xmlGenericError(xmlGenericErrorContext,
3111 "realloc of %d byte failed\n", size);
3112 ctxt->instate = state;
3113 return;
3114 }
3115 }
3116 count++;
3117 if (count > 50) {
3118 GROW;
3119 count = 0;
3120 }
3121 COPY_BUF(l,buf,len,cur);
3122 NEXTL(l);
3123 cur = CUR_CHAR(l);
3124 if (cur == 0) {
3125 SHRINK;
3126 GROW;
3127 cur = CUR_CHAR(l);
3128 }
3129 }
3130 buf[len] = 0;
3131 if (cur != '?') {
3132 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3134 ctxt->sax->error(ctxt->userData,
3135 "xmlParsePI: PI %s never end ...\n", target);
3136 ctxt->wellFormed = 0;
3137 ctxt->disableSAX = 1;
3138 } else {
3139 if (input != ctxt->input) {
3140 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3142 ctxt->sax->error(ctxt->userData,
3143 "PI declaration doesn't start and stop in the same entity\n");
3144 ctxt->wellFormed = 0;
3145 ctxt->disableSAX = 1;
3146 }
3147 SKIP(2);
3148
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003149#ifdef LIBXML_CATALOG_ENABLED
3150 if (((state == XML_PARSER_MISC) ||
3151 (state == XML_PARSER_START)) &&
3152 (xmlStrEqual(target, XML_CATALOG_PI))) {
3153 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3154 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3155 (allow == XML_CATA_ALLOW_ALL))
3156 xmlParseCatalogPI(ctxt, buf);
3157 }
3158#endif
3159
3160
Owen Taylor3473f882001-02-23 17:55:21 +00003161 /*
3162 * SAX: PI detected.
3163 */
3164 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3165 (ctxt->sax->processingInstruction != NULL))
3166 ctxt->sax->processingInstruction(ctxt->userData,
3167 target, buf);
3168 }
3169 xmlFree(buf);
3170 xmlFree(target);
3171 } else {
3172 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3174 ctxt->sax->error(ctxt->userData,
3175 "xmlParsePI : no target name\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 }
3179 ctxt->instate = state;
3180 }
3181}
3182
3183/**
3184 * xmlParseNotationDecl:
3185 * @ctxt: an XML parser context
3186 *
3187 * parse a notation declaration
3188 *
3189 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3190 *
3191 * Hence there is actually 3 choices:
3192 * 'PUBLIC' S PubidLiteral
3193 * 'PUBLIC' S PubidLiteral S SystemLiteral
3194 * and 'SYSTEM' S SystemLiteral
3195 *
3196 * See the NOTE on xmlParseExternalID().
3197 */
3198
3199void
3200xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3201 xmlChar *name;
3202 xmlChar *Pubid;
3203 xmlChar *Systemid;
3204
3205 if ((RAW == '<') && (NXT(1) == '!') &&
3206 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3207 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3208 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3209 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3210 xmlParserInputPtr input = ctxt->input;
3211 SHRINK;
3212 SKIP(10);
3213 if (!IS_BLANK(CUR)) {
3214 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3216 ctxt->sax->error(ctxt->userData,
3217 "Space required after '<!NOTATION'\n");
3218 ctxt->wellFormed = 0;
3219 ctxt->disableSAX = 1;
3220 return;
3221 }
3222 SKIP_BLANKS;
3223
Daniel Veillard76d66f42001-05-16 21:05:17 +00003224 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003225 if (name == NULL) {
3226 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3228 ctxt->sax->error(ctxt->userData,
3229 "NOTATION: Name expected here\n");
3230 ctxt->wellFormed = 0;
3231 ctxt->disableSAX = 1;
3232 return;
3233 }
3234 if (!IS_BLANK(CUR)) {
3235 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3237 ctxt->sax->error(ctxt->userData,
3238 "Space required after the NOTATION name'\n");
3239 ctxt->wellFormed = 0;
3240 ctxt->disableSAX = 1;
3241 return;
3242 }
3243 SKIP_BLANKS;
3244
3245 /*
3246 * Parse the IDs.
3247 */
3248 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3249 SKIP_BLANKS;
3250
3251 if (RAW == '>') {
3252 if (input != ctxt->input) {
3253 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3254 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3255 ctxt->sax->error(ctxt->userData,
3256"Notation declaration doesn't start and stop in the same entity\n");
3257 ctxt->wellFormed = 0;
3258 ctxt->disableSAX = 1;
3259 }
3260 NEXT;
3261 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3262 (ctxt->sax->notationDecl != NULL))
3263 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3264 } else {
3265 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3267 ctxt->sax->error(ctxt->userData,
3268 "'>' required to close NOTATION declaration\n");
3269 ctxt->wellFormed = 0;
3270 ctxt->disableSAX = 1;
3271 }
3272 xmlFree(name);
3273 if (Systemid != NULL) xmlFree(Systemid);
3274 if (Pubid != NULL) xmlFree(Pubid);
3275 }
3276}
3277
3278/**
3279 * xmlParseEntityDecl:
3280 * @ctxt: an XML parser context
3281 *
3282 * parse <!ENTITY declarations
3283 *
3284 * [70] EntityDecl ::= GEDecl | PEDecl
3285 *
3286 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3287 *
3288 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3289 *
3290 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3291 *
3292 * [74] PEDef ::= EntityValue | ExternalID
3293 *
3294 * [76] NDataDecl ::= S 'NDATA' S Name
3295 *
3296 * [ VC: Notation Declared ]
3297 * The Name must match the declared name of a notation.
3298 */
3299
3300void
3301xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3302 xmlChar *name = NULL;
3303 xmlChar *value = NULL;
3304 xmlChar *URI = NULL, *literal = NULL;
3305 xmlChar *ndata = NULL;
3306 int isParameter = 0;
3307 xmlChar *orig = NULL;
3308
3309 GROW;
3310 if ((RAW == '<') && (NXT(1) == '!') &&
3311 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3312 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3313 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3314 xmlParserInputPtr input = ctxt->input;
3315 ctxt->instate = XML_PARSER_ENTITY_DECL;
3316 SHRINK;
3317 SKIP(8);
3318 if (!IS_BLANK(CUR)) {
3319 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3321 ctxt->sax->error(ctxt->userData,
3322 "Space required after '<!ENTITY'\n");
3323 ctxt->wellFormed = 0;
3324 ctxt->disableSAX = 1;
3325 }
3326 SKIP_BLANKS;
3327
3328 if (RAW == '%') {
3329 NEXT;
3330 if (!IS_BLANK(CUR)) {
3331 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3333 ctxt->sax->error(ctxt->userData,
3334 "Space required after '%'\n");
3335 ctxt->wellFormed = 0;
3336 ctxt->disableSAX = 1;
3337 }
3338 SKIP_BLANKS;
3339 isParameter = 1;
3340 }
3341
Daniel Veillard76d66f42001-05-16 21:05:17 +00003342 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003343 if (name == NULL) {
3344 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3346 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3347 ctxt->wellFormed = 0;
3348 ctxt->disableSAX = 1;
3349 return;
3350 }
3351 if (!IS_BLANK(CUR)) {
3352 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3353 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3354 ctxt->sax->error(ctxt->userData,
3355 "Space required after the entity name\n");
3356 ctxt->wellFormed = 0;
3357 ctxt->disableSAX = 1;
3358 }
3359 SKIP_BLANKS;
3360
3361 /*
3362 * handle the various case of definitions...
3363 */
3364 if (isParameter) {
3365 if ((RAW == '"') || (RAW == '\'')) {
3366 value = xmlParseEntityValue(ctxt, &orig);
3367 if (value) {
3368 if ((ctxt->sax != NULL) &&
3369 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3370 ctxt->sax->entityDecl(ctxt->userData, name,
3371 XML_INTERNAL_PARAMETER_ENTITY,
3372 NULL, NULL, value);
3373 }
3374 } else {
3375 URI = xmlParseExternalID(ctxt, &literal, 1);
3376 if ((URI == NULL) && (literal == NULL)) {
3377 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3379 ctxt->sax->error(ctxt->userData,
3380 "Entity value required\n");
3381 ctxt->wellFormed = 0;
3382 ctxt->disableSAX = 1;
3383 }
3384 if (URI) {
3385 xmlURIPtr uri;
3386
3387 uri = xmlParseURI((const char *) URI);
3388 if (uri == NULL) {
3389 ctxt->errNo = XML_ERR_INVALID_URI;
3390 if ((ctxt->sax != NULL) &&
3391 (!ctxt->disableSAX) &&
3392 (ctxt->sax->error != NULL))
3393 ctxt->sax->error(ctxt->userData,
3394 "Invalid URI: %s\n", URI);
3395 ctxt->wellFormed = 0;
3396 } else {
3397 if (uri->fragment != NULL) {
3398 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3399 if ((ctxt->sax != NULL) &&
3400 (!ctxt->disableSAX) &&
3401 (ctxt->sax->error != NULL))
3402 ctxt->sax->error(ctxt->userData,
3403 "Fragment not allowed: %s\n", URI);
3404 ctxt->wellFormed = 0;
3405 } else {
3406 if ((ctxt->sax != NULL) &&
3407 (!ctxt->disableSAX) &&
3408 (ctxt->sax->entityDecl != NULL))
3409 ctxt->sax->entityDecl(ctxt->userData, name,
3410 XML_EXTERNAL_PARAMETER_ENTITY,
3411 literal, URI, NULL);
3412 }
3413 xmlFreeURI(uri);
3414 }
3415 }
3416 }
3417 } else {
3418 if ((RAW == '"') || (RAW == '\'')) {
3419 value = xmlParseEntityValue(ctxt, &orig);
3420 if ((ctxt->sax != NULL) &&
3421 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3422 ctxt->sax->entityDecl(ctxt->userData, name,
3423 XML_INTERNAL_GENERAL_ENTITY,
3424 NULL, NULL, value);
3425 } else {
3426 URI = xmlParseExternalID(ctxt, &literal, 1);
3427 if ((URI == NULL) && (literal == NULL)) {
3428 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3430 ctxt->sax->error(ctxt->userData,
3431 "Entity value required\n");
3432 ctxt->wellFormed = 0;
3433 ctxt->disableSAX = 1;
3434 }
3435 if (URI) {
3436 xmlURIPtr uri;
3437
3438 uri = xmlParseURI((const char *)URI);
3439 if (uri == NULL) {
3440 ctxt->errNo = XML_ERR_INVALID_URI;
3441 if ((ctxt->sax != NULL) &&
3442 (!ctxt->disableSAX) &&
3443 (ctxt->sax->error != NULL))
3444 ctxt->sax->error(ctxt->userData,
3445 "Invalid URI: %s\n", URI);
3446 ctxt->wellFormed = 0;
3447 } else {
3448 if (uri->fragment != NULL) {
3449 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3450 if ((ctxt->sax != NULL) &&
3451 (!ctxt->disableSAX) &&
3452 (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt->userData,
3454 "Fragment not allowed: %s\n", URI);
3455 ctxt->wellFormed = 0;
3456 }
3457 xmlFreeURI(uri);
3458 }
3459 }
3460 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3461 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3463 ctxt->sax->error(ctxt->userData,
3464 "Space required before 'NDATA'\n");
3465 ctxt->wellFormed = 0;
3466 ctxt->disableSAX = 1;
3467 }
3468 SKIP_BLANKS;
3469 if ((RAW == 'N') && (NXT(1) == 'D') &&
3470 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3471 (NXT(4) == 'A')) {
3472 SKIP(5);
3473 if (!IS_BLANK(CUR)) {
3474 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3476 ctxt->sax->error(ctxt->userData,
3477 "Space required after 'NDATA'\n");
3478 ctxt->wellFormed = 0;
3479 ctxt->disableSAX = 1;
3480 }
3481 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003482 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3484 (ctxt->sax->unparsedEntityDecl != NULL))
3485 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3486 literal, URI, ndata);
3487 } else {
3488 if ((ctxt->sax != NULL) &&
3489 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3490 ctxt->sax->entityDecl(ctxt->userData, name,
3491 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3492 literal, URI, NULL);
3493 }
3494 }
3495 }
3496 SKIP_BLANKS;
3497 if (RAW != '>') {
3498 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3500 ctxt->sax->error(ctxt->userData,
3501 "xmlParseEntityDecl: entity %s not terminated\n", name);
3502 ctxt->wellFormed = 0;
3503 ctxt->disableSAX = 1;
3504 } else {
3505 if (input != ctxt->input) {
3506 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509"Entity declaration doesn't start and stop in the same entity\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 }
3513 NEXT;
3514 }
3515 if (orig != NULL) {
3516 /*
3517 * Ugly mechanism to save the raw entity value.
3518 */
3519 xmlEntityPtr cur = NULL;
3520
3521 if (isParameter) {
3522 if ((ctxt->sax != NULL) &&
3523 (ctxt->sax->getParameterEntity != NULL))
3524 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3525 } else {
3526 if ((ctxt->sax != NULL) &&
3527 (ctxt->sax->getEntity != NULL))
3528 cur = ctxt->sax->getEntity(ctxt->userData, name);
3529 }
3530 if (cur != NULL) {
3531 if (cur->orig != NULL)
3532 xmlFree(orig);
3533 else
3534 cur->orig = orig;
3535 } else
3536 xmlFree(orig);
3537 }
3538 if (name != NULL) xmlFree(name);
3539 if (value != NULL) xmlFree(value);
3540 if (URI != NULL) xmlFree(URI);
3541 if (literal != NULL) xmlFree(literal);
3542 if (ndata != NULL) xmlFree(ndata);
3543 }
3544}
3545
3546/**
3547 * xmlParseDefaultDecl:
3548 * @ctxt: an XML parser context
3549 * @value: Receive a possible fixed default value for the attribute
3550 *
3551 * Parse an attribute default declaration
3552 *
3553 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3554 *
3555 * [ VC: Required Attribute ]
3556 * if the default declaration is the keyword #REQUIRED, then the
3557 * attribute must be specified for all elements of the type in the
3558 * attribute-list declaration.
3559 *
3560 * [ VC: Attribute Default Legal ]
3561 * The declared default value must meet the lexical constraints of
3562 * the declared attribute type c.f. xmlValidateAttributeDecl()
3563 *
3564 * [ VC: Fixed Attribute Default ]
3565 * if an attribute has a default value declared with the #FIXED
3566 * keyword, instances of that attribute must match the default value.
3567 *
3568 * [ WFC: No < in Attribute Values ]
3569 * handled in xmlParseAttValue()
3570 *
3571 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3572 * or XML_ATTRIBUTE_FIXED.
3573 */
3574
3575int
3576xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3577 int val;
3578 xmlChar *ret;
3579
3580 *value = NULL;
3581 if ((RAW == '#') && (NXT(1) == 'R') &&
3582 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3583 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3584 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3585 (NXT(8) == 'D')) {
3586 SKIP(9);
3587 return(XML_ATTRIBUTE_REQUIRED);
3588 }
3589 if ((RAW == '#') && (NXT(1) == 'I') &&
3590 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3591 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3592 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3593 SKIP(8);
3594 return(XML_ATTRIBUTE_IMPLIED);
3595 }
3596 val = XML_ATTRIBUTE_NONE;
3597 if ((RAW == '#') && (NXT(1) == 'F') &&
3598 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3599 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3600 SKIP(6);
3601 val = XML_ATTRIBUTE_FIXED;
3602 if (!IS_BLANK(CUR)) {
3603 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3605 ctxt->sax->error(ctxt->userData,
3606 "Space required after '#FIXED'\n");
3607 ctxt->wellFormed = 0;
3608 ctxt->disableSAX = 1;
3609 }
3610 SKIP_BLANKS;
3611 }
3612 ret = xmlParseAttValue(ctxt);
3613 ctxt->instate = XML_PARSER_DTD;
3614 if (ret == NULL) {
3615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3616 ctxt->sax->error(ctxt->userData,
3617 "Attribute default value declaration error\n");
3618 ctxt->wellFormed = 0;
3619 ctxt->disableSAX = 1;
3620 } else
3621 *value = ret;
3622 return(val);
3623}
3624
3625/**
3626 * xmlParseNotationType:
3627 * @ctxt: an XML parser context
3628 *
3629 * parse an Notation attribute type.
3630 *
3631 * Note: the leading 'NOTATION' S part has already being parsed...
3632 *
3633 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3634 *
3635 * [ VC: Notation Attributes ]
3636 * Values of this type must match one of the notation names included
3637 * in the declaration; all notation names in the declaration must be declared.
3638 *
3639 * Returns: the notation attribute tree built while parsing
3640 */
3641
3642xmlEnumerationPtr
3643xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3644 xmlChar *name;
3645 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3646
3647 if (RAW != '(') {
3648 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "'(' required to start 'NOTATION'\n");
3652 ctxt->wellFormed = 0;
3653 ctxt->disableSAX = 1;
3654 return(NULL);
3655 }
3656 SHRINK;
3657 do {
3658 NEXT;
3659 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003660 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003661 if (name == NULL) {
3662 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3664 ctxt->sax->error(ctxt->userData,
3665 "Name expected in NOTATION declaration\n");
3666 ctxt->wellFormed = 0;
3667 ctxt->disableSAX = 1;
3668 return(ret);
3669 }
3670 cur = xmlCreateEnumeration(name);
3671 xmlFree(name);
3672 if (cur == NULL) return(ret);
3673 if (last == NULL) ret = last = cur;
3674 else {
3675 last->next = cur;
3676 last = cur;
3677 }
3678 SKIP_BLANKS;
3679 } while (RAW == '|');
3680 if (RAW != ')') {
3681 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3683 ctxt->sax->error(ctxt->userData,
3684 "')' required to finish NOTATION declaration\n");
3685 ctxt->wellFormed = 0;
3686 ctxt->disableSAX = 1;
3687 if ((last != NULL) && (last != ret))
3688 xmlFreeEnumeration(last);
3689 return(ret);
3690 }
3691 NEXT;
3692 return(ret);
3693}
3694
3695/**
3696 * xmlParseEnumerationType:
3697 * @ctxt: an XML parser context
3698 *
3699 * parse an Enumeration attribute type.
3700 *
3701 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3702 *
3703 * [ VC: Enumeration ]
3704 * Values of this type must match one of the Nmtoken tokens in
3705 * the declaration
3706 *
3707 * Returns: the enumeration attribute tree built while parsing
3708 */
3709
3710xmlEnumerationPtr
3711xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3712 xmlChar *name;
3713 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3714
3715 if (RAW != '(') {
3716 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3718 ctxt->sax->error(ctxt->userData,
3719 "'(' required to start ATTLIST enumeration\n");
3720 ctxt->wellFormed = 0;
3721 ctxt->disableSAX = 1;
3722 return(NULL);
3723 }
3724 SHRINK;
3725 do {
3726 NEXT;
3727 SKIP_BLANKS;
3728 name = xmlParseNmtoken(ctxt);
3729 if (name == NULL) {
3730 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3732 ctxt->sax->error(ctxt->userData,
3733 "NmToken expected in ATTLIST enumeration\n");
3734 ctxt->wellFormed = 0;
3735 ctxt->disableSAX = 1;
3736 return(ret);
3737 }
3738 cur = xmlCreateEnumeration(name);
3739 xmlFree(name);
3740 if (cur == NULL) return(ret);
3741 if (last == NULL) ret = last = cur;
3742 else {
3743 last->next = cur;
3744 last = cur;
3745 }
3746 SKIP_BLANKS;
3747 } while (RAW == '|');
3748 if (RAW != ')') {
3749 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3751 ctxt->sax->error(ctxt->userData,
3752 "')' required to finish ATTLIST enumeration\n");
3753 ctxt->wellFormed = 0;
3754 ctxt->disableSAX = 1;
3755 return(ret);
3756 }
3757 NEXT;
3758 return(ret);
3759}
3760
3761/**
3762 * xmlParseEnumeratedType:
3763 * @ctxt: an XML parser context
3764 * @tree: the enumeration tree built while parsing
3765 *
3766 * parse an Enumerated attribute type.
3767 *
3768 * [57] EnumeratedType ::= NotationType | Enumeration
3769 *
3770 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3771 *
3772 *
3773 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3774 */
3775
3776int
3777xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3778 if ((RAW == 'N') && (NXT(1) == 'O') &&
3779 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3780 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3781 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3782 SKIP(8);
3783 if (!IS_BLANK(CUR)) {
3784 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3786 ctxt->sax->error(ctxt->userData,
3787 "Space required after 'NOTATION'\n");
3788 ctxt->wellFormed = 0;
3789 ctxt->disableSAX = 1;
3790 return(0);
3791 }
3792 SKIP_BLANKS;
3793 *tree = xmlParseNotationType(ctxt);
3794 if (*tree == NULL) return(0);
3795 return(XML_ATTRIBUTE_NOTATION);
3796 }
3797 *tree = xmlParseEnumerationType(ctxt);
3798 if (*tree == NULL) return(0);
3799 return(XML_ATTRIBUTE_ENUMERATION);
3800}
3801
3802/**
3803 * xmlParseAttributeType:
3804 * @ctxt: an XML parser context
3805 * @tree: the enumeration tree built while parsing
3806 *
3807 * parse the Attribute list def for an element
3808 *
3809 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3810 *
3811 * [55] StringType ::= 'CDATA'
3812 *
3813 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3814 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3815 *
3816 * Validity constraints for attribute values syntax are checked in
3817 * xmlValidateAttributeValue()
3818 *
3819 * [ VC: ID ]
3820 * Values of type ID must match the Name production. A name must not
3821 * appear more than once in an XML document as a value of this type;
3822 * i.e., ID values must uniquely identify the elements which bear them.
3823 *
3824 * [ VC: One ID per Element Type ]
3825 * No element type may have more than one ID attribute specified.
3826 *
3827 * [ VC: ID Attribute Default ]
3828 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3829 *
3830 * [ VC: IDREF ]
3831 * Values of type IDREF must match the Name production, and values
3832 * of type IDREFS must match Names; each IDREF Name must match the value
3833 * of an ID attribute on some element in the XML document; i.e. IDREF
3834 * values must match the value of some ID attribute.
3835 *
3836 * [ VC: Entity Name ]
3837 * Values of type ENTITY must match the Name production, values
3838 * of type ENTITIES must match Names; each Entity Name must match the
3839 * name of an unparsed entity declared in the DTD.
3840 *
3841 * [ VC: Name Token ]
3842 * Values of type NMTOKEN must match the Nmtoken production; values
3843 * of type NMTOKENS must match Nmtokens.
3844 *
3845 * Returns the attribute type
3846 */
3847int
3848xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3849 SHRINK;
3850 if ((RAW == 'C') && (NXT(1) == 'D') &&
3851 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3852 (NXT(4) == 'A')) {
3853 SKIP(5);
3854 return(XML_ATTRIBUTE_CDATA);
3855 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3856 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3857 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3858 SKIP(6);
3859 return(XML_ATTRIBUTE_IDREFS);
3860 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3861 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3862 (NXT(4) == 'F')) {
3863 SKIP(5);
3864 return(XML_ATTRIBUTE_IDREF);
3865 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3866 SKIP(2);
3867 return(XML_ATTRIBUTE_ID);
3868 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3869 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3870 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3871 SKIP(6);
3872 return(XML_ATTRIBUTE_ENTITY);
3873 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3874 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3875 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3876 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3877 SKIP(8);
3878 return(XML_ATTRIBUTE_ENTITIES);
3879 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3880 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3881 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3882 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3883 SKIP(8);
3884 return(XML_ATTRIBUTE_NMTOKENS);
3885 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3886 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3887 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3888 (NXT(6) == 'N')) {
3889 SKIP(7);
3890 return(XML_ATTRIBUTE_NMTOKEN);
3891 }
3892 return(xmlParseEnumeratedType(ctxt, tree));
3893}
3894
3895/**
3896 * xmlParseAttributeListDecl:
3897 * @ctxt: an XML parser context
3898 *
3899 * : parse the Attribute list def for an element
3900 *
3901 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3902 *
3903 * [53] AttDef ::= S Name S AttType S DefaultDecl
3904 *
3905 */
3906void
3907xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3908 xmlChar *elemName;
3909 xmlChar *attrName;
3910 xmlEnumerationPtr tree;
3911
3912 if ((RAW == '<') && (NXT(1) == '!') &&
3913 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3914 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3915 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3916 (NXT(8) == 'T')) {
3917 xmlParserInputPtr input = ctxt->input;
3918
3919 SKIP(9);
3920 if (!IS_BLANK(CUR)) {
3921 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3923 ctxt->sax->error(ctxt->userData,
3924 "Space required after '<!ATTLIST'\n");
3925 ctxt->wellFormed = 0;
3926 ctxt->disableSAX = 1;
3927 }
3928 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003929 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 if (elemName == NULL) {
3931 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3933 ctxt->sax->error(ctxt->userData,
3934 "ATTLIST: no name for Element\n");
3935 ctxt->wellFormed = 0;
3936 ctxt->disableSAX = 1;
3937 return;
3938 }
3939 SKIP_BLANKS;
3940 GROW;
3941 while (RAW != '>') {
3942 const xmlChar *check = CUR_PTR;
3943 int type;
3944 int def;
3945 xmlChar *defaultValue = NULL;
3946
3947 GROW;
3948 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003949 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003950 if (attrName == NULL) {
3951 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3953 ctxt->sax->error(ctxt->userData,
3954 "ATTLIST: no name for Attribute\n");
3955 ctxt->wellFormed = 0;
3956 ctxt->disableSAX = 1;
3957 break;
3958 }
3959 GROW;
3960 if (!IS_BLANK(CUR)) {
3961 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3963 ctxt->sax->error(ctxt->userData,
3964 "Space required after the attribute name\n");
3965 ctxt->wellFormed = 0;
3966 ctxt->disableSAX = 1;
3967 if (attrName != NULL)
3968 xmlFree(attrName);
3969 if (defaultValue != NULL)
3970 xmlFree(defaultValue);
3971 break;
3972 }
3973 SKIP_BLANKS;
3974
3975 type = xmlParseAttributeType(ctxt, &tree);
3976 if (type <= 0) {
3977 if (attrName != NULL)
3978 xmlFree(attrName);
3979 if (defaultValue != NULL)
3980 xmlFree(defaultValue);
3981 break;
3982 }
3983
3984 GROW;
3985 if (!IS_BLANK(CUR)) {
3986 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3988 ctxt->sax->error(ctxt->userData,
3989 "Space required after the attribute type\n");
3990 ctxt->wellFormed = 0;
3991 ctxt->disableSAX = 1;
3992 if (attrName != NULL)
3993 xmlFree(attrName);
3994 if (defaultValue != NULL)
3995 xmlFree(defaultValue);
3996 if (tree != NULL)
3997 xmlFreeEnumeration(tree);
3998 break;
3999 }
4000 SKIP_BLANKS;
4001
4002 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4003 if (def <= 0) {
4004 if (attrName != NULL)
4005 xmlFree(attrName);
4006 if (defaultValue != NULL)
4007 xmlFree(defaultValue);
4008 if (tree != NULL)
4009 xmlFreeEnumeration(tree);
4010 break;
4011 }
4012
4013 GROW;
4014 if (RAW != '>') {
4015 if (!IS_BLANK(CUR)) {
4016 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4018 ctxt->sax->error(ctxt->userData,
4019 "Space required after the attribute default value\n");
4020 ctxt->wellFormed = 0;
4021 ctxt->disableSAX = 1;
4022 if (attrName != NULL)
4023 xmlFree(attrName);
4024 if (defaultValue != NULL)
4025 xmlFree(defaultValue);
4026 if (tree != NULL)
4027 xmlFreeEnumeration(tree);
4028 break;
4029 }
4030 SKIP_BLANKS;
4031 }
4032 if (check == CUR_PTR) {
4033 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4035 ctxt->sax->error(ctxt->userData,
4036 "xmlParseAttributeListDecl: detected internal error\n");
4037 if (attrName != NULL)
4038 xmlFree(attrName);
4039 if (defaultValue != NULL)
4040 xmlFree(defaultValue);
4041 if (tree != NULL)
4042 xmlFreeEnumeration(tree);
4043 break;
4044 }
4045 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4046 (ctxt->sax->attributeDecl != NULL))
4047 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4048 type, def, defaultValue, tree);
4049 if (attrName != NULL)
4050 xmlFree(attrName);
4051 if (defaultValue != NULL)
4052 xmlFree(defaultValue);
4053 GROW;
4054 }
4055 if (RAW == '>') {
4056 if (input != ctxt->input) {
4057 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4059 ctxt->sax->error(ctxt->userData,
4060"Attribute list declaration doesn't start and stop in the same entity\n");
4061 ctxt->wellFormed = 0;
4062 ctxt->disableSAX = 1;
4063 }
4064 NEXT;
4065 }
4066
4067 xmlFree(elemName);
4068 }
4069}
4070
4071/**
4072 * xmlParseElementMixedContentDecl:
4073 * @ctxt: an XML parser context
4074 *
4075 * parse the declaration for a Mixed Element content
4076 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4077 *
4078 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4079 * '(' S? '#PCDATA' S? ')'
4080 *
4081 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4082 *
4083 * [ VC: No Duplicate Types ]
4084 * The same name must not appear more than once in a single
4085 * mixed-content declaration.
4086 *
4087 * returns: the list of the xmlElementContentPtr describing the element choices
4088 */
4089xmlElementContentPtr
4090xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4091 xmlElementContentPtr ret = NULL, cur = NULL, n;
4092 xmlChar *elem = NULL;
4093
4094 GROW;
4095 if ((RAW == '#') && (NXT(1) == 'P') &&
4096 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4097 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4098 (NXT(6) == 'A')) {
4099 SKIP(7);
4100 SKIP_BLANKS;
4101 SHRINK;
4102 if (RAW == ')') {
4103 ctxt->entity = ctxt->input;
4104 NEXT;
4105 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4106 if (RAW == '*') {
4107 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4108 NEXT;
4109 }
4110 return(ret);
4111 }
4112 if ((RAW == '(') || (RAW == '|')) {
4113 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4114 if (ret == NULL) return(NULL);
4115 }
4116 while (RAW == '|') {
4117 NEXT;
4118 if (elem == NULL) {
4119 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4120 if (ret == NULL) return(NULL);
4121 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004122 if (cur != NULL)
4123 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004124 cur = ret;
4125 } else {
4126 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4127 if (n == NULL) return(NULL);
4128 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004129 if (n->c1 != NULL)
4130 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004131 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004132 if (n != NULL)
4133 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004134 cur = n;
4135 xmlFree(elem);
4136 }
4137 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004138 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 if (elem == NULL) {
4140 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4142 ctxt->sax->error(ctxt->userData,
4143 "xmlParseElementMixedContentDecl : Name expected\n");
4144 ctxt->wellFormed = 0;
4145 ctxt->disableSAX = 1;
4146 xmlFreeElementContent(cur);
4147 return(NULL);
4148 }
4149 SKIP_BLANKS;
4150 GROW;
4151 }
4152 if ((RAW == ')') && (NXT(1) == '*')) {
4153 if (elem != NULL) {
4154 cur->c2 = xmlNewElementContent(elem,
4155 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004156 if (cur->c2 != NULL)
4157 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004158 xmlFree(elem);
4159 }
4160 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4161 ctxt->entity = ctxt->input;
4162 SKIP(2);
4163 } else {
4164 if (elem != NULL) xmlFree(elem);
4165 xmlFreeElementContent(ret);
4166 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4168 ctxt->sax->error(ctxt->userData,
4169 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4170 ctxt->wellFormed = 0;
4171 ctxt->disableSAX = 1;
4172 return(NULL);
4173 }
4174
4175 } else {
4176 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4178 ctxt->sax->error(ctxt->userData,
4179 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4180 ctxt->wellFormed = 0;
4181 ctxt->disableSAX = 1;
4182 }
4183 return(ret);
4184}
4185
4186/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004187 * xmlParseElementChildrenContentD:
4188 * @ctxt: an XML parser context
4189 *
4190 * VMS version of xmlParseElementChildrenContentDecl()
4191 *
4192 * Returns the tree of xmlElementContentPtr describing the element
4193 * hierarchy.
4194 */
4195/**
Owen Taylor3473f882001-02-23 17:55:21 +00004196 * xmlParseElementChildrenContentDecl:
4197 * @ctxt: an XML parser context
4198 *
4199 * parse the declaration for a Mixed Element content
4200 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4201 *
4202 *
4203 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4204 *
4205 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4206 *
4207 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4208 *
4209 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4210 *
4211 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4212 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004213 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004214 * opening or closing parentheses in a choice, seq, or Mixed
4215 * construct is contained in the replacement text for a parameter
4216 * entity, both must be contained in the same replacement text. For
4217 * interoperability, if a parameter-entity reference appears in a
4218 * choice, seq, or Mixed construct, its replacement text should not
4219 * be empty, and neither the first nor last non-blank character of
4220 * the replacement text should be a connector (| or ,).
4221 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004222 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004223 * hierarchy.
4224 */
4225xmlElementContentPtr
4226#ifdef VMS
4227xmlParseElementChildrenContentD
4228#else
4229xmlParseElementChildrenContentDecl
4230#endif
4231(xmlParserCtxtPtr ctxt) {
4232 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4233 xmlChar *elem;
4234 xmlChar type = 0;
4235
4236 SKIP_BLANKS;
4237 GROW;
4238 if (RAW == '(') {
4239 /* Recurse on first child */
4240 NEXT;
4241 SKIP_BLANKS;
4242 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4243 SKIP_BLANKS;
4244 GROW;
4245 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004246 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 if (elem == NULL) {
4248 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4250 ctxt->sax->error(ctxt->userData,
4251 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4252 ctxt->wellFormed = 0;
4253 ctxt->disableSAX = 1;
4254 return(NULL);
4255 }
4256 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4257 GROW;
4258 if (RAW == '?') {
4259 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4260 NEXT;
4261 } else if (RAW == '*') {
4262 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4263 NEXT;
4264 } else if (RAW == '+') {
4265 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4266 NEXT;
4267 } else {
4268 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4269 }
4270 xmlFree(elem);
4271 GROW;
4272 }
4273 SKIP_BLANKS;
4274 SHRINK;
4275 while (RAW != ')') {
4276 /*
4277 * Each loop we parse one separator and one element.
4278 */
4279 if (RAW == ',') {
4280 if (type == 0) type = CUR;
4281
4282 /*
4283 * Detect "Name | Name , Name" error
4284 */
4285 else if (type != CUR) {
4286 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4288 ctxt->sax->error(ctxt->userData,
4289 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4290 type);
4291 ctxt->wellFormed = 0;
4292 ctxt->disableSAX = 1;
4293 if ((op != NULL) && (op != ret))
4294 xmlFreeElementContent(op);
4295 if ((last != NULL) && (last != ret) &&
4296 (last != ret->c1) && (last != ret->c2))
4297 xmlFreeElementContent(last);
4298 if (ret != NULL)
4299 xmlFreeElementContent(ret);
4300 return(NULL);
4301 }
4302 NEXT;
4303
4304 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4305 if (op == NULL) {
4306 xmlFreeElementContent(ret);
4307 return(NULL);
4308 }
4309 if (last == NULL) {
4310 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004311 if (ret != NULL)
4312 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004313 ret = cur = op;
4314 } else {
4315 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004316 if (op != NULL)
4317 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004319 if (last != NULL)
4320 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004321 cur =op;
4322 last = NULL;
4323 }
4324 } else if (RAW == '|') {
4325 if (type == 0) type = CUR;
4326
4327 /*
4328 * Detect "Name , Name | Name" error
4329 */
4330 else if (type != CUR) {
4331 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333 ctxt->sax->error(ctxt->userData,
4334 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4335 type);
4336 ctxt->wellFormed = 0;
4337 ctxt->disableSAX = 1;
4338 if ((op != NULL) && (op != ret) && (op != last))
4339 xmlFreeElementContent(op);
4340 if ((last != NULL) && (last != ret) &&
4341 (last != ret->c1) && (last != ret->c2))
4342 xmlFreeElementContent(last);
4343 if (ret != NULL)
4344 xmlFreeElementContent(ret);
4345 return(NULL);
4346 }
4347 NEXT;
4348
4349 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4350 if (op == NULL) {
4351 if ((op != NULL) && (op != ret))
4352 xmlFreeElementContent(op);
4353 if ((last != NULL) && (last != ret) &&
4354 (last != ret->c1) && (last != ret->c2))
4355 xmlFreeElementContent(last);
4356 if (ret != NULL)
4357 xmlFreeElementContent(ret);
4358 return(NULL);
4359 }
4360 if (last == NULL) {
4361 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004362 if (ret != NULL)
4363 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 ret = cur = op;
4365 } else {
4366 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004367 if (op != NULL)
4368 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004369 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004370 if (last != NULL)
4371 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004372 cur =op;
4373 last = NULL;
4374 }
4375 } else {
4376 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4378 ctxt->sax->error(ctxt->userData,
4379 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4380 ctxt->wellFormed = 0;
4381 ctxt->disableSAX = 1;
4382 if ((op != NULL) && (op != ret))
4383 xmlFreeElementContent(op);
4384 if ((last != NULL) && (last != ret) &&
4385 (last != ret->c1) && (last != ret->c2))
4386 xmlFreeElementContent(last);
4387 if (ret != NULL)
4388 xmlFreeElementContent(ret);
4389 return(NULL);
4390 }
4391 GROW;
4392 SKIP_BLANKS;
4393 GROW;
4394 if (RAW == '(') {
4395 /* Recurse on second child */
4396 NEXT;
4397 SKIP_BLANKS;
4398 last = xmlParseElementChildrenContentDecl(ctxt);
4399 SKIP_BLANKS;
4400 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004401 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004402 if (elem == NULL) {
4403 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4405 ctxt->sax->error(ctxt->userData,
4406 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4407 ctxt->wellFormed = 0;
4408 ctxt->disableSAX = 1;
4409 if ((op != NULL) && (op != ret))
4410 xmlFreeElementContent(op);
4411 if ((last != NULL) && (last != ret) &&
4412 (last != ret->c1) && (last != ret->c2))
4413 xmlFreeElementContent(last);
4414 if (ret != NULL)
4415 xmlFreeElementContent(ret);
4416 return(NULL);
4417 }
4418 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4419 xmlFree(elem);
4420 if (RAW == '?') {
4421 last->ocur = XML_ELEMENT_CONTENT_OPT;
4422 NEXT;
4423 } else if (RAW == '*') {
4424 last->ocur = XML_ELEMENT_CONTENT_MULT;
4425 NEXT;
4426 } else if (RAW == '+') {
4427 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4428 NEXT;
4429 } else {
4430 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4431 }
4432 }
4433 SKIP_BLANKS;
4434 GROW;
4435 }
4436 if ((cur != NULL) && (last != NULL)) {
4437 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004438 if (last != NULL)
4439 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004440 }
4441 ctxt->entity = ctxt->input;
4442 NEXT;
4443 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004444 if (ret != NULL)
4445 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004446 NEXT;
4447 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004448 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004449 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004450 cur = ret;
4451 /*
4452 * Some normalization:
4453 * (a | b* | c?)* == (a | b | c)*
4454 */
4455 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4456 if ((cur->c1 != NULL) &&
4457 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4458 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4459 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4460 if ((cur->c2 != NULL) &&
4461 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4462 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4463 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4464 cur = cur->c2;
4465 }
4466 }
Owen Taylor3473f882001-02-23 17:55:21 +00004467 NEXT;
4468 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004469 if (ret != NULL) {
4470 int found = 0;
4471
Daniel Veillarde470df72001-04-18 21:41:07 +00004472 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004473 /*
4474 * Some normalization:
4475 * (a | b*)+ == (a | b)*
4476 * (a | b?)+ == (a | b)*
4477 */
4478 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4479 if ((cur->c1 != NULL) &&
4480 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4481 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4482 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4483 found = 1;
4484 }
4485 if ((cur->c2 != NULL) &&
4486 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4487 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4488 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4489 found = 1;
4490 }
4491 cur = cur->c2;
4492 }
4493 if (found)
4494 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4495 }
Owen Taylor3473f882001-02-23 17:55:21 +00004496 NEXT;
4497 }
4498 return(ret);
4499}
4500
4501/**
4502 * xmlParseElementContentDecl:
4503 * @ctxt: an XML parser context
4504 * @name: the name of the element being defined.
4505 * @result: the Element Content pointer will be stored here if any
4506 *
4507 * parse the declaration for an Element content either Mixed or Children,
4508 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4509 *
4510 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4511 *
4512 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4513 */
4514
4515int
4516xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4517 xmlElementContentPtr *result) {
4518
4519 xmlElementContentPtr tree = NULL;
4520 xmlParserInputPtr input = ctxt->input;
4521 int res;
4522
4523 *result = NULL;
4524
4525 if (RAW != '(') {
4526 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4528 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004529 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004530 ctxt->wellFormed = 0;
4531 ctxt->disableSAX = 1;
4532 return(-1);
4533 }
4534 NEXT;
4535 GROW;
4536 SKIP_BLANKS;
4537 if ((RAW == '#') && (NXT(1) == 'P') &&
4538 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4539 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4540 (NXT(6) == 'A')) {
4541 tree = xmlParseElementMixedContentDecl(ctxt);
4542 res = XML_ELEMENT_TYPE_MIXED;
4543 } else {
4544 tree = xmlParseElementChildrenContentDecl(ctxt);
4545 res = XML_ELEMENT_TYPE_ELEMENT;
4546 }
4547 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4548 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4550 ctxt->sax->error(ctxt->userData,
4551"Element content declaration doesn't start and stop in the same entity\n");
4552 ctxt->wellFormed = 0;
4553 ctxt->disableSAX = 1;
4554 }
4555 SKIP_BLANKS;
4556 *result = tree;
4557 return(res);
4558}
4559
4560/**
4561 * xmlParseElementDecl:
4562 * @ctxt: an XML parser context
4563 *
4564 * parse an Element declaration.
4565 *
4566 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4567 *
4568 * [ VC: Unique Element Type Declaration ]
4569 * No element type may be declared more than once
4570 *
4571 * Returns the type of the element, or -1 in case of error
4572 */
4573int
4574xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4575 xmlChar *name;
4576 int ret = -1;
4577 xmlElementContentPtr content = NULL;
4578
4579 GROW;
4580 if ((RAW == '<') && (NXT(1) == '!') &&
4581 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4582 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4583 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4584 (NXT(8) == 'T')) {
4585 xmlParserInputPtr input = ctxt->input;
4586
4587 SKIP(9);
4588 if (!IS_BLANK(CUR)) {
4589 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4591 ctxt->sax->error(ctxt->userData,
4592 "Space required after 'ELEMENT'\n");
4593 ctxt->wellFormed = 0;
4594 ctxt->disableSAX = 1;
4595 }
4596 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004597 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004598 if (name == NULL) {
4599 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4601 ctxt->sax->error(ctxt->userData,
4602 "xmlParseElementDecl: no name for Element\n");
4603 ctxt->wellFormed = 0;
4604 ctxt->disableSAX = 1;
4605 return(-1);
4606 }
4607 while ((RAW == 0) && (ctxt->inputNr > 1))
4608 xmlPopInput(ctxt);
4609 if (!IS_BLANK(CUR)) {
4610 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4612 ctxt->sax->error(ctxt->userData,
4613 "Space required after the element name\n");
4614 ctxt->wellFormed = 0;
4615 ctxt->disableSAX = 1;
4616 }
4617 SKIP_BLANKS;
4618 if ((RAW == 'E') && (NXT(1) == 'M') &&
4619 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4620 (NXT(4) == 'Y')) {
4621 SKIP(5);
4622 /*
4623 * Element must always be empty.
4624 */
4625 ret = XML_ELEMENT_TYPE_EMPTY;
4626 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4627 (NXT(2) == 'Y')) {
4628 SKIP(3);
4629 /*
4630 * Element is a generic container.
4631 */
4632 ret = XML_ELEMENT_TYPE_ANY;
4633 } else if (RAW == '(') {
4634 ret = xmlParseElementContentDecl(ctxt, name, &content);
4635 } else {
4636 /*
4637 * [ WFC: PEs in Internal Subset ] error handling.
4638 */
4639 if ((RAW == '%') && (ctxt->external == 0) &&
4640 (ctxt->inputNr == 1)) {
4641 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4643 ctxt->sax->error(ctxt->userData,
4644 "PEReference: forbidden within markup decl in internal subset\n");
4645 } else {
4646 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4648 ctxt->sax->error(ctxt->userData,
4649 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4650 }
4651 ctxt->wellFormed = 0;
4652 ctxt->disableSAX = 1;
4653 if (name != NULL) xmlFree(name);
4654 return(-1);
4655 }
4656
4657 SKIP_BLANKS;
4658 /*
4659 * Pop-up of finished entities.
4660 */
4661 while ((RAW == 0) && (ctxt->inputNr > 1))
4662 xmlPopInput(ctxt);
4663 SKIP_BLANKS;
4664
4665 if (RAW != '>') {
4666 ctxt->errNo = XML_ERR_GT_REQUIRED;
4667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4668 ctxt->sax->error(ctxt->userData,
4669 "xmlParseElementDecl: expected '>' at the end\n");
4670 ctxt->wellFormed = 0;
4671 ctxt->disableSAX = 1;
4672 } else {
4673 if (input != ctxt->input) {
4674 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4676 ctxt->sax->error(ctxt->userData,
4677"Element declaration doesn't start and stop in the same entity\n");
4678 ctxt->wellFormed = 0;
4679 ctxt->disableSAX = 1;
4680 }
4681
4682 NEXT;
4683 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4684 (ctxt->sax->elementDecl != NULL))
4685 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4686 content);
4687 }
4688 if (content != NULL) {
4689 xmlFreeElementContent(content);
4690 }
4691 if (name != NULL) {
4692 xmlFree(name);
4693 }
4694 }
4695 return(ret);
4696}
4697
4698/**
Owen Taylor3473f882001-02-23 17:55:21 +00004699 * xmlParseConditionalSections
4700 * @ctxt: an XML parser context
4701 *
4702 * [61] conditionalSect ::= includeSect | ignoreSect
4703 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4704 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4705 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4706 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4707 */
4708
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004709static void
Owen Taylor3473f882001-02-23 17:55:21 +00004710xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4711 SKIP(3);
4712 SKIP_BLANKS;
4713 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4714 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4715 (NXT(6) == 'E')) {
4716 SKIP(7);
4717 SKIP_BLANKS;
4718 if (RAW != '[') {
4719 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722 "XML conditional section '[' expected\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 } else {
4726 NEXT;
4727 }
4728 if (xmlParserDebugEntities) {
4729 if ((ctxt->input != NULL) && (ctxt->input->filename))
4730 xmlGenericError(xmlGenericErrorContext,
4731 "%s(%d): ", ctxt->input->filename,
4732 ctxt->input->line);
4733 xmlGenericError(xmlGenericErrorContext,
4734 "Entering INCLUDE Conditional Section\n");
4735 }
4736
4737 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4738 (NXT(2) != '>'))) {
4739 const xmlChar *check = CUR_PTR;
4740 int cons = ctxt->input->consumed;
4741 int tok = ctxt->token;
4742
4743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4744 xmlParseConditionalSections(ctxt);
4745 } else if (IS_BLANK(CUR)) {
4746 NEXT;
4747 } else if (RAW == '%') {
4748 xmlParsePEReference(ctxt);
4749 } else
4750 xmlParseMarkupDecl(ctxt);
4751
4752 /*
4753 * Pop-up of finished entities.
4754 */
4755 while ((RAW == 0) && (ctxt->inputNr > 1))
4756 xmlPopInput(ctxt);
4757
4758 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4759 (tok == ctxt->token)) {
4760 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4762 ctxt->sax->error(ctxt->userData,
4763 "Content error in the external subset\n");
4764 ctxt->wellFormed = 0;
4765 ctxt->disableSAX = 1;
4766 break;
4767 }
4768 }
4769 if (xmlParserDebugEntities) {
4770 if ((ctxt->input != NULL) && (ctxt->input->filename))
4771 xmlGenericError(xmlGenericErrorContext,
4772 "%s(%d): ", ctxt->input->filename,
4773 ctxt->input->line);
4774 xmlGenericError(xmlGenericErrorContext,
4775 "Leaving INCLUDE Conditional Section\n");
4776 }
4777
4778 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4779 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4780 int state;
4781 int instate;
4782 int depth = 0;
4783
4784 SKIP(6);
4785 SKIP_BLANKS;
4786 if (RAW != '[') {
4787 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4789 ctxt->sax->error(ctxt->userData,
4790 "XML conditional section '[' expected\n");
4791 ctxt->wellFormed = 0;
4792 ctxt->disableSAX = 1;
4793 } else {
4794 NEXT;
4795 }
4796 if (xmlParserDebugEntities) {
4797 if ((ctxt->input != NULL) && (ctxt->input->filename))
4798 xmlGenericError(xmlGenericErrorContext,
4799 "%s(%d): ", ctxt->input->filename,
4800 ctxt->input->line);
4801 xmlGenericError(xmlGenericErrorContext,
4802 "Entering IGNORE Conditional Section\n");
4803 }
4804
4805 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004806 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004807 * But disable SAX event generating DTD building in the meantime
4808 */
4809 state = ctxt->disableSAX;
4810 instate = ctxt->instate;
4811 ctxt->disableSAX = 1;
4812 ctxt->instate = XML_PARSER_IGNORE;
4813
4814 while (depth >= 0) {
4815 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4816 depth++;
4817 SKIP(3);
4818 continue;
4819 }
4820 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4821 if (--depth >= 0) SKIP(3);
4822 continue;
4823 }
4824 NEXT;
4825 continue;
4826 }
4827
4828 ctxt->disableSAX = state;
4829 ctxt->instate = instate;
4830
4831 if (xmlParserDebugEntities) {
4832 if ((ctxt->input != NULL) && (ctxt->input->filename))
4833 xmlGenericError(xmlGenericErrorContext,
4834 "%s(%d): ", ctxt->input->filename,
4835 ctxt->input->line);
4836 xmlGenericError(xmlGenericErrorContext,
4837 "Leaving IGNORE Conditional Section\n");
4838 }
4839
4840 } else {
4841 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4843 ctxt->sax->error(ctxt->userData,
4844 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4845 ctxt->wellFormed = 0;
4846 ctxt->disableSAX = 1;
4847 }
4848
4849 if (RAW == 0)
4850 SHRINK;
4851
4852 if (RAW == 0) {
4853 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4855 ctxt->sax->error(ctxt->userData,
4856 "XML conditional section not closed\n");
4857 ctxt->wellFormed = 0;
4858 ctxt->disableSAX = 1;
4859 } else {
4860 SKIP(3);
4861 }
4862}
4863
4864/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004865 * xmlParseMarkupDecl:
4866 * @ctxt: an XML parser context
4867 *
4868 * parse Markup declarations
4869 *
4870 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4871 * NotationDecl | PI | Comment
4872 *
4873 * [ VC: Proper Declaration/PE Nesting ]
4874 * Parameter-entity replacement text must be properly nested with
4875 * markup declarations. That is to say, if either the first character
4876 * or the last character of a markup declaration (markupdecl above) is
4877 * contained in the replacement text for a parameter-entity reference,
4878 * both must be contained in the same replacement text.
4879 *
4880 * [ WFC: PEs in Internal Subset ]
4881 * In the internal DTD subset, parameter-entity references can occur
4882 * only where markup declarations can occur, not within markup declarations.
4883 * (This does not apply to references that occur in external parameter
4884 * entities or to the external subset.)
4885 */
4886void
4887xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4888 GROW;
4889 xmlParseElementDecl(ctxt);
4890 xmlParseAttributeListDecl(ctxt);
4891 xmlParseEntityDecl(ctxt);
4892 xmlParseNotationDecl(ctxt);
4893 xmlParsePI(ctxt);
4894 xmlParseComment(ctxt);
4895 /*
4896 * This is only for internal subset. On external entities,
4897 * the replacement is done before parsing stage
4898 */
4899 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4900 xmlParsePEReference(ctxt);
4901
4902 /*
4903 * Conditional sections are allowed from entities included
4904 * by PE References in the internal subset.
4905 */
4906 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4907 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4908 xmlParseConditionalSections(ctxt);
4909 }
4910 }
4911
4912 ctxt->instate = XML_PARSER_DTD;
4913}
4914
4915/**
4916 * xmlParseTextDecl:
4917 * @ctxt: an XML parser context
4918 *
4919 * parse an XML declaration header for external entities
4920 *
4921 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4922 *
4923 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4924 */
4925
4926void
4927xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4928 xmlChar *version;
4929
4930 /*
4931 * We know that '<?xml' is here.
4932 */
4933 if ((RAW == '<') && (NXT(1) == '?') &&
4934 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4935 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4936 SKIP(5);
4937 } else {
4938 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4940 ctxt->sax->error(ctxt->userData,
4941 "Text declaration '<?xml' required\n");
4942 ctxt->wellFormed = 0;
4943 ctxt->disableSAX = 1;
4944
4945 return;
4946 }
4947
4948 if (!IS_BLANK(CUR)) {
4949 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4951 ctxt->sax->error(ctxt->userData,
4952 "Space needed after '<?xml'\n");
4953 ctxt->wellFormed = 0;
4954 ctxt->disableSAX = 1;
4955 }
4956 SKIP_BLANKS;
4957
4958 /*
4959 * We may have the VersionInfo here.
4960 */
4961 version = xmlParseVersionInfo(ctxt);
4962 if (version == NULL)
4963 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4964 ctxt->input->version = version;
4965
4966 /*
4967 * We must have the encoding declaration
4968 */
4969 if (!IS_BLANK(CUR)) {
4970 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4972 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4973 ctxt->wellFormed = 0;
4974 ctxt->disableSAX = 1;
4975 }
4976 xmlParseEncodingDecl(ctxt);
4977 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4978 /*
4979 * The XML REC instructs us to stop parsing right here
4980 */
4981 return;
4982 }
4983
4984 SKIP_BLANKS;
4985 if ((RAW == '?') && (NXT(1) == '>')) {
4986 SKIP(2);
4987 } else if (RAW == '>') {
4988 /* Deprecated old WD ... */
4989 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4991 ctxt->sax->error(ctxt->userData,
4992 "XML declaration must end-up with '?>'\n");
4993 ctxt->wellFormed = 0;
4994 ctxt->disableSAX = 1;
4995 NEXT;
4996 } else {
4997 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4999 ctxt->sax->error(ctxt->userData,
5000 "parsing XML declaration: '?>' expected\n");
5001 ctxt->wellFormed = 0;
5002 ctxt->disableSAX = 1;
5003 MOVETO_ENDTAG(CUR_PTR);
5004 NEXT;
5005 }
5006}
5007
5008/**
Owen Taylor3473f882001-02-23 17:55:21 +00005009 * xmlParseExternalSubset:
5010 * @ctxt: an XML parser context
5011 * @ExternalID: the external identifier
5012 * @SystemID: the system identifier (or URL)
5013 *
5014 * parse Markup declarations from an external subset
5015 *
5016 * [30] extSubset ::= textDecl? extSubsetDecl
5017 *
5018 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5019 */
5020void
5021xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5022 const xmlChar *SystemID) {
5023 GROW;
5024 if ((RAW == '<') && (NXT(1) == '?') &&
5025 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5026 (NXT(4) == 'l')) {
5027 xmlParseTextDecl(ctxt);
5028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5029 /*
5030 * The XML REC instructs us to stop parsing right here
5031 */
5032 ctxt->instate = XML_PARSER_EOF;
5033 return;
5034 }
5035 }
5036 if (ctxt->myDoc == NULL) {
5037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5038 }
5039 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5040 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5041
5042 ctxt->instate = XML_PARSER_DTD;
5043 ctxt->external = 1;
5044 while (((RAW == '<') && (NXT(1) == '?')) ||
5045 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005046 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005047 const xmlChar *check = CUR_PTR;
5048 int cons = ctxt->input->consumed;
5049 int tok = ctxt->token;
5050
5051 GROW;
5052 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5053 xmlParseConditionalSections(ctxt);
5054 } else if (IS_BLANK(CUR)) {
5055 NEXT;
5056 } else if (RAW == '%') {
5057 xmlParsePEReference(ctxt);
5058 } else
5059 xmlParseMarkupDecl(ctxt);
5060
5061 /*
5062 * Pop-up of finished entities.
5063 */
5064 while ((RAW == 0) && (ctxt->inputNr > 1))
5065 xmlPopInput(ctxt);
5066
5067 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5068 (tok == ctxt->token)) {
5069 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5071 ctxt->sax->error(ctxt->userData,
5072 "Content error in the external subset\n");
5073 ctxt->wellFormed = 0;
5074 ctxt->disableSAX = 1;
5075 break;
5076 }
5077 }
5078
5079 if (RAW != 0) {
5080 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5082 ctxt->sax->error(ctxt->userData,
5083 "Extra content at the end of the document\n");
5084 ctxt->wellFormed = 0;
5085 ctxt->disableSAX = 1;
5086 }
5087
5088}
5089
5090/**
5091 * xmlParseReference:
5092 * @ctxt: an XML parser context
5093 *
5094 * parse and handle entity references in content, depending on the SAX
5095 * interface, this may end-up in a call to character() if this is a
5096 * CharRef, a predefined entity, if there is no reference() callback.
5097 * or if the parser was asked to switch to that mode.
5098 *
5099 * [67] Reference ::= EntityRef | CharRef
5100 */
5101void
5102xmlParseReference(xmlParserCtxtPtr ctxt) {
5103 xmlEntityPtr ent;
5104 xmlChar *val;
5105 if (RAW != '&') return;
5106
5107 if (NXT(1) == '#') {
5108 int i = 0;
5109 xmlChar out[10];
5110 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005111 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005112
5113 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5114 /*
5115 * So we are using non-UTF-8 buffers
5116 * Check that the char fit on 8bits, if not
5117 * generate a CharRef.
5118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005119 if (value <= 0xFF) {
5120 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005121 out[1] = 0;
5122 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5123 (!ctxt->disableSAX))
5124 ctxt->sax->characters(ctxt->userData, out, 1);
5125 } else {
5126 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005127 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005128 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005129 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005130 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5131 (!ctxt->disableSAX))
5132 ctxt->sax->reference(ctxt->userData, out);
5133 }
5134 } else {
5135 /*
5136 * Just encode the value in UTF-8
5137 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005138 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 out[i] = 0;
5140 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5141 (!ctxt->disableSAX))
5142 ctxt->sax->characters(ctxt->userData, out, i);
5143 }
5144 } else {
5145 ent = xmlParseEntityRef(ctxt);
5146 if (ent == NULL) return;
5147 if ((ent->name != NULL) &&
5148 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5149 xmlNodePtr list = NULL;
5150 int ret;
5151
5152
5153 /*
5154 * The first reference to the entity trigger a parsing phase
5155 * where the ent->children is filled with the result from
5156 * the parsing.
5157 */
5158 if (ent->children == NULL) {
5159 xmlChar *value;
5160 value = ent->content;
5161
5162 /*
5163 * Check that this entity is well formed
5164 */
5165 if ((value != NULL) &&
5166 (value[1] == 0) && (value[0] == '<') &&
5167 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5168 /*
5169 * DONE: get definite answer on this !!!
5170 * Lots of entity decls are used to declare a single
5171 * char
5172 * <!ENTITY lt "<">
5173 * Which seems to be valid since
5174 * 2.4: The ampersand character (&) and the left angle
5175 * bracket (<) may appear in their literal form only
5176 * when used ... They are also legal within the literal
5177 * entity value of an internal entity declaration;i
5178 * see "4.3.2 Well-Formed Parsed Entities".
5179 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5180 * Looking at the OASIS test suite and James Clark
5181 * tests, this is broken. However the XML REC uses
5182 * it. Is the XML REC not well-formed ????
5183 * This is a hack to avoid this problem
5184 *
5185 * ANSWER: since lt gt amp .. are already defined,
5186 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005187 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005188 * is lousy but acceptable.
5189 */
5190 list = xmlNewDocText(ctxt->myDoc, value);
5191 if (list != NULL) {
5192 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5193 (ent->children == NULL)) {
5194 ent->children = list;
5195 ent->last = list;
5196 list->parent = (xmlNodePtr) ent;
5197 } else {
5198 xmlFreeNodeList(list);
5199 }
5200 } else if (list != NULL) {
5201 xmlFreeNodeList(list);
5202 }
5203 } else {
5204 /*
5205 * 4.3.2: An internal general parsed entity is well-formed
5206 * if its replacement text matches the production labeled
5207 * content.
5208 */
5209 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5210 ctxt->depth++;
5211 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5212 ctxt->sax, NULL, ctxt->depth,
5213 value, &list);
5214 ctxt->depth--;
5215 } else if (ent->etype ==
5216 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5217 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005218 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005219 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005220 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 ctxt->depth--;
5222 } else {
5223 ret = -1;
5224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5225 ctxt->sax->error(ctxt->userData,
5226 "Internal: invalid entity type\n");
5227 }
5228 if (ret == XML_ERR_ENTITY_LOOP) {
5229 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5231 ctxt->sax->error(ctxt->userData,
5232 "Detected entity reference loop\n");
5233 ctxt->wellFormed = 0;
5234 ctxt->disableSAX = 1;
5235 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005236 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5237 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005238 (ent->children == NULL)) {
5239 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005240 if (ctxt->replaceEntities) {
5241 /*
5242 * Prune it directly in the generated document
5243 * except for single text nodes.
5244 */
5245 if ((list->type == XML_TEXT_NODE) &&
5246 (list->next == NULL)) {
5247 list->parent = (xmlNodePtr) ent;
5248 list = NULL;
5249 } else {
5250 while (list != NULL) {
5251 list->parent = (xmlNodePtr) ctxt->node;
5252 if (list->next == NULL)
5253 ent->last = list;
5254 list = list->next;
5255 }
5256 list = ent->children;
5257 }
5258 } else {
5259 while (list != NULL) {
5260 list->parent = (xmlNodePtr) ent;
5261 if (list->next == NULL)
5262 ent->last = list;
5263 list = list->next;
5264 }
Owen Taylor3473f882001-02-23 17:55:21 +00005265 }
5266 } else {
5267 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005268 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005269 }
5270 } else if (ret > 0) {
5271 ctxt->errNo = ret;
5272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273 ctxt->sax->error(ctxt->userData,
5274 "Entity value required\n");
5275 ctxt->wellFormed = 0;
5276 ctxt->disableSAX = 1;
5277 } else if (list != NULL) {
5278 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005279 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005280 }
5281 }
5282 }
5283 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5284 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5285 /*
5286 * Create a node.
5287 */
5288 ctxt->sax->reference(ctxt->userData, ent->name);
5289 return;
5290 } else if (ctxt->replaceEntities) {
5291 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5292 /*
5293 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005294 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005295 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005296 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005297 if (list == NULL) {
5298 xmlNodePtr new, cur;
5299 cur = ent->children;
5300 while (cur != NULL) {
5301 new = xmlCopyNode(cur, 1);
5302 xmlAddChild(ctxt->node, new);
5303 if (cur == ent->last)
5304 break;
5305 cur = cur->next;
5306 }
5307 } else {
5308 /*
5309 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005310 * node with a possible previous text one which
5311 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005312 */
5313 if (ent->children->type == XML_TEXT_NODE)
5314 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5315 if ((ent->last != ent->children) &&
5316 (ent->last->type == XML_TEXT_NODE))
5317 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5318 xmlAddChildList(ctxt->node, ent->children);
5319 }
5320
Owen Taylor3473f882001-02-23 17:55:21 +00005321 /*
5322 * This is to avoid a nasty side effect, see
5323 * characters() in SAX.c
5324 */
5325 ctxt->nodemem = 0;
5326 ctxt->nodelen = 0;
5327 return;
5328 } else {
5329 /*
5330 * Probably running in SAX mode
5331 */
5332 xmlParserInputPtr input;
5333
5334 input = xmlNewEntityInputStream(ctxt, ent);
5335 xmlPushInput(ctxt, input);
5336 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5337 (RAW == '<') && (NXT(1) == '?') &&
5338 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5339 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5340 xmlParseTextDecl(ctxt);
5341 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5342 /*
5343 * The XML REC instructs us to stop parsing right here
5344 */
5345 ctxt->instate = XML_PARSER_EOF;
5346 return;
5347 }
5348 if (input->standalone == 1) {
5349 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5351 ctxt->sax->error(ctxt->userData,
5352 "external parsed entities cannot be standalone\n");
5353 ctxt->wellFormed = 0;
5354 ctxt->disableSAX = 1;
5355 }
5356 }
5357 return;
5358 }
5359 }
5360 } else {
5361 val = ent->content;
5362 if (val == NULL) return;
5363 /*
5364 * inline the entity.
5365 */
5366 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5367 (!ctxt->disableSAX))
5368 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5369 }
5370 }
5371}
5372
5373/**
5374 * xmlParseEntityRef:
5375 * @ctxt: an XML parser context
5376 *
5377 * parse ENTITY references declarations
5378 *
5379 * [68] EntityRef ::= '&' Name ';'
5380 *
5381 * [ WFC: Entity Declared ]
5382 * In a document without any DTD, a document with only an internal DTD
5383 * subset which contains no parameter entity references, or a document
5384 * with "standalone='yes'", the Name given in the entity reference
5385 * must match that in an entity declaration, except that well-formed
5386 * documents need not declare any of the following entities: amp, lt,
5387 * gt, apos, quot. The declaration of a parameter entity must precede
5388 * any reference to it. Similarly, the declaration of a general entity
5389 * must precede any reference to it which appears in a default value in an
5390 * attribute-list declaration. Note that if entities are declared in the
5391 * external subset or in external parameter entities, a non-validating
5392 * processor is not obligated to read and process their declarations;
5393 * for such documents, the rule that an entity must be declared is a
5394 * well-formedness constraint only if standalone='yes'.
5395 *
5396 * [ WFC: Parsed Entity ]
5397 * An entity reference must not contain the name of an unparsed entity
5398 *
5399 * Returns the xmlEntityPtr if found, or NULL otherwise.
5400 */
5401xmlEntityPtr
5402xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5403 xmlChar *name;
5404 xmlEntityPtr ent = NULL;
5405
5406 GROW;
5407
5408 if (RAW == '&') {
5409 NEXT;
5410 name = xmlParseName(ctxt);
5411 if (name == NULL) {
5412 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5414 ctxt->sax->error(ctxt->userData,
5415 "xmlParseEntityRef: no name\n");
5416 ctxt->wellFormed = 0;
5417 ctxt->disableSAX = 1;
5418 } else {
5419 if (RAW == ';') {
5420 NEXT;
5421 /*
5422 * Ask first SAX for entity resolution, otherwise try the
5423 * predefined set.
5424 */
5425 if (ctxt->sax != NULL) {
5426 if (ctxt->sax->getEntity != NULL)
5427 ent = ctxt->sax->getEntity(ctxt->userData, name);
5428 if (ent == NULL)
5429 ent = xmlGetPredefinedEntity(name);
5430 }
5431 /*
5432 * [ WFC: Entity Declared ]
5433 * In a document without any DTD, a document with only an
5434 * internal DTD subset which contains no parameter entity
5435 * references, or a document with "standalone='yes'", the
5436 * Name given in the entity reference must match that in an
5437 * entity declaration, except that well-formed documents
5438 * need not declare any of the following entities: amp, lt,
5439 * gt, apos, quot.
5440 * The declaration of a parameter entity must precede any
5441 * reference to it.
5442 * Similarly, the declaration of a general entity must
5443 * precede any reference to it which appears in a default
5444 * value in an attribute-list declaration. Note that if
5445 * entities are declared in the external subset or in
5446 * external parameter entities, a non-validating processor
5447 * is not obligated to read and process their declarations;
5448 * for such documents, the rule that an entity must be
5449 * declared is a well-formedness constraint only if
5450 * standalone='yes'.
5451 */
5452 if (ent == NULL) {
5453 if ((ctxt->standalone == 1) ||
5454 ((ctxt->hasExternalSubset == 0) &&
5455 (ctxt->hasPErefs == 0))) {
5456 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5458 ctxt->sax->error(ctxt->userData,
5459 "Entity '%s' not defined\n", name);
5460 ctxt->wellFormed = 0;
5461 ctxt->disableSAX = 1;
5462 } else {
5463 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005465 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005466 "Entity '%s' not defined\n", name);
5467 }
5468 }
5469
5470 /*
5471 * [ WFC: Parsed Entity ]
5472 * An entity reference must not contain the name of an
5473 * unparsed entity
5474 */
5475 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5476 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5478 ctxt->sax->error(ctxt->userData,
5479 "Entity reference to unparsed entity %s\n", name);
5480 ctxt->wellFormed = 0;
5481 ctxt->disableSAX = 1;
5482 }
5483
5484 /*
5485 * [ WFC: No External Entity References ]
5486 * Attribute values cannot contain direct or indirect
5487 * entity references to external entities.
5488 */
5489 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5490 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5491 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5493 ctxt->sax->error(ctxt->userData,
5494 "Attribute references external entity '%s'\n", name);
5495 ctxt->wellFormed = 0;
5496 ctxt->disableSAX = 1;
5497 }
5498 /*
5499 * [ WFC: No < in Attribute Values ]
5500 * The replacement text of any entity referred to directly or
5501 * indirectly in an attribute value (other than "&lt;") must
5502 * not contain a <.
5503 */
5504 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5505 (ent != NULL) &&
5506 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5507 (ent->content != NULL) &&
5508 (xmlStrchr(ent->content, '<'))) {
5509 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5511 ctxt->sax->error(ctxt->userData,
5512 "'<' in entity '%s' is not allowed in attributes values\n", name);
5513 ctxt->wellFormed = 0;
5514 ctxt->disableSAX = 1;
5515 }
5516
5517 /*
5518 * Internal check, no parameter entities here ...
5519 */
5520 else {
5521 switch (ent->etype) {
5522 case XML_INTERNAL_PARAMETER_ENTITY:
5523 case XML_EXTERNAL_PARAMETER_ENTITY:
5524 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5526 ctxt->sax->error(ctxt->userData,
5527 "Attempt to reference the parameter entity '%s'\n", name);
5528 ctxt->wellFormed = 0;
5529 ctxt->disableSAX = 1;
5530 break;
5531 default:
5532 break;
5533 }
5534 }
5535
5536 /*
5537 * [ WFC: No Recursion ]
5538 * A parsed entity must not contain a recursive reference
5539 * to itself, either directly or indirectly.
5540 * Done somewhere else
5541 */
5542
5543 } else {
5544 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5546 ctxt->sax->error(ctxt->userData,
5547 "xmlParseEntityRef: expecting ';'\n");
5548 ctxt->wellFormed = 0;
5549 ctxt->disableSAX = 1;
5550 }
5551 xmlFree(name);
5552 }
5553 }
5554 return(ent);
5555}
5556
5557/**
5558 * xmlParseStringEntityRef:
5559 * @ctxt: an XML parser context
5560 * @str: a pointer to an index in the string
5561 *
5562 * parse ENTITY references declarations, but this version parses it from
5563 * a string value.
5564 *
5565 * [68] EntityRef ::= '&' Name ';'
5566 *
5567 * [ WFC: Entity Declared ]
5568 * In a document without any DTD, a document with only an internal DTD
5569 * subset which contains no parameter entity references, or a document
5570 * with "standalone='yes'", the Name given in the entity reference
5571 * must match that in an entity declaration, except that well-formed
5572 * documents need not declare any of the following entities: amp, lt,
5573 * gt, apos, quot. The declaration of a parameter entity must precede
5574 * any reference to it. Similarly, the declaration of a general entity
5575 * must precede any reference to it which appears in a default value in an
5576 * attribute-list declaration. Note that if entities are declared in the
5577 * external subset or in external parameter entities, a non-validating
5578 * processor is not obligated to read and process their declarations;
5579 * for such documents, the rule that an entity must be declared is a
5580 * well-formedness constraint only if standalone='yes'.
5581 *
5582 * [ WFC: Parsed Entity ]
5583 * An entity reference must not contain the name of an unparsed entity
5584 *
5585 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5586 * is updated to the current location in the string.
5587 */
5588xmlEntityPtr
5589xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5590 xmlChar *name;
5591 const xmlChar *ptr;
5592 xmlChar cur;
5593 xmlEntityPtr ent = NULL;
5594
5595 if ((str == NULL) || (*str == NULL))
5596 return(NULL);
5597 ptr = *str;
5598 cur = *ptr;
5599 if (cur == '&') {
5600 ptr++;
5601 cur = *ptr;
5602 name = xmlParseStringName(ctxt, &ptr);
5603 if (name == NULL) {
5604 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5606 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005607 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005608 ctxt->wellFormed = 0;
5609 ctxt->disableSAX = 1;
5610 } else {
5611 if (*ptr == ';') {
5612 ptr++;
5613 /*
5614 * Ask first SAX for entity resolution, otherwise try the
5615 * predefined set.
5616 */
5617 if (ctxt->sax != NULL) {
5618 if (ctxt->sax->getEntity != NULL)
5619 ent = ctxt->sax->getEntity(ctxt->userData, name);
5620 if (ent == NULL)
5621 ent = xmlGetPredefinedEntity(name);
5622 }
5623 /*
5624 * [ WFC: Entity Declared ]
5625 * In a document without any DTD, a document with only an
5626 * internal DTD subset which contains no parameter entity
5627 * references, or a document with "standalone='yes'", the
5628 * Name given in the entity reference must match that in an
5629 * entity declaration, except that well-formed documents
5630 * need not declare any of the following entities: amp, lt,
5631 * gt, apos, quot.
5632 * The declaration of a parameter entity must precede any
5633 * reference to it.
5634 * Similarly, the declaration of a general entity must
5635 * precede any reference to it which appears in a default
5636 * value in an attribute-list declaration. Note that if
5637 * entities are declared in the external subset or in
5638 * external parameter entities, a non-validating processor
5639 * is not obligated to read and process their declarations;
5640 * for such documents, the rule that an entity must be
5641 * declared is a well-formedness constraint only if
5642 * standalone='yes'.
5643 */
5644 if (ent == NULL) {
5645 if ((ctxt->standalone == 1) ||
5646 ((ctxt->hasExternalSubset == 0) &&
5647 (ctxt->hasPErefs == 0))) {
5648 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5650 ctxt->sax->error(ctxt->userData,
5651 "Entity '%s' not defined\n", name);
5652 ctxt->wellFormed = 0;
5653 ctxt->disableSAX = 1;
5654 } else {
5655 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5656 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5657 ctxt->sax->warning(ctxt->userData,
5658 "Entity '%s' not defined\n", name);
5659 }
5660 }
5661
5662 /*
5663 * [ WFC: Parsed Entity ]
5664 * An entity reference must not contain the name of an
5665 * unparsed entity
5666 */
5667 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5668 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5670 ctxt->sax->error(ctxt->userData,
5671 "Entity reference to unparsed entity %s\n", name);
5672 ctxt->wellFormed = 0;
5673 ctxt->disableSAX = 1;
5674 }
5675
5676 /*
5677 * [ WFC: No External Entity References ]
5678 * Attribute values cannot contain direct or indirect
5679 * entity references to external entities.
5680 */
5681 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5682 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5683 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5685 ctxt->sax->error(ctxt->userData,
5686 "Attribute references external entity '%s'\n", name);
5687 ctxt->wellFormed = 0;
5688 ctxt->disableSAX = 1;
5689 }
5690 /*
5691 * [ WFC: No < in Attribute Values ]
5692 * The replacement text of any entity referred to directly or
5693 * indirectly in an attribute value (other than "&lt;") must
5694 * not contain a <.
5695 */
5696 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5697 (ent != NULL) &&
5698 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5699 (ent->content != NULL) &&
5700 (xmlStrchr(ent->content, '<'))) {
5701 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5703 ctxt->sax->error(ctxt->userData,
5704 "'<' in entity '%s' is not allowed in attributes values\n", name);
5705 ctxt->wellFormed = 0;
5706 ctxt->disableSAX = 1;
5707 }
5708
5709 /*
5710 * Internal check, no parameter entities here ...
5711 */
5712 else {
5713 switch (ent->etype) {
5714 case XML_INTERNAL_PARAMETER_ENTITY:
5715 case XML_EXTERNAL_PARAMETER_ENTITY:
5716 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5718 ctxt->sax->error(ctxt->userData,
5719 "Attempt to reference the parameter entity '%s'\n", name);
5720 ctxt->wellFormed = 0;
5721 ctxt->disableSAX = 1;
5722 break;
5723 default:
5724 break;
5725 }
5726 }
5727
5728 /*
5729 * [ WFC: No Recursion ]
5730 * A parsed entity must not contain a recursive reference
5731 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005732 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005733 */
5734
5735 } else {
5736 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5738 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005739 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005740 ctxt->wellFormed = 0;
5741 ctxt->disableSAX = 1;
5742 }
5743 xmlFree(name);
5744 }
5745 }
5746 *str = ptr;
5747 return(ent);
5748}
5749
5750/**
5751 * xmlParsePEReference:
5752 * @ctxt: an XML parser context
5753 *
5754 * parse PEReference declarations
5755 * The entity content is handled directly by pushing it's content as
5756 * a new input stream.
5757 *
5758 * [69] PEReference ::= '%' Name ';'
5759 *
5760 * [ WFC: No Recursion ]
5761 * A parsed entity must not contain a recursive
5762 * reference to itself, either directly or indirectly.
5763 *
5764 * [ WFC: Entity Declared ]
5765 * In a document without any DTD, a document with only an internal DTD
5766 * subset which contains no parameter entity references, or a document
5767 * with "standalone='yes'", ... ... The declaration of a parameter
5768 * entity must precede any reference to it...
5769 *
5770 * [ VC: Entity Declared ]
5771 * In a document with an external subset or external parameter entities
5772 * with "standalone='no'", ... ... The declaration of a parameter entity
5773 * must precede any reference to it...
5774 *
5775 * [ WFC: In DTD ]
5776 * Parameter-entity references may only appear in the DTD.
5777 * NOTE: misleading but this is handled.
5778 */
5779void
5780xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5781 xmlChar *name;
5782 xmlEntityPtr entity = NULL;
5783 xmlParserInputPtr input;
5784
5785 if (RAW == '%') {
5786 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005787 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005788 if (name == NULL) {
5789 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5791 ctxt->sax->error(ctxt->userData,
5792 "xmlParsePEReference: no name\n");
5793 ctxt->wellFormed = 0;
5794 ctxt->disableSAX = 1;
5795 } else {
5796 if (RAW == ';') {
5797 NEXT;
5798 if ((ctxt->sax != NULL) &&
5799 (ctxt->sax->getParameterEntity != NULL))
5800 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5801 name);
5802 if (entity == NULL) {
5803 /*
5804 * [ WFC: Entity Declared ]
5805 * In a document without any DTD, a document with only an
5806 * internal DTD subset which contains no parameter entity
5807 * references, or a document with "standalone='yes'", ...
5808 * ... The declaration of a parameter entity must precede
5809 * any reference to it...
5810 */
5811 if ((ctxt->standalone == 1) ||
5812 ((ctxt->hasExternalSubset == 0) &&
5813 (ctxt->hasPErefs == 0))) {
5814 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5815 if ((!ctxt->disableSAX) &&
5816 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData,
5818 "PEReference: %%%s; not found\n", name);
5819 ctxt->wellFormed = 0;
5820 ctxt->disableSAX = 1;
5821 } else {
5822 /*
5823 * [ VC: Entity Declared ]
5824 * In a document with an external subset or external
5825 * parameter entities with "standalone='no'", ...
5826 * ... The declaration of a parameter entity must precede
5827 * any reference to it...
5828 */
5829 if ((!ctxt->disableSAX) &&
5830 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5831 ctxt->sax->warning(ctxt->userData,
5832 "PEReference: %%%s; not found\n", name);
5833 ctxt->valid = 0;
5834 }
5835 } else {
5836 /*
5837 * Internal checking in case the entity quest barfed
5838 */
5839 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5840 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5841 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5842 ctxt->sax->warning(ctxt->userData,
5843 "Internal: %%%s; is not a parameter entity\n", name);
5844 } else {
5845 /*
5846 * TODO !!!
5847 * handle the extra spaces added before and after
5848 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5849 */
5850 input = xmlNewEntityInputStream(ctxt, entity);
5851 xmlPushInput(ctxt, input);
5852 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5853 (RAW == '<') && (NXT(1) == '?') &&
5854 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5855 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5856 xmlParseTextDecl(ctxt);
5857 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5858 /*
5859 * The XML REC instructs us to stop parsing
5860 * right here
5861 */
5862 ctxt->instate = XML_PARSER_EOF;
5863 xmlFree(name);
5864 return;
5865 }
5866 }
5867 if (ctxt->token == 0)
5868 ctxt->token = ' ';
5869 }
5870 }
5871 ctxt->hasPErefs = 1;
5872 } else {
5873 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5875 ctxt->sax->error(ctxt->userData,
5876 "xmlParsePEReference: expecting ';'\n");
5877 ctxt->wellFormed = 0;
5878 ctxt->disableSAX = 1;
5879 }
5880 xmlFree(name);
5881 }
5882 }
5883}
5884
5885/**
5886 * xmlParseStringPEReference:
5887 * @ctxt: an XML parser context
5888 * @str: a pointer to an index in the string
5889 *
5890 * parse PEReference declarations
5891 *
5892 * [69] PEReference ::= '%' Name ';'
5893 *
5894 * [ WFC: No Recursion ]
5895 * A parsed entity must not contain a recursive
5896 * reference to itself, either directly or indirectly.
5897 *
5898 * [ WFC: Entity Declared ]
5899 * In a document without any DTD, a document with only an internal DTD
5900 * subset which contains no parameter entity references, or a document
5901 * with "standalone='yes'", ... ... The declaration of a parameter
5902 * entity must precede any reference to it...
5903 *
5904 * [ VC: Entity Declared ]
5905 * In a document with an external subset or external parameter entities
5906 * with "standalone='no'", ... ... The declaration of a parameter entity
5907 * must precede any reference to it...
5908 *
5909 * [ WFC: In DTD ]
5910 * Parameter-entity references may only appear in the DTD.
5911 * NOTE: misleading but this is handled.
5912 *
5913 * Returns the string of the entity content.
5914 * str is updated to the current value of the index
5915 */
5916xmlEntityPtr
5917xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5918 const xmlChar *ptr;
5919 xmlChar cur;
5920 xmlChar *name;
5921 xmlEntityPtr entity = NULL;
5922
5923 if ((str == NULL) || (*str == NULL)) return(NULL);
5924 ptr = *str;
5925 cur = *ptr;
5926 if (cur == '%') {
5927 ptr++;
5928 cur = *ptr;
5929 name = xmlParseStringName(ctxt, &ptr);
5930 if (name == NULL) {
5931 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5933 ctxt->sax->error(ctxt->userData,
5934 "xmlParseStringPEReference: no name\n");
5935 ctxt->wellFormed = 0;
5936 ctxt->disableSAX = 1;
5937 } else {
5938 cur = *ptr;
5939 if (cur == ';') {
5940 ptr++;
5941 cur = *ptr;
5942 if ((ctxt->sax != NULL) &&
5943 (ctxt->sax->getParameterEntity != NULL))
5944 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5945 name);
5946 if (entity == NULL) {
5947 /*
5948 * [ WFC: Entity Declared ]
5949 * In a document without any DTD, a document with only an
5950 * internal DTD subset which contains no parameter entity
5951 * references, or a document with "standalone='yes'", ...
5952 * ... The declaration of a parameter entity must precede
5953 * any reference to it...
5954 */
5955 if ((ctxt->standalone == 1) ||
5956 ((ctxt->hasExternalSubset == 0) &&
5957 (ctxt->hasPErefs == 0))) {
5958 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5960 ctxt->sax->error(ctxt->userData,
5961 "PEReference: %%%s; not found\n", name);
5962 ctxt->wellFormed = 0;
5963 ctxt->disableSAX = 1;
5964 } else {
5965 /*
5966 * [ VC: Entity Declared ]
5967 * In a document with an external subset or external
5968 * parameter entities with "standalone='no'", ...
5969 * ... The declaration of a parameter entity must
5970 * precede any reference to it...
5971 */
5972 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5973 ctxt->sax->warning(ctxt->userData,
5974 "PEReference: %%%s; not found\n", name);
5975 ctxt->valid = 0;
5976 }
5977 } else {
5978 /*
5979 * Internal checking in case the entity quest barfed
5980 */
5981 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5982 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5983 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5984 ctxt->sax->warning(ctxt->userData,
5985 "Internal: %%%s; is not a parameter entity\n", name);
5986 }
5987 }
5988 ctxt->hasPErefs = 1;
5989 } else {
5990 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5992 ctxt->sax->error(ctxt->userData,
5993 "xmlParseStringPEReference: expecting ';'\n");
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 }
5997 xmlFree(name);
5998 }
5999 }
6000 *str = ptr;
6001 return(entity);
6002}
6003
6004/**
6005 * xmlParseDocTypeDecl:
6006 * @ctxt: an XML parser context
6007 *
6008 * parse a DOCTYPE declaration
6009 *
6010 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6011 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6012 *
6013 * [ VC: Root Element Type ]
6014 * The Name in the document type declaration must match the element
6015 * type of the root element.
6016 */
6017
6018void
6019xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6020 xmlChar *name = NULL;
6021 xmlChar *ExternalID = NULL;
6022 xmlChar *URI = NULL;
6023
6024 /*
6025 * We know that '<!DOCTYPE' has been detected.
6026 */
6027 SKIP(9);
6028
6029 SKIP_BLANKS;
6030
6031 /*
6032 * Parse the DOCTYPE name.
6033 */
6034 name = xmlParseName(ctxt);
6035 if (name == NULL) {
6036 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6038 ctxt->sax->error(ctxt->userData,
6039 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6040 ctxt->wellFormed = 0;
6041 ctxt->disableSAX = 1;
6042 }
6043 ctxt->intSubName = name;
6044
6045 SKIP_BLANKS;
6046
6047 /*
6048 * Check for SystemID and ExternalID
6049 */
6050 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6051
6052 if ((URI != NULL) || (ExternalID != NULL)) {
6053 ctxt->hasExternalSubset = 1;
6054 }
6055 ctxt->extSubURI = URI;
6056 ctxt->extSubSystem = ExternalID;
6057
6058 SKIP_BLANKS;
6059
6060 /*
6061 * Create and update the internal subset.
6062 */
6063 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6064 (!ctxt->disableSAX))
6065 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6066
6067 /*
6068 * Is there any internal subset declarations ?
6069 * they are handled separately in xmlParseInternalSubset()
6070 */
6071 if (RAW == '[')
6072 return;
6073
6074 /*
6075 * We should be at the end of the DOCTYPE declaration.
6076 */
6077 if (RAW != '>') {
6078 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006080 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006081 ctxt->wellFormed = 0;
6082 ctxt->disableSAX = 1;
6083 }
6084 NEXT;
6085}
6086
6087/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006088 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006089 * @ctxt: an XML parser context
6090 *
6091 * parse the internal subset declaration
6092 *
6093 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6094 */
6095
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006096static void
Owen Taylor3473f882001-02-23 17:55:21 +00006097xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6098 /*
6099 * Is there any DTD definition ?
6100 */
6101 if (RAW == '[') {
6102 ctxt->instate = XML_PARSER_DTD;
6103 NEXT;
6104 /*
6105 * Parse the succession of Markup declarations and
6106 * PEReferences.
6107 * Subsequence (markupdecl | PEReference | S)*
6108 */
6109 while (RAW != ']') {
6110 const xmlChar *check = CUR_PTR;
6111 int cons = ctxt->input->consumed;
6112
6113 SKIP_BLANKS;
6114 xmlParseMarkupDecl(ctxt);
6115 xmlParsePEReference(ctxt);
6116
6117 /*
6118 * Pop-up of finished entities.
6119 */
6120 while ((RAW == 0) && (ctxt->inputNr > 1))
6121 xmlPopInput(ctxt);
6122
6123 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6124 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6126 ctxt->sax->error(ctxt->userData,
6127 "xmlParseInternalSubset: error detected in Markup declaration\n");
6128 ctxt->wellFormed = 0;
6129 ctxt->disableSAX = 1;
6130 break;
6131 }
6132 }
6133 if (RAW == ']') {
6134 NEXT;
6135 SKIP_BLANKS;
6136 }
6137 }
6138
6139 /*
6140 * We should be at the end of the DOCTYPE declaration.
6141 */
6142 if (RAW != '>') {
6143 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006145 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006146 ctxt->wellFormed = 0;
6147 ctxt->disableSAX = 1;
6148 }
6149 NEXT;
6150}
6151
6152/**
6153 * xmlParseAttribute:
6154 * @ctxt: an XML parser context
6155 * @value: a xmlChar ** used to store the value of the attribute
6156 *
6157 * parse an attribute
6158 *
6159 * [41] Attribute ::= Name Eq AttValue
6160 *
6161 * [ WFC: No External Entity References ]
6162 * Attribute values cannot contain direct or indirect entity references
6163 * to external entities.
6164 *
6165 * [ WFC: No < in Attribute Values ]
6166 * The replacement text of any entity referred to directly or indirectly in
6167 * an attribute value (other than "&lt;") must not contain a <.
6168 *
6169 * [ VC: Attribute Value Type ]
6170 * The attribute must have been declared; the value must be of the type
6171 * declared for it.
6172 *
6173 * [25] Eq ::= S? '=' S?
6174 *
6175 * With namespace:
6176 *
6177 * [NS 11] Attribute ::= QName Eq AttValue
6178 *
6179 * Also the case QName == xmlns:??? is handled independently as a namespace
6180 * definition.
6181 *
6182 * Returns the attribute name, and the value in *value.
6183 */
6184
6185xmlChar *
6186xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6187 xmlChar *name, *val;
6188
6189 *value = NULL;
6190 name = xmlParseName(ctxt);
6191 if (name == NULL) {
6192 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6194 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6195 ctxt->wellFormed = 0;
6196 ctxt->disableSAX = 1;
6197 return(NULL);
6198 }
6199
6200 /*
6201 * read the value
6202 */
6203 SKIP_BLANKS;
6204 if (RAW == '=') {
6205 NEXT;
6206 SKIP_BLANKS;
6207 val = xmlParseAttValue(ctxt);
6208 ctxt->instate = XML_PARSER_CONTENT;
6209 } else {
6210 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6212 ctxt->sax->error(ctxt->userData,
6213 "Specification mandate value for attribute %s\n", name);
6214 ctxt->wellFormed = 0;
6215 ctxt->disableSAX = 1;
6216 xmlFree(name);
6217 return(NULL);
6218 }
6219
6220 /*
6221 * Check that xml:lang conforms to the specification
6222 * No more registered as an error, just generate a warning now
6223 * since this was deprecated in XML second edition
6224 */
6225 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6226 if (!xmlCheckLanguageID(val)) {
6227 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6228 ctxt->sax->warning(ctxt->userData,
6229 "Malformed value for xml:lang : %s\n", val);
6230 }
6231 }
6232
6233 /*
6234 * Check that xml:space conforms to the specification
6235 */
6236 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6237 if (xmlStrEqual(val, BAD_CAST "default"))
6238 *(ctxt->space) = 0;
6239 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6240 *(ctxt->space) = 1;
6241 else {
6242 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6244 ctxt->sax->error(ctxt->userData,
6245"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6246 val);
6247 ctxt->wellFormed = 0;
6248 ctxt->disableSAX = 1;
6249 }
6250 }
6251
6252 *value = val;
6253 return(name);
6254}
6255
6256/**
6257 * xmlParseStartTag:
6258 * @ctxt: an XML parser context
6259 *
6260 * parse a start of tag either for rule element or
6261 * EmptyElement. In both case we don't parse the tag closing chars.
6262 *
6263 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6264 *
6265 * [ WFC: Unique Att Spec ]
6266 * No attribute name may appear more than once in the same start-tag or
6267 * empty-element tag.
6268 *
6269 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6270 *
6271 * [ WFC: Unique Att Spec ]
6272 * No attribute name may appear more than once in the same start-tag or
6273 * empty-element tag.
6274 *
6275 * With namespace:
6276 *
6277 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6278 *
6279 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6280 *
6281 * Returns the element name parsed
6282 */
6283
6284xmlChar *
6285xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6286 xmlChar *name;
6287 xmlChar *attname;
6288 xmlChar *attvalue;
6289 const xmlChar **atts = NULL;
6290 int nbatts = 0;
6291 int maxatts = 0;
6292 int i;
6293
6294 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006295 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006296
6297 name = xmlParseName(ctxt);
6298 if (name == NULL) {
6299 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6301 ctxt->sax->error(ctxt->userData,
6302 "xmlParseStartTag: invalid element name\n");
6303 ctxt->wellFormed = 0;
6304 ctxt->disableSAX = 1;
6305 return(NULL);
6306 }
6307
6308 /*
6309 * Now parse the attributes, it ends up with the ending
6310 *
6311 * (S Attribute)* S?
6312 */
6313 SKIP_BLANKS;
6314 GROW;
6315
Daniel Veillard21a0f912001-02-25 19:54:14 +00006316 while ((RAW != '>') &&
6317 ((RAW != '/') || (NXT(1) != '>')) &&
6318 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006319 const xmlChar *q = CUR_PTR;
6320 int cons = ctxt->input->consumed;
6321
6322 attname = xmlParseAttribute(ctxt, &attvalue);
6323 if ((attname != NULL) && (attvalue != NULL)) {
6324 /*
6325 * [ WFC: Unique Att Spec ]
6326 * No attribute name may appear more than once in the same
6327 * start-tag or empty-element tag.
6328 */
6329 for (i = 0; i < nbatts;i += 2) {
6330 if (xmlStrEqual(atts[i], attname)) {
6331 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6333 ctxt->sax->error(ctxt->userData,
6334 "Attribute %s redefined\n",
6335 attname);
6336 ctxt->wellFormed = 0;
6337 ctxt->disableSAX = 1;
6338 xmlFree(attname);
6339 xmlFree(attvalue);
6340 goto failed;
6341 }
6342 }
6343
6344 /*
6345 * Add the pair to atts
6346 */
6347 if (atts == NULL) {
6348 maxatts = 10;
6349 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6350 if (atts == NULL) {
6351 xmlGenericError(xmlGenericErrorContext,
6352 "malloc of %ld byte failed\n",
6353 maxatts * (long)sizeof(xmlChar *));
6354 return(NULL);
6355 }
6356 } else if (nbatts + 4 > maxatts) {
6357 maxatts *= 2;
6358 atts = (const xmlChar **) xmlRealloc((void *) atts,
6359 maxatts * sizeof(xmlChar *));
6360 if (atts == NULL) {
6361 xmlGenericError(xmlGenericErrorContext,
6362 "realloc of %ld byte failed\n",
6363 maxatts * (long)sizeof(xmlChar *));
6364 return(NULL);
6365 }
6366 }
6367 atts[nbatts++] = attname;
6368 atts[nbatts++] = attvalue;
6369 atts[nbatts] = NULL;
6370 atts[nbatts + 1] = NULL;
6371 } else {
6372 if (attname != NULL)
6373 xmlFree(attname);
6374 if (attvalue != NULL)
6375 xmlFree(attvalue);
6376 }
6377
6378failed:
6379
6380 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6381 break;
6382 if (!IS_BLANK(RAW)) {
6383 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6385 ctxt->sax->error(ctxt->userData,
6386 "attributes construct error\n");
6387 ctxt->wellFormed = 0;
6388 ctxt->disableSAX = 1;
6389 }
6390 SKIP_BLANKS;
6391 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6392 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6394 ctxt->sax->error(ctxt->userData,
6395 "xmlParseStartTag: problem parsing attributes\n");
6396 ctxt->wellFormed = 0;
6397 ctxt->disableSAX = 1;
6398 break;
6399 }
6400 GROW;
6401 }
6402
6403 /*
6404 * SAX: Start of Element !
6405 */
6406 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6407 (!ctxt->disableSAX))
6408 ctxt->sax->startElement(ctxt->userData, name, atts);
6409
6410 if (atts != NULL) {
6411 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6412 xmlFree((void *) atts);
6413 }
6414 return(name);
6415}
6416
6417/**
6418 * xmlParseEndTag:
6419 * @ctxt: an XML parser context
6420 *
6421 * parse an end of tag
6422 *
6423 * [42] ETag ::= '</' Name S? '>'
6424 *
6425 * With namespace
6426 *
6427 * [NS 9] ETag ::= '</' QName S? '>'
6428 */
6429
6430void
6431xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6432 xmlChar *name;
6433 xmlChar *oldname;
6434
6435 GROW;
6436 if ((RAW != '<') || (NXT(1) != '/')) {
6437 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6439 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6440 ctxt->wellFormed = 0;
6441 ctxt->disableSAX = 1;
6442 return;
6443 }
6444 SKIP(2);
6445
6446 name = xmlParseName(ctxt);
6447
6448 /*
6449 * We should definitely be at the ending "S? '>'" part
6450 */
6451 GROW;
6452 SKIP_BLANKS;
6453 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6454 ctxt->errNo = XML_ERR_GT_REQUIRED;
6455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6456 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6457 ctxt->wellFormed = 0;
6458 ctxt->disableSAX = 1;
6459 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006460 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006461
6462 /*
6463 * [ WFC: Element Type Match ]
6464 * The Name in an element's end-tag must match the element type in the
6465 * start-tag.
6466 *
6467 */
6468 if ((name == NULL) || (ctxt->name == NULL) ||
6469 (!xmlStrEqual(name, ctxt->name))) {
6470 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6472 if ((name != NULL) && (ctxt->name != NULL)) {
6473 ctxt->sax->error(ctxt->userData,
6474 "Opening and ending tag mismatch: %s and %s\n",
6475 ctxt->name, name);
6476 } else if (ctxt->name != NULL) {
6477 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006478 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006479 } else {
6480 ctxt->sax->error(ctxt->userData,
6481 "Ending tag error: internal error ???\n");
6482 }
6483
6484 }
6485 ctxt->wellFormed = 0;
6486 ctxt->disableSAX = 1;
6487 }
6488
6489 /*
6490 * SAX: End of Tag
6491 */
6492 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6493 (!ctxt->disableSAX))
6494 ctxt->sax->endElement(ctxt->userData, name);
6495
6496 if (name != NULL)
6497 xmlFree(name);
6498 oldname = namePop(ctxt);
6499 spacePop(ctxt);
6500 if (oldname != NULL) {
6501#ifdef DEBUG_STACK
6502 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6503#endif
6504 xmlFree(oldname);
6505 }
6506 return;
6507}
6508
6509/**
6510 * xmlParseCDSect:
6511 * @ctxt: an XML parser context
6512 *
6513 * Parse escaped pure raw content.
6514 *
6515 * [18] CDSect ::= CDStart CData CDEnd
6516 *
6517 * [19] CDStart ::= '<![CDATA['
6518 *
6519 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6520 *
6521 * [21] CDEnd ::= ']]>'
6522 */
6523void
6524xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6525 xmlChar *buf = NULL;
6526 int len = 0;
6527 int size = XML_PARSER_BUFFER_SIZE;
6528 int r, rl;
6529 int s, sl;
6530 int cur, l;
6531 int count = 0;
6532
6533 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6534 (NXT(2) == '[') && (NXT(3) == 'C') &&
6535 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6536 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6537 (NXT(8) == '[')) {
6538 SKIP(9);
6539 } else
6540 return;
6541
6542 ctxt->instate = XML_PARSER_CDATA_SECTION;
6543 r = CUR_CHAR(rl);
6544 if (!IS_CHAR(r)) {
6545 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6547 ctxt->sax->error(ctxt->userData,
6548 "CData section not finished\n");
6549 ctxt->wellFormed = 0;
6550 ctxt->disableSAX = 1;
6551 ctxt->instate = XML_PARSER_CONTENT;
6552 return;
6553 }
6554 NEXTL(rl);
6555 s = CUR_CHAR(sl);
6556 if (!IS_CHAR(s)) {
6557 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "CData section not finished\n");
6561 ctxt->wellFormed = 0;
6562 ctxt->disableSAX = 1;
6563 ctxt->instate = XML_PARSER_CONTENT;
6564 return;
6565 }
6566 NEXTL(sl);
6567 cur = CUR_CHAR(l);
6568 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6569 if (buf == NULL) {
6570 xmlGenericError(xmlGenericErrorContext,
6571 "malloc of %d byte failed\n", size);
6572 return;
6573 }
6574 while (IS_CHAR(cur) &&
6575 ((r != ']') || (s != ']') || (cur != '>'))) {
6576 if (len + 5 >= size) {
6577 size *= 2;
6578 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6579 if (buf == NULL) {
6580 xmlGenericError(xmlGenericErrorContext,
6581 "realloc of %d byte failed\n", size);
6582 return;
6583 }
6584 }
6585 COPY_BUF(rl,buf,len,r);
6586 r = s;
6587 rl = sl;
6588 s = cur;
6589 sl = l;
6590 count++;
6591 if (count > 50) {
6592 GROW;
6593 count = 0;
6594 }
6595 NEXTL(l);
6596 cur = CUR_CHAR(l);
6597 }
6598 buf[len] = 0;
6599 ctxt->instate = XML_PARSER_CONTENT;
6600 if (cur != '>') {
6601 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6603 ctxt->sax->error(ctxt->userData,
6604 "CData section not finished\n%.50s\n", buf);
6605 ctxt->wellFormed = 0;
6606 ctxt->disableSAX = 1;
6607 xmlFree(buf);
6608 return;
6609 }
6610 NEXTL(l);
6611
6612 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006613 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006614 */
6615 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6616 if (ctxt->sax->cdataBlock != NULL)
6617 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006618 else if (ctxt->sax->characters != NULL)
6619 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006620 }
6621 xmlFree(buf);
6622}
6623
6624/**
6625 * xmlParseContent:
6626 * @ctxt: an XML parser context
6627 *
6628 * Parse a content:
6629 *
6630 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6631 */
6632
6633void
6634xmlParseContent(xmlParserCtxtPtr ctxt) {
6635 GROW;
6636 while (((RAW != 0) || (ctxt->token != 0)) &&
6637 ((RAW != '<') || (NXT(1) != '/'))) {
6638 const xmlChar *test = CUR_PTR;
6639 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006640 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006641 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006642
6643 /*
6644 * Handle possible processed charrefs.
6645 */
6646 if (ctxt->token != 0) {
6647 xmlParseCharData(ctxt, 0);
6648 }
6649 /*
6650 * First case : a Processing Instruction.
6651 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006652 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006653 xmlParsePI(ctxt);
6654 }
6655
6656 /*
6657 * Second case : a CDSection
6658 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006659 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006660 (NXT(2) == '[') && (NXT(3) == 'C') &&
6661 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6662 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6663 (NXT(8) == '[')) {
6664 xmlParseCDSect(ctxt);
6665 }
6666
6667 /*
6668 * Third case : a comment
6669 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006670 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006671 (NXT(2) == '-') && (NXT(3) == '-')) {
6672 xmlParseComment(ctxt);
6673 ctxt->instate = XML_PARSER_CONTENT;
6674 }
6675
6676 /*
6677 * Fourth case : a sub-element.
6678 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006679 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006680 xmlParseElement(ctxt);
6681 }
6682
6683 /*
6684 * Fifth case : a reference. If if has not been resolved,
6685 * parsing returns it's Name, create the node
6686 */
6687
Daniel Veillard21a0f912001-02-25 19:54:14 +00006688 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006689 xmlParseReference(ctxt);
6690 }
6691
6692 /*
6693 * Last case, text. Note that References are handled directly.
6694 */
6695 else {
6696 xmlParseCharData(ctxt, 0);
6697 }
6698
6699 GROW;
6700 /*
6701 * Pop-up of finished entities.
6702 */
6703 while ((RAW == 0) && (ctxt->inputNr > 1))
6704 xmlPopInput(ctxt);
6705 SHRINK;
6706
6707 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6708 (tok == ctxt->token)) {
6709 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6711 ctxt->sax->error(ctxt->userData,
6712 "detected an error in element content\n");
6713 ctxt->wellFormed = 0;
6714 ctxt->disableSAX = 1;
6715 ctxt->instate = XML_PARSER_EOF;
6716 break;
6717 }
6718 }
6719}
6720
6721/**
6722 * xmlParseElement:
6723 * @ctxt: an XML parser context
6724 *
6725 * parse an XML element, this is highly recursive
6726 *
6727 * [39] element ::= EmptyElemTag | STag content ETag
6728 *
6729 * [ WFC: Element Type Match ]
6730 * The Name in an element's end-tag must match the element type in the
6731 * start-tag.
6732 *
6733 * [ VC: Element Valid ]
6734 * An element is valid if there is a declaration matching elementdecl
6735 * where the Name matches the element type and one of the following holds:
6736 * - The declaration matches EMPTY and the element has no content.
6737 * - The declaration matches children and the sequence of child elements
6738 * belongs to the language generated by the regular expression in the
6739 * content model, with optional white space (characters matching the
6740 * nonterminal S) between each pair of child elements.
6741 * - The declaration matches Mixed and the content consists of character
6742 * data and child elements whose types match names in the content model.
6743 * - The declaration matches ANY, and the types of any child elements have
6744 * been declared.
6745 */
6746
6747void
6748xmlParseElement(xmlParserCtxtPtr ctxt) {
6749 const xmlChar *openTag = CUR_PTR;
6750 xmlChar *name;
6751 xmlChar *oldname;
6752 xmlParserNodeInfo node_info;
6753 xmlNodePtr ret;
6754
6755 /* Capture start position */
6756 if (ctxt->record_info) {
6757 node_info.begin_pos = ctxt->input->consumed +
6758 (CUR_PTR - ctxt->input->base);
6759 node_info.begin_line = ctxt->input->line;
6760 }
6761
6762 if (ctxt->spaceNr == 0)
6763 spacePush(ctxt, -1);
6764 else
6765 spacePush(ctxt, *ctxt->space);
6766
6767 name = xmlParseStartTag(ctxt);
6768 if (name == NULL) {
6769 spacePop(ctxt);
6770 return;
6771 }
6772 namePush(ctxt, name);
6773 ret = ctxt->node;
6774
6775 /*
6776 * [ VC: Root Element Type ]
6777 * The Name in the document type declaration must match the element
6778 * type of the root element.
6779 */
6780 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6781 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6782 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6783
6784 /*
6785 * Check for an Empty Element.
6786 */
6787 if ((RAW == '/') && (NXT(1) == '>')) {
6788 SKIP(2);
6789 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6790 (!ctxt->disableSAX))
6791 ctxt->sax->endElement(ctxt->userData, name);
6792 oldname = namePop(ctxt);
6793 spacePop(ctxt);
6794 if (oldname != NULL) {
6795#ifdef DEBUG_STACK
6796 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6797#endif
6798 xmlFree(oldname);
6799 }
6800 if ( ret != NULL && ctxt->record_info ) {
6801 node_info.end_pos = ctxt->input->consumed +
6802 (CUR_PTR - ctxt->input->base);
6803 node_info.end_line = ctxt->input->line;
6804 node_info.node = ret;
6805 xmlParserAddNodeInfo(ctxt, &node_info);
6806 }
6807 return;
6808 }
6809 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006810 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006811 } else {
6812 ctxt->errNo = XML_ERR_GT_REQUIRED;
6813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6814 ctxt->sax->error(ctxt->userData,
6815 "Couldn't find end of Start Tag\n%.30s\n",
6816 openTag);
6817 ctxt->wellFormed = 0;
6818 ctxt->disableSAX = 1;
6819
6820 /*
6821 * end of parsing of this node.
6822 */
6823 nodePop(ctxt);
6824 oldname = namePop(ctxt);
6825 spacePop(ctxt);
6826 if (oldname != NULL) {
6827#ifdef DEBUG_STACK
6828 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6829#endif
6830 xmlFree(oldname);
6831 }
6832
6833 /*
6834 * Capture end position and add node
6835 */
6836 if ( ret != NULL && ctxt->record_info ) {
6837 node_info.end_pos = ctxt->input->consumed +
6838 (CUR_PTR - ctxt->input->base);
6839 node_info.end_line = ctxt->input->line;
6840 node_info.node = ret;
6841 xmlParserAddNodeInfo(ctxt, &node_info);
6842 }
6843 return;
6844 }
6845
6846 /*
6847 * Parse the content of the element:
6848 */
6849 xmlParseContent(ctxt);
6850 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006851 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6853 ctxt->sax->error(ctxt->userData,
6854 "Premature end of data in tag %.30s\n", openTag);
6855 ctxt->wellFormed = 0;
6856 ctxt->disableSAX = 1;
6857
6858 /*
6859 * end of parsing of this node.
6860 */
6861 nodePop(ctxt);
6862 oldname = namePop(ctxt);
6863 spacePop(ctxt);
6864 if (oldname != NULL) {
6865#ifdef DEBUG_STACK
6866 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6867#endif
6868 xmlFree(oldname);
6869 }
6870 return;
6871 }
6872
6873 /*
6874 * parse the end of tag: '</' should be here.
6875 */
6876 xmlParseEndTag(ctxt);
6877
6878 /*
6879 * Capture end position and add node
6880 */
6881 if ( ret != NULL && ctxt->record_info ) {
6882 node_info.end_pos = ctxt->input->consumed +
6883 (CUR_PTR - ctxt->input->base);
6884 node_info.end_line = ctxt->input->line;
6885 node_info.node = ret;
6886 xmlParserAddNodeInfo(ctxt, &node_info);
6887 }
6888}
6889
6890/**
6891 * xmlParseVersionNum:
6892 * @ctxt: an XML parser context
6893 *
6894 * parse the XML version value.
6895 *
6896 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6897 *
6898 * Returns the string giving the XML version number, or NULL
6899 */
6900xmlChar *
6901xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6902 xmlChar *buf = NULL;
6903 int len = 0;
6904 int size = 10;
6905 xmlChar cur;
6906
6907 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6908 if (buf == NULL) {
6909 xmlGenericError(xmlGenericErrorContext,
6910 "malloc of %d byte failed\n", size);
6911 return(NULL);
6912 }
6913 cur = CUR;
6914 while (((cur >= 'a') && (cur <= 'z')) ||
6915 ((cur >= 'A') && (cur <= 'Z')) ||
6916 ((cur >= '0') && (cur <= '9')) ||
6917 (cur == '_') || (cur == '.') ||
6918 (cur == ':') || (cur == '-')) {
6919 if (len + 1 >= size) {
6920 size *= 2;
6921 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6922 if (buf == NULL) {
6923 xmlGenericError(xmlGenericErrorContext,
6924 "realloc of %d byte failed\n", size);
6925 return(NULL);
6926 }
6927 }
6928 buf[len++] = cur;
6929 NEXT;
6930 cur=CUR;
6931 }
6932 buf[len] = 0;
6933 return(buf);
6934}
6935
6936/**
6937 * xmlParseVersionInfo:
6938 * @ctxt: an XML parser context
6939 *
6940 * parse the XML version.
6941 *
6942 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6943 *
6944 * [25] Eq ::= S? '=' S?
6945 *
6946 * Returns the version string, e.g. "1.0"
6947 */
6948
6949xmlChar *
6950xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6951 xmlChar *version = NULL;
6952 const xmlChar *q;
6953
6954 if ((RAW == 'v') && (NXT(1) == 'e') &&
6955 (NXT(2) == 'r') && (NXT(3) == 's') &&
6956 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6957 (NXT(6) == 'n')) {
6958 SKIP(7);
6959 SKIP_BLANKS;
6960 if (RAW != '=') {
6961 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6963 ctxt->sax->error(ctxt->userData,
6964 "xmlParseVersionInfo : expected '='\n");
6965 ctxt->wellFormed = 0;
6966 ctxt->disableSAX = 1;
6967 return(NULL);
6968 }
6969 NEXT;
6970 SKIP_BLANKS;
6971 if (RAW == '"') {
6972 NEXT;
6973 q = CUR_PTR;
6974 version = xmlParseVersionNum(ctxt);
6975 if (RAW != '"') {
6976 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6978 ctxt->sax->error(ctxt->userData,
6979 "String not closed\n%.50s\n", q);
6980 ctxt->wellFormed = 0;
6981 ctxt->disableSAX = 1;
6982 } else
6983 NEXT;
6984 } else if (RAW == '\''){
6985 NEXT;
6986 q = CUR_PTR;
6987 version = xmlParseVersionNum(ctxt);
6988 if (RAW != '\'') {
6989 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6991 ctxt->sax->error(ctxt->userData,
6992 "String not closed\n%.50s\n", q);
6993 ctxt->wellFormed = 0;
6994 ctxt->disableSAX = 1;
6995 } else
6996 NEXT;
6997 } else {
6998 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7000 ctxt->sax->error(ctxt->userData,
7001 "xmlParseVersionInfo : expected ' or \"\n");
7002 ctxt->wellFormed = 0;
7003 ctxt->disableSAX = 1;
7004 }
7005 }
7006 return(version);
7007}
7008
7009/**
7010 * xmlParseEncName:
7011 * @ctxt: an XML parser context
7012 *
7013 * parse the XML encoding name
7014 *
7015 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7016 *
7017 * Returns the encoding name value or NULL
7018 */
7019xmlChar *
7020xmlParseEncName(xmlParserCtxtPtr ctxt) {
7021 xmlChar *buf = NULL;
7022 int len = 0;
7023 int size = 10;
7024 xmlChar cur;
7025
7026 cur = CUR;
7027 if (((cur >= 'a') && (cur <= 'z')) ||
7028 ((cur >= 'A') && (cur <= 'Z'))) {
7029 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7030 if (buf == NULL) {
7031 xmlGenericError(xmlGenericErrorContext,
7032 "malloc of %d byte failed\n", size);
7033 return(NULL);
7034 }
7035
7036 buf[len++] = cur;
7037 NEXT;
7038 cur = CUR;
7039 while (((cur >= 'a') && (cur <= 'z')) ||
7040 ((cur >= 'A') && (cur <= 'Z')) ||
7041 ((cur >= '0') && (cur <= '9')) ||
7042 (cur == '.') || (cur == '_') ||
7043 (cur == '-')) {
7044 if (len + 1 >= size) {
7045 size *= 2;
7046 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7047 if (buf == NULL) {
7048 xmlGenericError(xmlGenericErrorContext,
7049 "realloc of %d byte failed\n", size);
7050 return(NULL);
7051 }
7052 }
7053 buf[len++] = cur;
7054 NEXT;
7055 cur = CUR;
7056 if (cur == 0) {
7057 SHRINK;
7058 GROW;
7059 cur = CUR;
7060 }
7061 }
7062 buf[len] = 0;
7063 } else {
7064 ctxt->errNo = XML_ERR_ENCODING_NAME;
7065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7066 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7067 ctxt->wellFormed = 0;
7068 ctxt->disableSAX = 1;
7069 }
7070 return(buf);
7071}
7072
7073/**
7074 * xmlParseEncodingDecl:
7075 * @ctxt: an XML parser context
7076 *
7077 * parse the XML encoding declaration
7078 *
7079 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7080 *
7081 * this setups the conversion filters.
7082 *
7083 * Returns the encoding value or NULL
7084 */
7085
7086xmlChar *
7087xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7088 xmlChar *encoding = NULL;
7089 const xmlChar *q;
7090
7091 SKIP_BLANKS;
7092 if ((RAW == 'e') && (NXT(1) == 'n') &&
7093 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7094 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7095 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7096 SKIP(8);
7097 SKIP_BLANKS;
7098 if (RAW != '=') {
7099 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7101 ctxt->sax->error(ctxt->userData,
7102 "xmlParseEncodingDecl : expected '='\n");
7103 ctxt->wellFormed = 0;
7104 ctxt->disableSAX = 1;
7105 return(NULL);
7106 }
7107 NEXT;
7108 SKIP_BLANKS;
7109 if (RAW == '"') {
7110 NEXT;
7111 q = CUR_PTR;
7112 encoding = xmlParseEncName(ctxt);
7113 if (RAW != '"') {
7114 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7116 ctxt->sax->error(ctxt->userData,
7117 "String not closed\n%.50s\n", q);
7118 ctxt->wellFormed = 0;
7119 ctxt->disableSAX = 1;
7120 } else
7121 NEXT;
7122 } else if (RAW == '\''){
7123 NEXT;
7124 q = CUR_PTR;
7125 encoding = xmlParseEncName(ctxt);
7126 if (RAW != '\'') {
7127 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7129 ctxt->sax->error(ctxt->userData,
7130 "String not closed\n%.50s\n", q);
7131 ctxt->wellFormed = 0;
7132 ctxt->disableSAX = 1;
7133 } else
7134 NEXT;
7135 } else if (RAW == '"'){
7136 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7138 ctxt->sax->error(ctxt->userData,
7139 "xmlParseEncodingDecl : expected ' or \"\n");
7140 ctxt->wellFormed = 0;
7141 ctxt->disableSAX = 1;
7142 }
7143 if (encoding != NULL) {
7144 xmlCharEncoding enc;
7145 xmlCharEncodingHandlerPtr handler;
7146
7147 if (ctxt->input->encoding != NULL)
7148 xmlFree((xmlChar *) ctxt->input->encoding);
7149 ctxt->input->encoding = encoding;
7150
7151 enc = xmlParseCharEncoding((const char *) encoding);
7152 /*
7153 * registered set of known encodings
7154 */
7155 if (enc != XML_CHAR_ENCODING_ERROR) {
7156 xmlSwitchEncoding(ctxt, enc);
7157 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7158 xmlFree(encoding);
7159 return(NULL);
7160 }
7161 } else {
7162 /*
7163 * fallback for unknown encodings
7164 */
7165 handler = xmlFindCharEncodingHandler((const char *) encoding);
7166 if (handler != NULL) {
7167 xmlSwitchToEncoding(ctxt, handler);
7168 } else {
7169 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7171 ctxt->sax->error(ctxt->userData,
7172 "Unsupported encoding %s\n", encoding);
7173 return(NULL);
7174 }
7175 }
7176 }
7177 }
7178 return(encoding);
7179}
7180
7181/**
7182 * xmlParseSDDecl:
7183 * @ctxt: an XML parser context
7184 *
7185 * parse the XML standalone declaration
7186 *
7187 * [32] SDDecl ::= S 'standalone' Eq
7188 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7189 *
7190 * [ VC: Standalone Document Declaration ]
7191 * TODO The standalone document declaration must have the value "no"
7192 * if any external markup declarations contain declarations of:
7193 * - attributes with default values, if elements to which these
7194 * attributes apply appear in the document without specifications
7195 * of values for these attributes, or
7196 * - entities (other than amp, lt, gt, apos, quot), if references
7197 * to those entities appear in the document, or
7198 * - attributes with values subject to normalization, where the
7199 * attribute appears in the document with a value which will change
7200 * as a result of normalization, or
7201 * - element types with element content, if white space occurs directly
7202 * within any instance of those types.
7203 *
7204 * Returns 1 if standalone, 0 otherwise
7205 */
7206
7207int
7208xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7209 int standalone = -1;
7210
7211 SKIP_BLANKS;
7212 if ((RAW == 's') && (NXT(1) == 't') &&
7213 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7214 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7215 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7216 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7217 SKIP(10);
7218 SKIP_BLANKS;
7219 if (RAW != '=') {
7220 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7222 ctxt->sax->error(ctxt->userData,
7223 "XML standalone declaration : expected '='\n");
7224 ctxt->wellFormed = 0;
7225 ctxt->disableSAX = 1;
7226 return(standalone);
7227 }
7228 NEXT;
7229 SKIP_BLANKS;
7230 if (RAW == '\''){
7231 NEXT;
7232 if ((RAW == 'n') && (NXT(1) == 'o')) {
7233 standalone = 0;
7234 SKIP(2);
7235 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7236 (NXT(2) == 's')) {
7237 standalone = 1;
7238 SKIP(3);
7239 } else {
7240 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
7243 "standalone accepts only 'yes' or 'no'\n");
7244 ctxt->wellFormed = 0;
7245 ctxt->disableSAX = 1;
7246 }
7247 if (RAW != '\'') {
7248 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7250 ctxt->sax->error(ctxt->userData, "String not closed\n");
7251 ctxt->wellFormed = 0;
7252 ctxt->disableSAX = 1;
7253 } else
7254 NEXT;
7255 } else if (RAW == '"'){
7256 NEXT;
7257 if ((RAW == 'n') && (NXT(1) == 'o')) {
7258 standalone = 0;
7259 SKIP(2);
7260 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7261 (NXT(2) == 's')) {
7262 standalone = 1;
7263 SKIP(3);
7264 } else {
7265 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7267 ctxt->sax->error(ctxt->userData,
7268 "standalone accepts only 'yes' or 'no'\n");
7269 ctxt->wellFormed = 0;
7270 ctxt->disableSAX = 1;
7271 }
7272 if (RAW != '"') {
7273 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7275 ctxt->sax->error(ctxt->userData, "String not closed\n");
7276 ctxt->wellFormed = 0;
7277 ctxt->disableSAX = 1;
7278 } else
7279 NEXT;
7280 } else {
7281 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7283 ctxt->sax->error(ctxt->userData,
7284 "Standalone value not found\n");
7285 ctxt->wellFormed = 0;
7286 ctxt->disableSAX = 1;
7287 }
7288 }
7289 return(standalone);
7290}
7291
7292/**
7293 * xmlParseXMLDecl:
7294 * @ctxt: an XML parser context
7295 *
7296 * parse an XML declaration header
7297 *
7298 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7299 */
7300
7301void
7302xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7303 xmlChar *version;
7304
7305 /*
7306 * We know that '<?xml' is here.
7307 */
7308 SKIP(5);
7309
7310 if (!IS_BLANK(RAW)) {
7311 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7313 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7314 ctxt->wellFormed = 0;
7315 ctxt->disableSAX = 1;
7316 }
7317 SKIP_BLANKS;
7318
7319 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007320 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007321 */
7322 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007323 if (version == NULL) {
7324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7325 ctxt->sax->error(ctxt->userData,
7326 "Malformed declaration expecting version\n");
7327 ctxt->wellFormed = 0;
7328 ctxt->disableSAX = 1;
7329 } else {
7330 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7331 /*
7332 * TODO: Blueberry should be detected here
7333 */
7334 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7335 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7336 version);
7337 }
7338 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007339 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007340 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007341 }
Owen Taylor3473f882001-02-23 17:55:21 +00007342
7343 /*
7344 * We may have the encoding declaration
7345 */
7346 if (!IS_BLANK(RAW)) {
7347 if ((RAW == '?') && (NXT(1) == '>')) {
7348 SKIP(2);
7349 return;
7350 }
7351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7353 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7354 ctxt->wellFormed = 0;
7355 ctxt->disableSAX = 1;
7356 }
7357 xmlParseEncodingDecl(ctxt);
7358 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7359 /*
7360 * The XML REC instructs us to stop parsing right here
7361 */
7362 return;
7363 }
7364
7365 /*
7366 * We may have the standalone status.
7367 */
7368 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7369 if ((RAW == '?') && (NXT(1) == '>')) {
7370 SKIP(2);
7371 return;
7372 }
7373 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7375 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7376 ctxt->wellFormed = 0;
7377 ctxt->disableSAX = 1;
7378 }
7379 SKIP_BLANKS;
7380 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7381
7382 SKIP_BLANKS;
7383 if ((RAW == '?') && (NXT(1) == '>')) {
7384 SKIP(2);
7385 } else if (RAW == '>') {
7386 /* Deprecated old WD ... */
7387 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7389 ctxt->sax->error(ctxt->userData,
7390 "XML declaration must end-up with '?>'\n");
7391 ctxt->wellFormed = 0;
7392 ctxt->disableSAX = 1;
7393 NEXT;
7394 } else {
7395 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "parsing XML declaration: '?>' expected\n");
7399 ctxt->wellFormed = 0;
7400 ctxt->disableSAX = 1;
7401 MOVETO_ENDTAG(CUR_PTR);
7402 NEXT;
7403 }
7404}
7405
7406/**
7407 * xmlParseMisc:
7408 * @ctxt: an XML parser context
7409 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007410 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007411 *
7412 * [27] Misc ::= Comment | PI | S
7413 */
7414
7415void
7416xmlParseMisc(xmlParserCtxtPtr ctxt) {
7417 while (((RAW == '<') && (NXT(1) == '?')) ||
7418 ((RAW == '<') && (NXT(1) == '!') &&
7419 (NXT(2) == '-') && (NXT(3) == '-')) ||
7420 IS_BLANK(CUR)) {
7421 if ((RAW == '<') && (NXT(1) == '?')) {
7422 xmlParsePI(ctxt);
7423 } else if (IS_BLANK(CUR)) {
7424 NEXT;
7425 } else
7426 xmlParseComment(ctxt);
7427 }
7428}
7429
7430/**
7431 * xmlParseDocument:
7432 * @ctxt: an XML parser context
7433 *
7434 * parse an XML document (and build a tree if using the standard SAX
7435 * interface).
7436 *
7437 * [1] document ::= prolog element Misc*
7438 *
7439 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7440 *
7441 * Returns 0, -1 in case of error. the parser context is augmented
7442 * as a result of the parsing.
7443 */
7444
7445int
7446xmlParseDocument(xmlParserCtxtPtr ctxt) {
7447 xmlChar start[4];
7448 xmlCharEncoding enc;
7449
7450 xmlInitParser();
7451
7452 GROW;
7453
7454 /*
7455 * SAX: beginning of the document processing.
7456 */
7457 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7458 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7459
Daniel Veillard50f34372001-08-03 12:06:36 +00007460 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007461 /*
7462 * Get the 4 first bytes and decode the charset
7463 * if enc != XML_CHAR_ENCODING_NONE
7464 * plug some encoding conversion routines.
7465 */
7466 start[0] = RAW;
7467 start[1] = NXT(1);
7468 start[2] = NXT(2);
7469 start[3] = NXT(3);
7470 enc = xmlDetectCharEncoding(start, 4);
7471 if (enc != XML_CHAR_ENCODING_NONE) {
7472 xmlSwitchEncoding(ctxt, enc);
7473 }
Owen Taylor3473f882001-02-23 17:55:21 +00007474 }
7475
7476
7477 if (CUR == 0) {
7478 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7480 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7481 ctxt->wellFormed = 0;
7482 ctxt->disableSAX = 1;
7483 }
7484
7485 /*
7486 * Check for the XMLDecl in the Prolog.
7487 */
7488 GROW;
7489 if ((RAW == '<') && (NXT(1) == '?') &&
7490 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7491 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7492
7493 /*
7494 * Note that we will switch encoding on the fly.
7495 */
7496 xmlParseXMLDecl(ctxt);
7497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7498 /*
7499 * The XML REC instructs us to stop parsing right here
7500 */
7501 return(-1);
7502 }
7503 ctxt->standalone = ctxt->input->standalone;
7504 SKIP_BLANKS;
7505 } else {
7506 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7507 }
7508 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7509 ctxt->sax->startDocument(ctxt->userData);
7510
7511 /*
7512 * The Misc part of the Prolog
7513 */
7514 GROW;
7515 xmlParseMisc(ctxt);
7516
7517 /*
7518 * Then possibly doc type declaration(s) and more Misc
7519 * (doctypedecl Misc*)?
7520 */
7521 GROW;
7522 if ((RAW == '<') && (NXT(1) == '!') &&
7523 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7524 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7525 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7526 (NXT(8) == 'E')) {
7527
7528 ctxt->inSubset = 1;
7529 xmlParseDocTypeDecl(ctxt);
7530 if (RAW == '[') {
7531 ctxt->instate = XML_PARSER_DTD;
7532 xmlParseInternalSubset(ctxt);
7533 }
7534
7535 /*
7536 * Create and update the external subset.
7537 */
7538 ctxt->inSubset = 2;
7539 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7540 (!ctxt->disableSAX))
7541 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7542 ctxt->extSubSystem, ctxt->extSubURI);
7543 ctxt->inSubset = 0;
7544
7545
7546 ctxt->instate = XML_PARSER_PROLOG;
7547 xmlParseMisc(ctxt);
7548 }
7549
7550 /*
7551 * Time to start parsing the tree itself
7552 */
7553 GROW;
7554 if (RAW != '<') {
7555 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7557 ctxt->sax->error(ctxt->userData,
7558 "Start tag expected, '<' not found\n");
7559 ctxt->wellFormed = 0;
7560 ctxt->disableSAX = 1;
7561 ctxt->instate = XML_PARSER_EOF;
7562 } else {
7563 ctxt->instate = XML_PARSER_CONTENT;
7564 xmlParseElement(ctxt);
7565 ctxt->instate = XML_PARSER_EPILOG;
7566
7567
7568 /*
7569 * The Misc part at the end
7570 */
7571 xmlParseMisc(ctxt);
7572
7573 if (RAW != 0) {
7574 ctxt->errNo = XML_ERR_DOCUMENT_END;
7575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7576 ctxt->sax->error(ctxt->userData,
7577 "Extra content at the end of the document\n");
7578 ctxt->wellFormed = 0;
7579 ctxt->disableSAX = 1;
7580 }
7581 ctxt->instate = XML_PARSER_EOF;
7582 }
7583
7584 /*
7585 * SAX: end of the document processing.
7586 */
7587 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7588 (!ctxt->disableSAX))
7589 ctxt->sax->endDocument(ctxt->userData);
7590
7591 if (! ctxt->wellFormed) return(-1);
7592 return(0);
7593}
7594
7595/**
7596 * xmlParseExtParsedEnt:
7597 * @ctxt: an XML parser context
7598 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007599 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007600 * An external general parsed entity is well-formed if it matches the
7601 * production labeled extParsedEnt.
7602 *
7603 * [78] extParsedEnt ::= TextDecl? content
7604 *
7605 * Returns 0, -1 in case of error. the parser context is augmented
7606 * as a result of the parsing.
7607 */
7608
7609int
7610xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7611 xmlChar start[4];
7612 xmlCharEncoding enc;
7613
7614 xmlDefaultSAXHandlerInit();
7615
7616 GROW;
7617
7618 /*
7619 * SAX: beginning of the document processing.
7620 */
7621 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7622 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7623
7624 /*
7625 * Get the 4 first bytes and decode the charset
7626 * if enc != XML_CHAR_ENCODING_NONE
7627 * plug some encoding conversion routines.
7628 */
7629 start[0] = RAW;
7630 start[1] = NXT(1);
7631 start[2] = NXT(2);
7632 start[3] = NXT(3);
7633 enc = xmlDetectCharEncoding(start, 4);
7634 if (enc != XML_CHAR_ENCODING_NONE) {
7635 xmlSwitchEncoding(ctxt, enc);
7636 }
7637
7638
7639 if (CUR == 0) {
7640 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7642 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7643 ctxt->wellFormed = 0;
7644 ctxt->disableSAX = 1;
7645 }
7646
7647 /*
7648 * Check for the XMLDecl in the Prolog.
7649 */
7650 GROW;
7651 if ((RAW == '<') && (NXT(1) == '?') &&
7652 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7653 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7654
7655 /*
7656 * Note that we will switch encoding on the fly.
7657 */
7658 xmlParseXMLDecl(ctxt);
7659 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7660 /*
7661 * The XML REC instructs us to stop parsing right here
7662 */
7663 return(-1);
7664 }
7665 SKIP_BLANKS;
7666 } else {
7667 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7668 }
7669 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7670 ctxt->sax->startDocument(ctxt->userData);
7671
7672 /*
7673 * Doing validity checking on chunk doesn't make sense
7674 */
7675 ctxt->instate = XML_PARSER_CONTENT;
7676 ctxt->validate = 0;
7677 ctxt->loadsubset = 0;
7678 ctxt->depth = 0;
7679
7680 xmlParseContent(ctxt);
7681
7682 if ((RAW == '<') && (NXT(1) == '/')) {
7683 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7685 ctxt->sax->error(ctxt->userData,
7686 "chunk is not well balanced\n");
7687 ctxt->wellFormed = 0;
7688 ctxt->disableSAX = 1;
7689 } else if (RAW != 0) {
7690 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7692 ctxt->sax->error(ctxt->userData,
7693 "extra content at the end of well balanced chunk\n");
7694 ctxt->wellFormed = 0;
7695 ctxt->disableSAX = 1;
7696 }
7697
7698 /*
7699 * SAX: end of the document processing.
7700 */
7701 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7702 (!ctxt->disableSAX))
7703 ctxt->sax->endDocument(ctxt->userData);
7704
7705 if (! ctxt->wellFormed) return(-1);
7706 return(0);
7707}
7708
7709/************************************************************************
7710 * *
7711 * Progressive parsing interfaces *
7712 * *
7713 ************************************************************************/
7714
7715/**
7716 * xmlParseLookupSequence:
7717 * @ctxt: an XML parser context
7718 * @first: the first char to lookup
7719 * @next: the next char to lookup or zero
7720 * @third: the next char to lookup or zero
7721 *
7722 * Try to find if a sequence (first, next, third) or just (first next) or
7723 * (first) is available in the input stream.
7724 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7725 * to avoid rescanning sequences of bytes, it DOES change the state of the
7726 * parser, do not use liberally.
7727 *
7728 * Returns the index to the current parsing point if the full sequence
7729 * is available, -1 otherwise.
7730 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007731static int
Owen Taylor3473f882001-02-23 17:55:21 +00007732xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7733 xmlChar next, xmlChar third) {
7734 int base, len;
7735 xmlParserInputPtr in;
7736 const xmlChar *buf;
7737
7738 in = ctxt->input;
7739 if (in == NULL) return(-1);
7740 base = in->cur - in->base;
7741 if (base < 0) return(-1);
7742 if (ctxt->checkIndex > base)
7743 base = ctxt->checkIndex;
7744 if (in->buf == NULL) {
7745 buf = in->base;
7746 len = in->length;
7747 } else {
7748 buf = in->buf->buffer->content;
7749 len = in->buf->buffer->use;
7750 }
7751 /* take into account the sequence length */
7752 if (third) len -= 2;
7753 else if (next) len --;
7754 for (;base < len;base++) {
7755 if (buf[base] == first) {
7756 if (third != 0) {
7757 if ((buf[base + 1] != next) ||
7758 (buf[base + 2] != third)) continue;
7759 } else if (next != 0) {
7760 if (buf[base + 1] != next) continue;
7761 }
7762 ctxt->checkIndex = 0;
7763#ifdef DEBUG_PUSH
7764 if (next == 0)
7765 xmlGenericError(xmlGenericErrorContext,
7766 "PP: lookup '%c' found at %d\n",
7767 first, base);
7768 else if (third == 0)
7769 xmlGenericError(xmlGenericErrorContext,
7770 "PP: lookup '%c%c' found at %d\n",
7771 first, next, base);
7772 else
7773 xmlGenericError(xmlGenericErrorContext,
7774 "PP: lookup '%c%c%c' found at %d\n",
7775 first, next, third, base);
7776#endif
7777 return(base - (in->cur - in->base));
7778 }
7779 }
7780 ctxt->checkIndex = base;
7781#ifdef DEBUG_PUSH
7782 if (next == 0)
7783 xmlGenericError(xmlGenericErrorContext,
7784 "PP: lookup '%c' failed\n", first);
7785 else if (third == 0)
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: lookup '%c%c' failed\n", first, next);
7788 else
7789 xmlGenericError(xmlGenericErrorContext,
7790 "PP: lookup '%c%c%c' failed\n", first, next, third);
7791#endif
7792 return(-1);
7793}
7794
7795/**
7796 * xmlParseTryOrFinish:
7797 * @ctxt: an XML parser context
7798 * @terminate: last chunk indicator
7799 *
7800 * Try to progress on parsing
7801 *
7802 * Returns zero if no parsing was possible
7803 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007804static int
Owen Taylor3473f882001-02-23 17:55:21 +00007805xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7806 int ret = 0;
7807 int avail;
7808 xmlChar cur, next;
7809
7810#ifdef DEBUG_PUSH
7811 switch (ctxt->instate) {
7812 case XML_PARSER_EOF:
7813 xmlGenericError(xmlGenericErrorContext,
7814 "PP: try EOF\n"); break;
7815 case XML_PARSER_START:
7816 xmlGenericError(xmlGenericErrorContext,
7817 "PP: try START\n"); break;
7818 case XML_PARSER_MISC:
7819 xmlGenericError(xmlGenericErrorContext,
7820 "PP: try MISC\n");break;
7821 case XML_PARSER_COMMENT:
7822 xmlGenericError(xmlGenericErrorContext,
7823 "PP: try COMMENT\n");break;
7824 case XML_PARSER_PROLOG:
7825 xmlGenericError(xmlGenericErrorContext,
7826 "PP: try PROLOG\n");break;
7827 case XML_PARSER_START_TAG:
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: try START_TAG\n");break;
7830 case XML_PARSER_CONTENT:
7831 xmlGenericError(xmlGenericErrorContext,
7832 "PP: try CONTENT\n");break;
7833 case XML_PARSER_CDATA_SECTION:
7834 xmlGenericError(xmlGenericErrorContext,
7835 "PP: try CDATA_SECTION\n");break;
7836 case XML_PARSER_END_TAG:
7837 xmlGenericError(xmlGenericErrorContext,
7838 "PP: try END_TAG\n");break;
7839 case XML_PARSER_ENTITY_DECL:
7840 xmlGenericError(xmlGenericErrorContext,
7841 "PP: try ENTITY_DECL\n");break;
7842 case XML_PARSER_ENTITY_VALUE:
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: try ENTITY_VALUE\n");break;
7845 case XML_PARSER_ATTRIBUTE_VALUE:
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: try ATTRIBUTE_VALUE\n");break;
7848 case XML_PARSER_DTD:
7849 xmlGenericError(xmlGenericErrorContext,
7850 "PP: try DTD\n");break;
7851 case XML_PARSER_EPILOG:
7852 xmlGenericError(xmlGenericErrorContext,
7853 "PP: try EPILOG\n");break;
7854 case XML_PARSER_PI:
7855 xmlGenericError(xmlGenericErrorContext,
7856 "PP: try PI\n");break;
7857 case XML_PARSER_IGNORE:
7858 xmlGenericError(xmlGenericErrorContext,
7859 "PP: try IGNORE\n");break;
7860 }
7861#endif
7862
7863 while (1) {
7864 /*
7865 * Pop-up of finished entities.
7866 */
7867 while ((RAW == 0) && (ctxt->inputNr > 1))
7868 xmlPopInput(ctxt);
7869
7870 if (ctxt->input ==NULL) break;
7871 if (ctxt->input->buf == NULL)
7872 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7873 else
7874 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7875 if (avail < 1)
7876 goto done;
7877 switch (ctxt->instate) {
7878 case XML_PARSER_EOF:
7879 /*
7880 * Document parsing is done !
7881 */
7882 goto done;
7883 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007884 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7885 xmlChar start[4];
7886 xmlCharEncoding enc;
7887
7888 /*
7889 * Very first chars read from the document flow.
7890 */
7891 if (avail < 4)
7892 goto done;
7893
7894 /*
7895 * Get the 4 first bytes and decode the charset
7896 * if enc != XML_CHAR_ENCODING_NONE
7897 * plug some encoding conversion routines.
7898 */
7899 start[0] = RAW;
7900 start[1] = NXT(1);
7901 start[2] = NXT(2);
7902 start[3] = NXT(3);
7903 enc = xmlDetectCharEncoding(start, 4);
7904 if (enc != XML_CHAR_ENCODING_NONE) {
7905 xmlSwitchEncoding(ctxt, enc);
7906 }
7907 break;
7908 }
Owen Taylor3473f882001-02-23 17:55:21 +00007909
7910 cur = ctxt->input->cur[0];
7911 next = ctxt->input->cur[1];
7912 if (cur == 0) {
7913 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7914 ctxt->sax->setDocumentLocator(ctxt->userData,
7915 &xmlDefaultSAXLocator);
7916 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7918 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7919 ctxt->wellFormed = 0;
7920 ctxt->disableSAX = 1;
7921 ctxt->instate = XML_PARSER_EOF;
7922#ifdef DEBUG_PUSH
7923 xmlGenericError(xmlGenericErrorContext,
7924 "PP: entering EOF\n");
7925#endif
7926 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7927 ctxt->sax->endDocument(ctxt->userData);
7928 goto done;
7929 }
7930 if ((cur == '<') && (next == '?')) {
7931 /* PI or XML decl */
7932 if (avail < 5) return(ret);
7933 if ((!terminate) &&
7934 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7935 return(ret);
7936 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7937 ctxt->sax->setDocumentLocator(ctxt->userData,
7938 &xmlDefaultSAXLocator);
7939 if ((ctxt->input->cur[2] == 'x') &&
7940 (ctxt->input->cur[3] == 'm') &&
7941 (ctxt->input->cur[4] == 'l') &&
7942 (IS_BLANK(ctxt->input->cur[5]))) {
7943 ret += 5;
7944#ifdef DEBUG_PUSH
7945 xmlGenericError(xmlGenericErrorContext,
7946 "PP: Parsing XML Decl\n");
7947#endif
7948 xmlParseXMLDecl(ctxt);
7949 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7950 /*
7951 * The XML REC instructs us to stop parsing right
7952 * here
7953 */
7954 ctxt->instate = XML_PARSER_EOF;
7955 return(0);
7956 }
7957 ctxt->standalone = ctxt->input->standalone;
7958 if ((ctxt->encoding == NULL) &&
7959 (ctxt->input->encoding != NULL))
7960 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7961 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7962 (!ctxt->disableSAX))
7963 ctxt->sax->startDocument(ctxt->userData);
7964 ctxt->instate = XML_PARSER_MISC;
7965#ifdef DEBUG_PUSH
7966 xmlGenericError(xmlGenericErrorContext,
7967 "PP: entering MISC\n");
7968#endif
7969 } else {
7970 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7971 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7972 (!ctxt->disableSAX))
7973 ctxt->sax->startDocument(ctxt->userData);
7974 ctxt->instate = XML_PARSER_MISC;
7975#ifdef DEBUG_PUSH
7976 xmlGenericError(xmlGenericErrorContext,
7977 "PP: entering MISC\n");
7978#endif
7979 }
7980 } else {
7981 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7982 ctxt->sax->setDocumentLocator(ctxt->userData,
7983 &xmlDefaultSAXLocator);
7984 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7985 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7986 (!ctxt->disableSAX))
7987 ctxt->sax->startDocument(ctxt->userData);
7988 ctxt->instate = XML_PARSER_MISC;
7989#ifdef DEBUG_PUSH
7990 xmlGenericError(xmlGenericErrorContext,
7991 "PP: entering MISC\n");
7992#endif
7993 }
7994 break;
7995 case XML_PARSER_MISC:
7996 SKIP_BLANKS;
7997 if (ctxt->input->buf == NULL)
7998 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7999 else
8000 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8001 if (avail < 2)
8002 goto done;
8003 cur = ctxt->input->cur[0];
8004 next = ctxt->input->cur[1];
8005 if ((cur == '<') && (next == '?')) {
8006 if ((!terminate) &&
8007 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8008 goto done;
8009#ifdef DEBUG_PUSH
8010 xmlGenericError(xmlGenericErrorContext,
8011 "PP: Parsing PI\n");
8012#endif
8013 xmlParsePI(ctxt);
8014 } else if ((cur == '<') && (next == '!') &&
8015 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8016 if ((!terminate) &&
8017 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8018 goto done;
8019#ifdef DEBUG_PUSH
8020 xmlGenericError(xmlGenericErrorContext,
8021 "PP: Parsing Comment\n");
8022#endif
8023 xmlParseComment(ctxt);
8024 ctxt->instate = XML_PARSER_MISC;
8025 } else if ((cur == '<') && (next == '!') &&
8026 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8027 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8028 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8029 (ctxt->input->cur[8] == 'E')) {
8030 if ((!terminate) &&
8031 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8032 goto done;
8033#ifdef DEBUG_PUSH
8034 xmlGenericError(xmlGenericErrorContext,
8035 "PP: Parsing internal subset\n");
8036#endif
8037 ctxt->inSubset = 1;
8038 xmlParseDocTypeDecl(ctxt);
8039 if (RAW == '[') {
8040 ctxt->instate = XML_PARSER_DTD;
8041#ifdef DEBUG_PUSH
8042 xmlGenericError(xmlGenericErrorContext,
8043 "PP: entering DTD\n");
8044#endif
8045 } else {
8046 /*
8047 * Create and update the external subset.
8048 */
8049 ctxt->inSubset = 2;
8050 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8051 (ctxt->sax->externalSubset != NULL))
8052 ctxt->sax->externalSubset(ctxt->userData,
8053 ctxt->intSubName, ctxt->extSubSystem,
8054 ctxt->extSubURI);
8055 ctxt->inSubset = 0;
8056 ctxt->instate = XML_PARSER_PROLOG;
8057#ifdef DEBUG_PUSH
8058 xmlGenericError(xmlGenericErrorContext,
8059 "PP: entering PROLOG\n");
8060#endif
8061 }
8062 } else if ((cur == '<') && (next == '!') &&
8063 (avail < 9)) {
8064 goto done;
8065 } else {
8066 ctxt->instate = XML_PARSER_START_TAG;
8067#ifdef DEBUG_PUSH
8068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: entering START_TAG\n");
8070#endif
8071 }
8072 break;
8073 case XML_PARSER_IGNORE:
8074 xmlGenericError(xmlGenericErrorContext,
8075 "PP: internal error, state == IGNORE");
8076 ctxt->instate = XML_PARSER_DTD;
8077#ifdef DEBUG_PUSH
8078 xmlGenericError(xmlGenericErrorContext,
8079 "PP: entering DTD\n");
8080#endif
8081 break;
8082 case XML_PARSER_PROLOG:
8083 SKIP_BLANKS;
8084 if (ctxt->input->buf == NULL)
8085 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8086 else
8087 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8088 if (avail < 2)
8089 goto done;
8090 cur = ctxt->input->cur[0];
8091 next = ctxt->input->cur[1];
8092 if ((cur == '<') && (next == '?')) {
8093 if ((!terminate) &&
8094 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8095 goto done;
8096#ifdef DEBUG_PUSH
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: Parsing PI\n");
8099#endif
8100 xmlParsePI(ctxt);
8101 } else if ((cur == '<') && (next == '!') &&
8102 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8103 if ((!terminate) &&
8104 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8105 goto done;
8106#ifdef DEBUG_PUSH
8107 xmlGenericError(xmlGenericErrorContext,
8108 "PP: Parsing Comment\n");
8109#endif
8110 xmlParseComment(ctxt);
8111 ctxt->instate = XML_PARSER_PROLOG;
8112 } else if ((cur == '<') && (next == '!') &&
8113 (avail < 4)) {
8114 goto done;
8115 } else {
8116 ctxt->instate = XML_PARSER_START_TAG;
8117#ifdef DEBUG_PUSH
8118 xmlGenericError(xmlGenericErrorContext,
8119 "PP: entering START_TAG\n");
8120#endif
8121 }
8122 break;
8123 case XML_PARSER_EPILOG:
8124 SKIP_BLANKS;
8125 if (ctxt->input->buf == NULL)
8126 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8127 else
8128 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8129 if (avail < 2)
8130 goto done;
8131 cur = ctxt->input->cur[0];
8132 next = ctxt->input->cur[1];
8133 if ((cur == '<') && (next == '?')) {
8134 if ((!terminate) &&
8135 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8136 goto done;
8137#ifdef DEBUG_PUSH
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: Parsing PI\n");
8140#endif
8141 xmlParsePI(ctxt);
8142 ctxt->instate = XML_PARSER_EPILOG;
8143 } else if ((cur == '<') && (next == '!') &&
8144 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8145 if ((!terminate) &&
8146 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8147 goto done;
8148#ifdef DEBUG_PUSH
8149 xmlGenericError(xmlGenericErrorContext,
8150 "PP: Parsing Comment\n");
8151#endif
8152 xmlParseComment(ctxt);
8153 ctxt->instate = XML_PARSER_EPILOG;
8154 } else if ((cur == '<') && (next == '!') &&
8155 (avail < 4)) {
8156 goto done;
8157 } else {
8158 ctxt->errNo = XML_ERR_DOCUMENT_END;
8159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8160 ctxt->sax->error(ctxt->userData,
8161 "Extra content at the end of the document\n");
8162 ctxt->wellFormed = 0;
8163 ctxt->disableSAX = 1;
8164 ctxt->instate = XML_PARSER_EOF;
8165#ifdef DEBUG_PUSH
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: entering EOF\n");
8168#endif
8169 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8170 (!ctxt->disableSAX))
8171 ctxt->sax->endDocument(ctxt->userData);
8172 goto done;
8173 }
8174 break;
8175 case XML_PARSER_START_TAG: {
8176 xmlChar *name, *oldname;
8177
8178 if ((avail < 2) && (ctxt->inputNr == 1))
8179 goto done;
8180 cur = ctxt->input->cur[0];
8181 if (cur != '<') {
8182 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8184 ctxt->sax->error(ctxt->userData,
8185 "Start tag expect, '<' not found\n");
8186 ctxt->wellFormed = 0;
8187 ctxt->disableSAX = 1;
8188 ctxt->instate = XML_PARSER_EOF;
8189#ifdef DEBUG_PUSH
8190 xmlGenericError(xmlGenericErrorContext,
8191 "PP: entering EOF\n");
8192#endif
8193 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8194 (!ctxt->disableSAX))
8195 ctxt->sax->endDocument(ctxt->userData);
8196 goto done;
8197 }
8198 if ((!terminate) &&
8199 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8200 goto done;
8201 if (ctxt->spaceNr == 0)
8202 spacePush(ctxt, -1);
8203 else
8204 spacePush(ctxt, *ctxt->space);
8205 name = xmlParseStartTag(ctxt);
8206 if (name == NULL) {
8207 spacePop(ctxt);
8208 ctxt->instate = XML_PARSER_EOF;
8209#ifdef DEBUG_PUSH
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: entering EOF\n");
8212#endif
8213 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8214 (!ctxt->disableSAX))
8215 ctxt->sax->endDocument(ctxt->userData);
8216 goto done;
8217 }
8218 namePush(ctxt, xmlStrdup(name));
8219
8220 /*
8221 * [ VC: Root Element Type ]
8222 * The Name in the document type declaration must match
8223 * the element type of the root element.
8224 */
8225 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8226 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8227 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8228
8229 /*
8230 * Check for an Empty Element.
8231 */
8232 if ((RAW == '/') && (NXT(1) == '>')) {
8233 SKIP(2);
8234 if ((ctxt->sax != NULL) &&
8235 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8236 ctxt->sax->endElement(ctxt->userData, name);
8237 xmlFree(name);
8238 oldname = namePop(ctxt);
8239 spacePop(ctxt);
8240 if (oldname != NULL) {
8241#ifdef DEBUG_STACK
8242 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8243#endif
8244 xmlFree(oldname);
8245 }
8246 if (ctxt->name == NULL) {
8247 ctxt->instate = XML_PARSER_EPILOG;
8248#ifdef DEBUG_PUSH
8249 xmlGenericError(xmlGenericErrorContext,
8250 "PP: entering EPILOG\n");
8251#endif
8252 } else {
8253 ctxt->instate = XML_PARSER_CONTENT;
8254#ifdef DEBUG_PUSH
8255 xmlGenericError(xmlGenericErrorContext,
8256 "PP: entering CONTENT\n");
8257#endif
8258 }
8259 break;
8260 }
8261 if (RAW == '>') {
8262 NEXT;
8263 } else {
8264 ctxt->errNo = XML_ERR_GT_REQUIRED;
8265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8266 ctxt->sax->error(ctxt->userData,
8267 "Couldn't find end of Start Tag %s\n",
8268 name);
8269 ctxt->wellFormed = 0;
8270 ctxt->disableSAX = 1;
8271
8272 /*
8273 * end of parsing of this node.
8274 */
8275 nodePop(ctxt);
8276 oldname = namePop(ctxt);
8277 spacePop(ctxt);
8278 if (oldname != NULL) {
8279#ifdef DEBUG_STACK
8280 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8281#endif
8282 xmlFree(oldname);
8283 }
8284 }
8285 xmlFree(name);
8286 ctxt->instate = XML_PARSER_CONTENT;
8287#ifdef DEBUG_PUSH
8288 xmlGenericError(xmlGenericErrorContext,
8289 "PP: entering CONTENT\n");
8290#endif
8291 break;
8292 }
8293 case XML_PARSER_CONTENT: {
8294 const xmlChar *test;
8295 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008296 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008297
8298 /*
8299 * Handle preparsed entities and charRef
8300 */
8301 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008302 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008303
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008304 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008305 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8306 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008307 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008308 ctxt->token = 0;
8309 }
8310 if ((avail < 2) && (ctxt->inputNr == 1))
8311 goto done;
8312 cur = ctxt->input->cur[0];
8313 next = ctxt->input->cur[1];
8314
8315 test = CUR_PTR;
8316 cons = ctxt->input->consumed;
8317 tok = ctxt->token;
8318 if ((cur == '<') && (next == '?')) {
8319 if ((!terminate) &&
8320 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8321 goto done;
8322#ifdef DEBUG_PUSH
8323 xmlGenericError(xmlGenericErrorContext,
8324 "PP: Parsing PI\n");
8325#endif
8326 xmlParsePI(ctxt);
8327 } else if ((cur == '<') && (next == '!') &&
8328 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8329 if ((!terminate) &&
8330 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8331 goto done;
8332#ifdef DEBUG_PUSH
8333 xmlGenericError(xmlGenericErrorContext,
8334 "PP: Parsing Comment\n");
8335#endif
8336 xmlParseComment(ctxt);
8337 ctxt->instate = XML_PARSER_CONTENT;
8338 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8339 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8340 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8341 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8342 (ctxt->input->cur[8] == '[')) {
8343 SKIP(9);
8344 ctxt->instate = XML_PARSER_CDATA_SECTION;
8345#ifdef DEBUG_PUSH
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: entering CDATA_SECTION\n");
8348#endif
8349 break;
8350 } else if ((cur == '<') && (next == '!') &&
8351 (avail < 9)) {
8352 goto done;
8353 } else if ((cur == '<') && (next == '/')) {
8354 ctxt->instate = XML_PARSER_END_TAG;
8355#ifdef DEBUG_PUSH
8356 xmlGenericError(xmlGenericErrorContext,
8357 "PP: entering END_TAG\n");
8358#endif
8359 break;
8360 } else if (cur == '<') {
8361 ctxt->instate = XML_PARSER_START_TAG;
8362#ifdef DEBUG_PUSH
8363 xmlGenericError(xmlGenericErrorContext,
8364 "PP: entering START_TAG\n");
8365#endif
8366 break;
8367 } else if (cur == '&') {
8368 if ((!terminate) &&
8369 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8370 goto done;
8371#ifdef DEBUG_PUSH
8372 xmlGenericError(xmlGenericErrorContext,
8373 "PP: Parsing Reference\n");
8374#endif
8375 xmlParseReference(ctxt);
8376 } else {
8377 /* TODO Avoid the extra copy, handle directly !!! */
8378 /*
8379 * Goal of the following test is:
8380 * - minimize calls to the SAX 'character' callback
8381 * when they are mergeable
8382 * - handle an problem for isBlank when we only parse
8383 * a sequence of blank chars and the next one is
8384 * not available to check against '<' presence.
8385 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008386 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008387 * of the parser.
8388 */
8389 if ((ctxt->inputNr == 1) &&
8390 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8391 if ((!terminate) &&
8392 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8393 goto done;
8394 }
8395 ctxt->checkIndex = 0;
8396#ifdef DEBUG_PUSH
8397 xmlGenericError(xmlGenericErrorContext,
8398 "PP: Parsing char data\n");
8399#endif
8400 xmlParseCharData(ctxt, 0);
8401 }
8402 /*
8403 * Pop-up of finished entities.
8404 */
8405 while ((RAW == 0) && (ctxt->inputNr > 1))
8406 xmlPopInput(ctxt);
8407 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8408 (tok == ctxt->token)) {
8409 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8411 ctxt->sax->error(ctxt->userData,
8412 "detected an error in element content\n");
8413 ctxt->wellFormed = 0;
8414 ctxt->disableSAX = 1;
8415 ctxt->instate = XML_PARSER_EOF;
8416 break;
8417 }
8418 break;
8419 }
8420 case XML_PARSER_CDATA_SECTION: {
8421 /*
8422 * The Push mode need to have the SAX callback for
8423 * cdataBlock merge back contiguous callbacks.
8424 */
8425 int base;
8426
8427 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8428 if (base < 0) {
8429 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8430 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8431 if (ctxt->sax->cdataBlock != NULL)
8432 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8433 XML_PARSER_BIG_BUFFER_SIZE);
8434 }
8435 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8436 ctxt->checkIndex = 0;
8437 }
8438 goto done;
8439 } else {
8440 if ((ctxt->sax != NULL) && (base > 0) &&
8441 (!ctxt->disableSAX)) {
8442 if (ctxt->sax->cdataBlock != NULL)
8443 ctxt->sax->cdataBlock(ctxt->userData,
8444 ctxt->input->cur, base);
8445 }
8446 SKIP(base + 3);
8447 ctxt->checkIndex = 0;
8448 ctxt->instate = XML_PARSER_CONTENT;
8449#ifdef DEBUG_PUSH
8450 xmlGenericError(xmlGenericErrorContext,
8451 "PP: entering CONTENT\n");
8452#endif
8453 }
8454 break;
8455 }
8456 case XML_PARSER_END_TAG:
8457 if (avail < 2)
8458 goto done;
8459 if ((!terminate) &&
8460 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8461 goto done;
8462 xmlParseEndTag(ctxt);
8463 if (ctxt->name == NULL) {
8464 ctxt->instate = XML_PARSER_EPILOG;
8465#ifdef DEBUG_PUSH
8466 xmlGenericError(xmlGenericErrorContext,
8467 "PP: entering EPILOG\n");
8468#endif
8469 } else {
8470 ctxt->instate = XML_PARSER_CONTENT;
8471#ifdef DEBUG_PUSH
8472 xmlGenericError(xmlGenericErrorContext,
8473 "PP: entering CONTENT\n");
8474#endif
8475 }
8476 break;
8477 case XML_PARSER_DTD: {
8478 /*
8479 * Sorry but progressive parsing of the internal subset
8480 * is not expected to be supported. We first check that
8481 * the full content of the internal subset is available and
8482 * the parsing is launched only at that point.
8483 * Internal subset ends up with "']' S? '>'" in an unescaped
8484 * section and not in a ']]>' sequence which are conditional
8485 * sections (whoever argued to keep that crap in XML deserve
8486 * a place in hell !).
8487 */
8488 int base, i;
8489 xmlChar *buf;
8490 xmlChar quote = 0;
8491
8492 base = ctxt->input->cur - ctxt->input->base;
8493 if (base < 0) return(0);
8494 if (ctxt->checkIndex > base)
8495 base = ctxt->checkIndex;
8496 buf = ctxt->input->buf->buffer->content;
8497 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8498 base++) {
8499 if (quote != 0) {
8500 if (buf[base] == quote)
8501 quote = 0;
8502 continue;
8503 }
8504 if (buf[base] == '"') {
8505 quote = '"';
8506 continue;
8507 }
8508 if (buf[base] == '\'') {
8509 quote = '\'';
8510 continue;
8511 }
8512 if (buf[base] == ']') {
8513 if ((unsigned int) base +1 >=
8514 ctxt->input->buf->buffer->use)
8515 break;
8516 if (buf[base + 1] == ']') {
8517 /* conditional crap, skip both ']' ! */
8518 base++;
8519 continue;
8520 }
8521 for (i = 0;
8522 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8523 i++) {
8524 if (buf[base + i] == '>')
8525 goto found_end_int_subset;
8526 }
8527 break;
8528 }
8529 }
8530 /*
8531 * We didn't found the end of the Internal subset
8532 */
8533 if (quote == 0)
8534 ctxt->checkIndex = base;
8535#ifdef DEBUG_PUSH
8536 if (next == 0)
8537 xmlGenericError(xmlGenericErrorContext,
8538 "PP: lookup of int subset end filed\n");
8539#endif
8540 goto done;
8541
8542found_end_int_subset:
8543 xmlParseInternalSubset(ctxt);
8544 ctxt->inSubset = 2;
8545 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8546 (ctxt->sax->externalSubset != NULL))
8547 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8548 ctxt->extSubSystem, ctxt->extSubURI);
8549 ctxt->inSubset = 0;
8550 ctxt->instate = XML_PARSER_PROLOG;
8551 ctxt->checkIndex = 0;
8552#ifdef DEBUG_PUSH
8553 xmlGenericError(xmlGenericErrorContext,
8554 "PP: entering PROLOG\n");
8555#endif
8556 break;
8557 }
8558 case XML_PARSER_COMMENT:
8559 xmlGenericError(xmlGenericErrorContext,
8560 "PP: internal error, state == COMMENT\n");
8561 ctxt->instate = XML_PARSER_CONTENT;
8562#ifdef DEBUG_PUSH
8563 xmlGenericError(xmlGenericErrorContext,
8564 "PP: entering CONTENT\n");
8565#endif
8566 break;
8567 case XML_PARSER_PI:
8568 xmlGenericError(xmlGenericErrorContext,
8569 "PP: internal error, state == PI\n");
8570 ctxt->instate = XML_PARSER_CONTENT;
8571#ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: entering CONTENT\n");
8574#endif
8575 break;
8576 case XML_PARSER_ENTITY_DECL:
8577 xmlGenericError(xmlGenericErrorContext,
8578 "PP: internal error, state == ENTITY_DECL\n");
8579 ctxt->instate = XML_PARSER_DTD;
8580#ifdef DEBUG_PUSH
8581 xmlGenericError(xmlGenericErrorContext,
8582 "PP: entering DTD\n");
8583#endif
8584 break;
8585 case XML_PARSER_ENTITY_VALUE:
8586 xmlGenericError(xmlGenericErrorContext,
8587 "PP: internal error, state == ENTITY_VALUE\n");
8588 ctxt->instate = XML_PARSER_CONTENT;
8589#ifdef DEBUG_PUSH
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: entering DTD\n");
8592#endif
8593 break;
8594 case XML_PARSER_ATTRIBUTE_VALUE:
8595 xmlGenericError(xmlGenericErrorContext,
8596 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8597 ctxt->instate = XML_PARSER_START_TAG;
8598#ifdef DEBUG_PUSH
8599 xmlGenericError(xmlGenericErrorContext,
8600 "PP: entering START_TAG\n");
8601#endif
8602 break;
8603 case XML_PARSER_SYSTEM_LITERAL:
8604 xmlGenericError(xmlGenericErrorContext,
8605 "PP: internal error, state == SYSTEM_LITERAL\n");
8606 ctxt->instate = XML_PARSER_START_TAG;
8607#ifdef DEBUG_PUSH
8608 xmlGenericError(xmlGenericErrorContext,
8609 "PP: entering START_TAG\n");
8610#endif
8611 break;
8612 }
8613 }
8614done:
8615#ifdef DEBUG_PUSH
8616 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8617#endif
8618 return(ret);
8619}
8620
8621/**
Owen Taylor3473f882001-02-23 17:55:21 +00008622 * xmlParseChunk:
8623 * @ctxt: an XML parser context
8624 * @chunk: an char array
8625 * @size: the size in byte of the chunk
8626 * @terminate: last chunk indicator
8627 *
8628 * Parse a Chunk of memory
8629 *
8630 * Returns zero if no error, the xmlParserErrors otherwise.
8631 */
8632int
8633xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8634 int terminate) {
8635 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8636 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8637 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8638 int cur = ctxt->input->cur - ctxt->input->base;
8639
8640 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8641 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8642 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008643 ctxt->input->end =
8644 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008645#ifdef DEBUG_PUSH
8646 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8647#endif
8648
8649 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8650 xmlParseTryOrFinish(ctxt, terminate);
8651 } else if (ctxt->instate != XML_PARSER_EOF) {
8652 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8653 xmlParserInputBufferPtr in = ctxt->input->buf;
8654 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8655 (in->raw != NULL)) {
8656 int nbchars;
8657
8658 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8659 if (nbchars < 0) {
8660 xmlGenericError(xmlGenericErrorContext,
8661 "xmlParseChunk: encoder error\n");
8662 return(XML_ERR_INVALID_ENCODING);
8663 }
8664 }
8665 }
8666 }
8667 xmlParseTryOrFinish(ctxt, terminate);
8668 if (terminate) {
8669 /*
8670 * Check for termination
8671 */
8672 if ((ctxt->instate != XML_PARSER_EOF) &&
8673 (ctxt->instate != XML_PARSER_EPILOG)) {
8674 ctxt->errNo = XML_ERR_DOCUMENT_END;
8675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8676 ctxt->sax->error(ctxt->userData,
8677 "Extra content at the end of the document\n");
8678 ctxt->wellFormed = 0;
8679 ctxt->disableSAX = 1;
8680 }
8681 if (ctxt->instate != XML_PARSER_EOF) {
8682 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8683 (!ctxt->disableSAX))
8684 ctxt->sax->endDocument(ctxt->userData);
8685 }
8686 ctxt->instate = XML_PARSER_EOF;
8687 }
8688 return((xmlParserErrors) ctxt->errNo);
8689}
8690
8691/************************************************************************
8692 * *
8693 * I/O front end functions to the parser *
8694 * *
8695 ************************************************************************/
8696
8697/**
8698 * xmlStopParser:
8699 * @ctxt: an XML parser context
8700 *
8701 * Blocks further parser processing
8702 */
8703void
8704xmlStopParser(xmlParserCtxtPtr ctxt) {
8705 ctxt->instate = XML_PARSER_EOF;
8706 if (ctxt->input != NULL)
8707 ctxt->input->cur = BAD_CAST"";
8708}
8709
8710/**
8711 * xmlCreatePushParserCtxt:
8712 * @sax: a SAX handler
8713 * @user_data: The user data returned on SAX callbacks
8714 * @chunk: a pointer to an array of chars
8715 * @size: number of chars in the array
8716 * @filename: an optional file name or URI
8717 *
8718 * Create a parser context for using the XML parser in push mode
8719 * To allow content encoding detection, @size should be >= 4
8720 * The value of @filename is used for fetching external entities
8721 * and error/warning reports.
8722 *
8723 * Returns the new parser context or NULL
8724 */
8725xmlParserCtxtPtr
8726xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8727 const char *chunk, int size, const char *filename) {
8728 xmlParserCtxtPtr ctxt;
8729 xmlParserInputPtr inputStream;
8730 xmlParserInputBufferPtr buf;
8731 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8732
8733 /*
8734 * plug some encoding conversion routines
8735 */
8736 if ((chunk != NULL) && (size >= 4))
8737 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8738
8739 buf = xmlAllocParserInputBuffer(enc);
8740 if (buf == NULL) return(NULL);
8741
8742 ctxt = xmlNewParserCtxt();
8743 if (ctxt == NULL) {
8744 xmlFree(buf);
8745 return(NULL);
8746 }
8747 if (sax != NULL) {
8748 if (ctxt->sax != &xmlDefaultSAXHandler)
8749 xmlFree(ctxt->sax);
8750 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8751 if (ctxt->sax == NULL) {
8752 xmlFree(buf);
8753 xmlFree(ctxt);
8754 return(NULL);
8755 }
8756 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8757 if (user_data != NULL)
8758 ctxt->userData = user_data;
8759 }
8760 if (filename == NULL) {
8761 ctxt->directory = NULL;
8762 } else {
8763 ctxt->directory = xmlParserGetDirectory(filename);
8764 }
8765
8766 inputStream = xmlNewInputStream(ctxt);
8767 if (inputStream == NULL) {
8768 xmlFreeParserCtxt(ctxt);
8769 return(NULL);
8770 }
8771
8772 if (filename == NULL)
8773 inputStream->filename = NULL;
8774 else
8775 inputStream->filename = xmlMemStrdup(filename);
8776 inputStream->buf = buf;
8777 inputStream->base = inputStream->buf->buffer->content;
8778 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008779 inputStream->end =
8780 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008781
8782 inputPush(ctxt, inputStream);
8783
8784 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8785 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008786 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8787 int cur = ctxt->input->cur - ctxt->input->base;
8788
Owen Taylor3473f882001-02-23 17:55:21 +00008789 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008790
8791 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8792 ctxt->input->cur = ctxt->input->base + cur;
8793 ctxt->input->end =
8794 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008795#ifdef DEBUG_PUSH
8796 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8797#endif
8798 }
8799
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008800 if (enc != XML_CHAR_ENCODING_NONE) {
8801 xmlSwitchEncoding(ctxt, enc);
8802 }
8803
Owen Taylor3473f882001-02-23 17:55:21 +00008804 return(ctxt);
8805}
8806
8807/**
8808 * xmlCreateIOParserCtxt:
8809 * @sax: a SAX handler
8810 * @user_data: The user data returned on SAX callbacks
8811 * @ioread: an I/O read function
8812 * @ioclose: an I/O close function
8813 * @ioctx: an I/O handler
8814 * @enc: the charset encoding if known
8815 *
8816 * Create a parser context for using the XML parser with an existing
8817 * I/O stream
8818 *
8819 * Returns the new parser context or NULL
8820 */
8821xmlParserCtxtPtr
8822xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8823 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8824 void *ioctx, xmlCharEncoding enc) {
8825 xmlParserCtxtPtr ctxt;
8826 xmlParserInputPtr inputStream;
8827 xmlParserInputBufferPtr buf;
8828
8829 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8830 if (buf == NULL) return(NULL);
8831
8832 ctxt = xmlNewParserCtxt();
8833 if (ctxt == NULL) {
8834 xmlFree(buf);
8835 return(NULL);
8836 }
8837 if (sax != NULL) {
8838 if (ctxt->sax != &xmlDefaultSAXHandler)
8839 xmlFree(ctxt->sax);
8840 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8841 if (ctxt->sax == NULL) {
8842 xmlFree(buf);
8843 xmlFree(ctxt);
8844 return(NULL);
8845 }
8846 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8847 if (user_data != NULL)
8848 ctxt->userData = user_data;
8849 }
8850
8851 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8852 if (inputStream == NULL) {
8853 xmlFreeParserCtxt(ctxt);
8854 return(NULL);
8855 }
8856 inputPush(ctxt, inputStream);
8857
8858 return(ctxt);
8859}
8860
8861/************************************************************************
8862 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008863 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008864 * *
8865 ************************************************************************/
8866
8867/**
8868 * xmlIOParseDTD:
8869 * @sax: the SAX handler block or NULL
8870 * @input: an Input Buffer
8871 * @enc: the charset encoding if known
8872 *
8873 * Load and parse a DTD
8874 *
8875 * Returns the resulting xmlDtdPtr or NULL in case of error.
8876 * @input will be freed at parsing end.
8877 */
8878
8879xmlDtdPtr
8880xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8881 xmlCharEncoding enc) {
8882 xmlDtdPtr ret = NULL;
8883 xmlParserCtxtPtr ctxt;
8884 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008885 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008886
8887 if (input == NULL)
8888 return(NULL);
8889
8890 ctxt = xmlNewParserCtxt();
8891 if (ctxt == NULL) {
8892 return(NULL);
8893 }
8894
8895 /*
8896 * Set-up the SAX context
8897 */
8898 if (sax != NULL) {
8899 if (ctxt->sax != NULL)
8900 xmlFree(ctxt->sax);
8901 ctxt->sax = sax;
8902 ctxt->userData = NULL;
8903 }
8904
8905 /*
8906 * generate a parser input from the I/O handler
8907 */
8908
8909 pinput = xmlNewIOInputStream(ctxt, input, enc);
8910 if (pinput == NULL) {
8911 if (sax != NULL) ctxt->sax = NULL;
8912 xmlFreeParserCtxt(ctxt);
8913 return(NULL);
8914 }
8915
8916 /*
8917 * plug some encoding conversion routines here.
8918 */
8919 xmlPushInput(ctxt, pinput);
8920
8921 pinput->filename = NULL;
8922 pinput->line = 1;
8923 pinput->col = 1;
8924 pinput->base = ctxt->input->cur;
8925 pinput->cur = ctxt->input->cur;
8926 pinput->free = NULL;
8927
8928 /*
8929 * let's parse that entity knowing it's an external subset.
8930 */
8931 ctxt->inSubset = 2;
8932 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8933 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8934 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008935
8936 if (enc == XML_CHAR_ENCODING_NONE) {
8937 /*
8938 * Get the 4 first bytes and decode the charset
8939 * if enc != XML_CHAR_ENCODING_NONE
8940 * plug some encoding conversion routines.
8941 */
8942 start[0] = RAW;
8943 start[1] = NXT(1);
8944 start[2] = NXT(2);
8945 start[3] = NXT(3);
8946 enc = xmlDetectCharEncoding(start, 4);
8947 if (enc != XML_CHAR_ENCODING_NONE) {
8948 xmlSwitchEncoding(ctxt, enc);
8949 }
8950 }
8951
Owen Taylor3473f882001-02-23 17:55:21 +00008952 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8953
8954 if (ctxt->myDoc != NULL) {
8955 if (ctxt->wellFormed) {
8956 ret = ctxt->myDoc->extSubset;
8957 ctxt->myDoc->extSubset = NULL;
8958 } else {
8959 ret = NULL;
8960 }
8961 xmlFreeDoc(ctxt->myDoc);
8962 ctxt->myDoc = NULL;
8963 }
8964 if (sax != NULL) ctxt->sax = NULL;
8965 xmlFreeParserCtxt(ctxt);
8966
8967 return(ret);
8968}
8969
8970/**
8971 * xmlSAXParseDTD:
8972 * @sax: the SAX handler block
8973 * @ExternalID: a NAME* containing the External ID of the DTD
8974 * @SystemID: a NAME* containing the URL to the DTD
8975 *
8976 * Load and parse an external subset.
8977 *
8978 * Returns the resulting xmlDtdPtr or NULL in case of error.
8979 */
8980
8981xmlDtdPtr
8982xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8983 const xmlChar *SystemID) {
8984 xmlDtdPtr ret = NULL;
8985 xmlParserCtxtPtr ctxt;
8986 xmlParserInputPtr input = NULL;
8987 xmlCharEncoding enc;
8988
8989 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8990
8991 ctxt = xmlNewParserCtxt();
8992 if (ctxt == NULL) {
8993 return(NULL);
8994 }
8995
8996 /*
8997 * Set-up the SAX context
8998 */
8999 if (sax != NULL) {
9000 if (ctxt->sax != NULL)
9001 xmlFree(ctxt->sax);
9002 ctxt->sax = sax;
9003 ctxt->userData = NULL;
9004 }
9005
9006 /*
9007 * Ask the Entity resolver to load the damn thing
9008 */
9009
9010 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9011 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9012 if (input == NULL) {
9013 if (sax != NULL) ctxt->sax = NULL;
9014 xmlFreeParserCtxt(ctxt);
9015 return(NULL);
9016 }
9017
9018 /*
9019 * plug some encoding conversion routines here.
9020 */
9021 xmlPushInput(ctxt, input);
9022 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9023 xmlSwitchEncoding(ctxt, enc);
9024
9025 if (input->filename == NULL)
9026 input->filename = (char *) xmlStrdup(SystemID);
9027 input->line = 1;
9028 input->col = 1;
9029 input->base = ctxt->input->cur;
9030 input->cur = ctxt->input->cur;
9031 input->free = NULL;
9032
9033 /*
9034 * let's parse that entity knowing it's an external subset.
9035 */
9036 ctxt->inSubset = 2;
9037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9038 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9039 ExternalID, SystemID);
9040 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9041
9042 if (ctxt->myDoc != NULL) {
9043 if (ctxt->wellFormed) {
9044 ret = ctxt->myDoc->extSubset;
9045 ctxt->myDoc->extSubset = NULL;
9046 } else {
9047 ret = NULL;
9048 }
9049 xmlFreeDoc(ctxt->myDoc);
9050 ctxt->myDoc = NULL;
9051 }
9052 if (sax != NULL) ctxt->sax = NULL;
9053 xmlFreeParserCtxt(ctxt);
9054
9055 return(ret);
9056}
9057
9058/**
9059 * xmlParseDTD:
9060 * @ExternalID: a NAME* containing the External ID of the DTD
9061 * @SystemID: a NAME* containing the URL to the DTD
9062 *
9063 * Load and parse an external subset.
9064 *
9065 * Returns the resulting xmlDtdPtr or NULL in case of error.
9066 */
9067
9068xmlDtdPtr
9069xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9070 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9071}
9072
9073/************************************************************************
9074 * *
9075 * Front ends when parsing an Entity *
9076 * *
9077 ************************************************************************/
9078
9079/**
Owen Taylor3473f882001-02-23 17:55:21 +00009080 * xmlParseCtxtExternalEntity:
9081 * @ctx: the existing parsing context
9082 * @URL: the URL for the entity to load
9083 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009084 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009085 *
9086 * Parse an external general entity within an existing parsing context
9087 * An external general parsed entity is well-formed if it matches the
9088 * production labeled extParsedEnt.
9089 *
9090 * [78] extParsedEnt ::= TextDecl? content
9091 *
9092 * Returns 0 if the entity is well formed, -1 in case of args problem and
9093 * the parser error code otherwise
9094 */
9095
9096int
9097xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009098 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009099 xmlParserCtxtPtr ctxt;
9100 xmlDocPtr newDoc;
9101 xmlSAXHandlerPtr oldsax = NULL;
9102 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009103 xmlChar start[4];
9104 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009105
9106 if (ctx->depth > 40) {
9107 return(XML_ERR_ENTITY_LOOP);
9108 }
9109
Daniel Veillardcda96922001-08-21 10:56:31 +00009110 if (lst != NULL)
9111 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009112 if ((URL == NULL) && (ID == NULL))
9113 return(-1);
9114 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9115 return(-1);
9116
9117
9118 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9119 if (ctxt == NULL) return(-1);
9120 ctxt->userData = ctxt;
9121 oldsax = ctxt->sax;
9122 ctxt->sax = ctx->sax;
9123 newDoc = xmlNewDoc(BAD_CAST "1.0");
9124 if (newDoc == NULL) {
9125 xmlFreeParserCtxt(ctxt);
9126 return(-1);
9127 }
9128 if (ctx->myDoc != NULL) {
9129 newDoc->intSubset = ctx->myDoc->intSubset;
9130 newDoc->extSubset = ctx->myDoc->extSubset;
9131 }
9132 if (ctx->myDoc->URL != NULL) {
9133 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9134 }
9135 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9136 if (newDoc->children == NULL) {
9137 ctxt->sax = oldsax;
9138 xmlFreeParserCtxt(ctxt);
9139 newDoc->intSubset = NULL;
9140 newDoc->extSubset = NULL;
9141 xmlFreeDoc(newDoc);
9142 return(-1);
9143 }
9144 nodePush(ctxt, newDoc->children);
9145 if (ctx->myDoc == NULL) {
9146 ctxt->myDoc = newDoc;
9147 } else {
9148 ctxt->myDoc = ctx->myDoc;
9149 newDoc->children->doc = ctx->myDoc;
9150 }
9151
Daniel Veillard87a764e2001-06-20 17:41:10 +00009152 /*
9153 * Get the 4 first bytes and decode the charset
9154 * if enc != XML_CHAR_ENCODING_NONE
9155 * plug some encoding conversion routines.
9156 */
9157 GROW
9158 start[0] = RAW;
9159 start[1] = NXT(1);
9160 start[2] = NXT(2);
9161 start[3] = NXT(3);
9162 enc = xmlDetectCharEncoding(start, 4);
9163 if (enc != XML_CHAR_ENCODING_NONE) {
9164 xmlSwitchEncoding(ctxt, enc);
9165 }
9166
Owen Taylor3473f882001-02-23 17:55:21 +00009167 /*
9168 * Parse a possible text declaration first
9169 */
Owen Taylor3473f882001-02-23 17:55:21 +00009170 if ((RAW == '<') && (NXT(1) == '?') &&
9171 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9172 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9173 xmlParseTextDecl(ctxt);
9174 }
9175
9176 /*
9177 * Doing validity checking on chunk doesn't make sense
9178 */
9179 ctxt->instate = XML_PARSER_CONTENT;
9180 ctxt->validate = ctx->validate;
9181 ctxt->loadsubset = ctx->loadsubset;
9182 ctxt->depth = ctx->depth + 1;
9183 ctxt->replaceEntities = ctx->replaceEntities;
9184 if (ctxt->validate) {
9185 ctxt->vctxt.error = ctx->vctxt.error;
9186 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009187 } else {
9188 ctxt->vctxt.error = NULL;
9189 ctxt->vctxt.warning = NULL;
9190 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009191 ctxt->vctxt.nodeTab = NULL;
9192 ctxt->vctxt.nodeNr = 0;
9193 ctxt->vctxt.nodeMax = 0;
9194 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009195
9196 xmlParseContent(ctxt);
9197
9198 if ((RAW == '<') && (NXT(1) == '/')) {
9199 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9201 ctxt->sax->error(ctxt->userData,
9202 "chunk is not well balanced\n");
9203 ctxt->wellFormed = 0;
9204 ctxt->disableSAX = 1;
9205 } else if (RAW != 0) {
9206 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9208 ctxt->sax->error(ctxt->userData,
9209 "extra content at the end of well balanced chunk\n");
9210 ctxt->wellFormed = 0;
9211 ctxt->disableSAX = 1;
9212 }
9213 if (ctxt->node != newDoc->children) {
9214 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9216 ctxt->sax->error(ctxt->userData,
9217 "chunk is not well balanced\n");
9218 ctxt->wellFormed = 0;
9219 ctxt->disableSAX = 1;
9220 }
9221
9222 if (!ctxt->wellFormed) {
9223 if (ctxt->errNo == 0)
9224 ret = 1;
9225 else
9226 ret = ctxt->errNo;
9227 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009228 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009229 xmlNodePtr cur;
9230
9231 /*
9232 * Return the newly created nodeset after unlinking it from
9233 * they pseudo parent.
9234 */
9235 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009236 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009237 while (cur != NULL) {
9238 cur->parent = NULL;
9239 cur = cur->next;
9240 }
9241 newDoc->children->children = NULL;
9242 }
9243 ret = 0;
9244 }
9245 ctxt->sax = oldsax;
9246 xmlFreeParserCtxt(ctxt);
9247 newDoc->intSubset = NULL;
9248 newDoc->extSubset = NULL;
9249 xmlFreeDoc(newDoc);
9250
9251 return(ret);
9252}
9253
9254/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009255 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009256 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009257 * @oldctxt: the previous parser context if available
 * @sax:  the SAX handler block (possibly NULL)
9259 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9260 * @depth: Used for loop detection, use 0
9261 * @URL: the URL for the entity to load
9262 * @ID: the System ID for the entity to load
9263 * @list: the return value for the set of parsed nodes
9264 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009265 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009266 *
9267 * Returns 0 if the entity is well formed, -1 in case of args problem and
9268 * the parser error code otherwise
9269 */
9270
Daniel Veillard257d9102001-05-08 10:41:44 +00009271static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009272xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9273 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009274 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009275 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009276 xmlParserCtxtPtr ctxt;
9277 xmlDocPtr newDoc;
9278 xmlSAXHandlerPtr oldsax = NULL;
9279 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009280 xmlChar start[4];
9281 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009282
9283 if (depth > 40) {
9284 return(XML_ERR_ENTITY_LOOP);
9285 }
9286
9287
9288
9289 if (list != NULL)
9290 *list = NULL;
9291 if ((URL == NULL) && (ID == NULL))
9292 return(-1);
9293 if (doc == NULL) /* @@ relax but check for dereferences */
9294 return(-1);
9295
9296
9297 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9298 if (ctxt == NULL) return(-1);
9299 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009300 if (oldctxt != NULL) {
9301 ctxt->_private = oldctxt->_private;
9302 ctxt->loadsubset = oldctxt->loadsubset;
9303 ctxt->validate = oldctxt->validate;
9304 ctxt->external = oldctxt->external;
9305 } else {
9306 /*
9307 * Doing validity checking on chunk without context
9308 * doesn't make sense
9309 */
9310 ctxt->_private = NULL;
9311 ctxt->validate = 0;
9312 ctxt->external = 2;
9313 ctxt->loadsubset = 0;
9314 }
Owen Taylor3473f882001-02-23 17:55:21 +00009315 if (sax != NULL) {
9316 oldsax = ctxt->sax;
9317 ctxt->sax = sax;
9318 if (user_data != NULL)
9319 ctxt->userData = user_data;
9320 }
9321 newDoc = xmlNewDoc(BAD_CAST "1.0");
9322 if (newDoc == NULL) {
9323 xmlFreeParserCtxt(ctxt);
9324 return(-1);
9325 }
9326 if (doc != NULL) {
9327 newDoc->intSubset = doc->intSubset;
9328 newDoc->extSubset = doc->extSubset;
9329 }
9330 if (doc->URL != NULL) {
9331 newDoc->URL = xmlStrdup(doc->URL);
9332 }
9333 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9334 if (newDoc->children == NULL) {
9335 if (sax != NULL)
9336 ctxt->sax = oldsax;
9337 xmlFreeParserCtxt(ctxt);
9338 newDoc->intSubset = NULL;
9339 newDoc->extSubset = NULL;
9340 xmlFreeDoc(newDoc);
9341 return(-1);
9342 }
9343 nodePush(ctxt, newDoc->children);
9344 if (doc == NULL) {
9345 ctxt->myDoc = newDoc;
9346 } else {
9347 ctxt->myDoc = doc;
9348 newDoc->children->doc = doc;
9349 }
9350
Daniel Veillard87a764e2001-06-20 17:41:10 +00009351 /*
9352 * Get the 4 first bytes and decode the charset
9353 * if enc != XML_CHAR_ENCODING_NONE
9354 * plug some encoding conversion routines.
9355 */
9356 GROW;
9357 start[0] = RAW;
9358 start[1] = NXT(1);
9359 start[2] = NXT(2);
9360 start[3] = NXT(3);
9361 enc = xmlDetectCharEncoding(start, 4);
9362 if (enc != XML_CHAR_ENCODING_NONE) {
9363 xmlSwitchEncoding(ctxt, enc);
9364 }
9365
Owen Taylor3473f882001-02-23 17:55:21 +00009366 /*
9367 * Parse a possible text declaration first
9368 */
Owen Taylor3473f882001-02-23 17:55:21 +00009369 if ((RAW == '<') && (NXT(1) == '?') &&
9370 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9371 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9372 xmlParseTextDecl(ctxt);
9373 }
9374
Owen Taylor3473f882001-02-23 17:55:21 +00009375 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009376 ctxt->depth = depth;
9377
9378 xmlParseContent(ctxt);
9379
9380 if ((RAW == '<') && (NXT(1) == '/')) {
9381 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9383 ctxt->sax->error(ctxt->userData,
9384 "chunk is not well balanced\n");
9385 ctxt->wellFormed = 0;
9386 ctxt->disableSAX = 1;
9387 } else if (RAW != 0) {
9388 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9390 ctxt->sax->error(ctxt->userData,
9391 "extra content at the end of well balanced chunk\n");
9392 ctxt->wellFormed = 0;
9393 ctxt->disableSAX = 1;
9394 }
9395 if (ctxt->node != newDoc->children) {
9396 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9398 ctxt->sax->error(ctxt->userData,
9399 "chunk is not well balanced\n");
9400 ctxt->wellFormed = 0;
9401 ctxt->disableSAX = 1;
9402 }
9403
9404 if (!ctxt->wellFormed) {
9405 if (ctxt->errNo == 0)
9406 ret = 1;
9407 else
9408 ret = ctxt->errNo;
9409 } else {
9410 if (list != NULL) {
9411 xmlNodePtr cur;
9412
9413 /*
9414 * Return the newly created nodeset after unlinking it from
9415 * they pseudo parent.
9416 */
9417 cur = newDoc->children->children;
9418 *list = cur;
9419 while (cur != NULL) {
9420 cur->parent = NULL;
9421 cur = cur->next;
9422 }
9423 newDoc->children->children = NULL;
9424 }
9425 ret = 0;
9426 }
9427 if (sax != NULL)
9428 ctxt->sax = oldsax;
9429 xmlFreeParserCtxt(ctxt);
9430 newDoc->intSubset = NULL;
9431 newDoc->extSubset = NULL;
9432 xmlFreeDoc(newDoc);
9433
9434 return(ret);
9435}
9436
9437/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009438 * xmlParseExternalEntity:
9439 * @doc: the document the chunk pertains to
9440 * @sax: the SAX handler bloc (possibly NULL)
9441 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9442 * @depth: Used for loop detection, use 0
9443 * @URL: the URL for the entity to load
9444 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009445 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009446 *
9447 * Parse an external general entity
9448 * An external general parsed entity is well-formed if it matches the
9449 * production labeled extParsedEnt.
9450 *
9451 * [78] extParsedEnt ::= TextDecl? content
9452 *
9453 * Returns 0 if the entity is well formed, -1 in case of args problem and
9454 * the parser error code otherwise
9455 */
9456
9457int
9458xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009459 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009460 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009461 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009462}
9463
9464/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009465 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009466 * @doc: the document the chunk pertains to
9467 * @sax: the SAX handler bloc (possibly NULL)
9468 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9469 * @depth: Used for loop detection, use 0
9470 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009471 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009472 *
9473 * Parse a well-balanced chunk of an XML document
9474 * called by the parser
9475 * The allowed sequence for the Well Balanced Chunk is the one defined by
9476 * the content production in the XML grammar:
9477 *
9478 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9479 *
9480 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9481 * the parser error code otherwise
9482 */
9483
9484int
9485xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009486 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009487 xmlParserCtxtPtr ctxt;
9488 xmlDocPtr newDoc;
9489 xmlSAXHandlerPtr oldsax = NULL;
9490 int size;
9491 int ret = 0;
9492
9493 if (depth > 40) {
9494 return(XML_ERR_ENTITY_LOOP);
9495 }
9496
9497
Daniel Veillardcda96922001-08-21 10:56:31 +00009498 if (lst != NULL)
9499 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009500 if (string == NULL)
9501 return(-1);
9502
9503 size = xmlStrlen(string);
9504
9505 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9506 if (ctxt == NULL) return(-1);
9507 ctxt->userData = ctxt;
9508 if (sax != NULL) {
9509 oldsax = ctxt->sax;
9510 ctxt->sax = sax;
9511 if (user_data != NULL)
9512 ctxt->userData = user_data;
9513 }
9514 newDoc = xmlNewDoc(BAD_CAST "1.0");
9515 if (newDoc == NULL) {
9516 xmlFreeParserCtxt(ctxt);
9517 return(-1);
9518 }
9519 if (doc != NULL) {
9520 newDoc->intSubset = doc->intSubset;
9521 newDoc->extSubset = doc->extSubset;
9522 }
9523 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9524 if (newDoc->children == NULL) {
9525 if (sax != NULL)
9526 ctxt->sax = oldsax;
9527 xmlFreeParserCtxt(ctxt);
9528 newDoc->intSubset = NULL;
9529 newDoc->extSubset = NULL;
9530 xmlFreeDoc(newDoc);
9531 return(-1);
9532 }
9533 nodePush(ctxt, newDoc->children);
9534 if (doc == NULL) {
9535 ctxt->myDoc = newDoc;
9536 } else {
9537 ctxt->myDoc = doc;
9538 newDoc->children->doc = doc;
9539 }
9540 ctxt->instate = XML_PARSER_CONTENT;
9541 ctxt->depth = depth;
9542
9543 /*
9544 * Doing validity checking on chunk doesn't make sense
9545 */
9546 ctxt->validate = 0;
9547 ctxt->loadsubset = 0;
9548
9549 xmlParseContent(ctxt);
9550
9551 if ((RAW == '<') && (NXT(1) == '/')) {
9552 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9554 ctxt->sax->error(ctxt->userData,
9555 "chunk is not well balanced\n");
9556 ctxt->wellFormed = 0;
9557 ctxt->disableSAX = 1;
9558 } else if (RAW != 0) {
9559 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9561 ctxt->sax->error(ctxt->userData,
9562 "extra content at the end of well balanced chunk\n");
9563 ctxt->wellFormed = 0;
9564 ctxt->disableSAX = 1;
9565 }
9566 if (ctxt->node != newDoc->children) {
9567 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9569 ctxt->sax->error(ctxt->userData,
9570 "chunk is not well balanced\n");
9571 ctxt->wellFormed = 0;
9572 ctxt->disableSAX = 1;
9573 }
9574
9575 if (!ctxt->wellFormed) {
9576 if (ctxt->errNo == 0)
9577 ret = 1;
9578 else
9579 ret = ctxt->errNo;
9580 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009581 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009582 xmlNodePtr cur;
9583
9584 /*
9585 * Return the newly created nodeset after unlinking it from
9586 * they pseudo parent.
9587 */
9588 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009589 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009590 while (cur != NULL) {
9591 cur->parent = NULL;
9592 cur = cur->next;
9593 }
9594 newDoc->children->children = NULL;
9595 }
9596 ret = 0;
9597 }
9598 if (sax != NULL)
9599 ctxt->sax = oldsax;
9600 xmlFreeParserCtxt(ctxt);
9601 newDoc->intSubset = NULL;
9602 newDoc->extSubset = NULL;
9603 xmlFreeDoc(newDoc);
9604
9605 return(ret);
9606}
9607
9608/**
9609 * xmlSAXParseEntity:
9610 * @sax: the SAX handler block
9611 * @filename: the filename
9612 *
9613 * parse an XML external entity out of context and build a tree.
9614 * It use the given SAX function block to handle the parsing callback.
9615 * If sax is NULL, fallback to the default DOM tree building routines.
9616 *
9617 * [78] extParsedEnt ::= TextDecl? content
9618 *
9619 * This correspond to a "Well Balanced" chunk
9620 *
9621 * Returns the resulting document tree
9622 */
9623
9624xmlDocPtr
9625xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9626 xmlDocPtr ret;
9627 xmlParserCtxtPtr ctxt;
9628 char *directory = NULL;
9629
9630 ctxt = xmlCreateFileParserCtxt(filename);
9631 if (ctxt == NULL) {
9632 return(NULL);
9633 }
9634 if (sax != NULL) {
9635 if (ctxt->sax != NULL)
9636 xmlFree(ctxt->sax);
9637 ctxt->sax = sax;
9638 ctxt->userData = NULL;
9639 }
9640
9641 if ((ctxt->directory == NULL) && (directory == NULL))
9642 directory = xmlParserGetDirectory(filename);
9643
9644 xmlParseExtParsedEnt(ctxt);
9645
9646 if (ctxt->wellFormed)
9647 ret = ctxt->myDoc;
9648 else {
9649 ret = NULL;
9650 xmlFreeDoc(ctxt->myDoc);
9651 ctxt->myDoc = NULL;
9652 }
9653 if (sax != NULL)
9654 ctxt->sax = NULL;
9655 xmlFreeParserCtxt(ctxt);
9656
9657 return(ret);
9658}
9659
9660/**
9661 * xmlParseEntity:
9662 * @filename: the filename
9663 *
9664 * parse an XML external entity out of context and build a tree.
9665 *
9666 * [78] extParsedEnt ::= TextDecl? content
9667 *
9668 * This correspond to a "Well Balanced" chunk
9669 *
9670 * Returns the resulting document tree
9671 */
9672
9673xmlDocPtr
9674xmlParseEntity(const char *filename) {
9675 return(xmlSAXParseEntity(NULL, filename));
9676}
9677
9678/**
9679 * xmlCreateEntityParserCtxt:
9680 * @URL: the entity URL
9681 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009682 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009683 *
9684 * Create a parser context for an external entity
9685 * Automatic support for ZLIB/Compress compressed document is provided
9686 * by default if found at compile-time.
9687 *
9688 * Returns the new parser context or NULL
9689 */
9690xmlParserCtxtPtr
9691xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9692 const xmlChar *base) {
9693 xmlParserCtxtPtr ctxt;
9694 xmlParserInputPtr inputStream;
9695 char *directory = NULL;
9696 xmlChar *uri;
9697
9698 ctxt = xmlNewParserCtxt();
9699 if (ctxt == NULL) {
9700 return(NULL);
9701 }
9702
9703 uri = xmlBuildURI(URL, base);
9704
9705 if (uri == NULL) {
9706 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9707 if (inputStream == NULL) {
9708 xmlFreeParserCtxt(ctxt);
9709 return(NULL);
9710 }
9711
9712 inputPush(ctxt, inputStream);
9713
9714 if ((ctxt->directory == NULL) && (directory == NULL))
9715 directory = xmlParserGetDirectory((char *)URL);
9716 if ((ctxt->directory == NULL) && (directory != NULL))
9717 ctxt->directory = directory;
9718 } else {
9719 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9720 if (inputStream == NULL) {
9721 xmlFree(uri);
9722 xmlFreeParserCtxt(ctxt);
9723 return(NULL);
9724 }
9725
9726 inputPush(ctxt, inputStream);
9727
9728 if ((ctxt->directory == NULL) && (directory == NULL))
9729 directory = xmlParserGetDirectory((char *)uri);
9730 if ((ctxt->directory == NULL) && (directory != NULL))
9731 ctxt->directory = directory;
9732 xmlFree(uri);
9733 }
9734
9735 return(ctxt);
9736}
9737
9738/************************************************************************
9739 * *
9740 * Front ends when parsing from a file *
9741 * *
9742 ************************************************************************/
9743
9744/**
9745 * xmlCreateFileParserCtxt:
9746 * @filename: the filename
9747 *
9748 * Create a parser context for a file content.
9749 * Automatic support for ZLIB/Compress compressed document is provided
9750 * by default if found at compile-time.
9751 *
9752 * Returns the new parser context or NULL
9753 */
9754xmlParserCtxtPtr
9755xmlCreateFileParserCtxt(const char *filename)
9756{
9757 xmlParserCtxtPtr ctxt;
9758 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009759 char *directory = NULL;
9760
Owen Taylor3473f882001-02-23 17:55:21 +00009761 ctxt = xmlNewParserCtxt();
9762 if (ctxt == NULL) {
9763 if (xmlDefaultSAXHandler.error != NULL) {
9764 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9765 }
9766 return(NULL);
9767 }
9768
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009769 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009770 if (inputStream == NULL) {
9771 xmlFreeParserCtxt(ctxt);
9772 return(NULL);
9773 }
9774
Owen Taylor3473f882001-02-23 17:55:21 +00009775 inputPush(ctxt, inputStream);
9776 if ((ctxt->directory == NULL) && (directory == NULL))
9777 directory = xmlParserGetDirectory(filename);
9778 if ((ctxt->directory == NULL) && (directory != NULL))
9779 ctxt->directory = directory;
9780
9781 return(ctxt);
9782}
9783
9784/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009785 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009786 * @sax: the SAX handler block
9787 * @filename: the filename
9788 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9789 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009790 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009791 *
9792 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9793 * compressed document is provided by default if found at compile-time.
9794 * It use the given SAX function block to handle the parsing callback.
9795 * If sax is NULL, fallback to the default DOM tree building routines.
9796 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009797 * User data (void *) is stored within the parser context, so it is
9798 * available nearly everywhere in libxml.
9799 *
Owen Taylor3473f882001-02-23 17:55:21 +00009800 * Returns the resulting document tree
9801 */
9802
9803xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009804xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9805 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009806 xmlDocPtr ret;
9807 xmlParserCtxtPtr ctxt;
9808 char *directory = NULL;
9809
Daniel Veillard635ef722001-10-29 11:48:19 +00009810 xmlInitParser();
9811
Owen Taylor3473f882001-02-23 17:55:21 +00009812 ctxt = xmlCreateFileParserCtxt(filename);
9813 if (ctxt == NULL) {
9814 return(NULL);
9815 }
9816 if (sax != NULL) {
9817 if (ctxt->sax != NULL)
9818 xmlFree(ctxt->sax);
9819 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009820 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009821 if (data!=NULL) {
9822 ctxt->_private=data;
9823 }
Owen Taylor3473f882001-02-23 17:55:21 +00009824
9825 if ((ctxt->directory == NULL) && (directory == NULL))
9826 directory = xmlParserGetDirectory(filename);
9827 if ((ctxt->directory == NULL) && (directory != NULL))
9828 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9829
9830 xmlParseDocument(ctxt);
9831
9832 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9833 else {
9834 ret = NULL;
9835 xmlFreeDoc(ctxt->myDoc);
9836 ctxt->myDoc = NULL;
9837 }
9838 if (sax != NULL)
9839 ctxt->sax = NULL;
9840 xmlFreeParserCtxt(ctxt);
9841
9842 return(ret);
9843}
9844
9845/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009846 * xmlSAXParseFile:
9847 * @sax: the SAX handler block
9848 * @filename: the filename
9849 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9850 * documents
9851 *
9852 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9853 * compressed document is provided by default if found at compile-time.
9854 * It use the given SAX function block to handle the parsing callback.
9855 * If sax is NULL, fallback to the default DOM tree building routines.
9856 *
9857 * Returns the resulting document tree
9858 */
9859
9860xmlDocPtr
9861xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9862 int recovery) {
9863 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9864}
9865
9866/**
Owen Taylor3473f882001-02-23 17:55:21 +00009867 * xmlRecoverDoc:
9868 * @cur: a pointer to an array of xmlChar
9869 *
9870 * parse an XML in-memory document and build a tree.
9871 * In the case the document is not Well Formed, a tree is built anyway
9872 *
9873 * Returns the resulting document tree
9874 */
9875
9876xmlDocPtr
9877xmlRecoverDoc(xmlChar *cur) {
9878 return(xmlSAXParseDoc(NULL, cur, 1));
9879}
9880
9881/**
9882 * xmlParseFile:
9883 * @filename: the filename
9884 *
9885 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9886 * compressed document is provided by default if found at compile-time.
9887 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009888 * Returns the resulting document tree if the file was wellformed,
9889 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009890 */
9891
9892xmlDocPtr
9893xmlParseFile(const char *filename) {
9894 return(xmlSAXParseFile(NULL, filename, 0));
9895}
9896
9897/**
9898 * xmlRecoverFile:
9899 * @filename: the filename
9900 *
9901 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9902 * compressed document is provided by default if found at compile-time.
9903 * In the case the document is not Well Formed, a tree is built anyway
9904 *
9905 * Returns the resulting document tree
9906 */
9907
9908xmlDocPtr
9909xmlRecoverFile(const char *filename) {
9910 return(xmlSAXParseFile(NULL, filename, 1));
9911}
9912
9913
9914/**
9915 * xmlSetupParserForBuffer:
9916 * @ctxt: an XML parser context
9917 * @buffer: a xmlChar * buffer
9918 * @filename: a file name
9919 *
9920 * Setup the parser context to parse a new buffer; Clears any prior
9921 * contents from the parser context. The buffer parameter must not be
9922 * NULL, but the filename parameter can be
9923 */
9924void
9925xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9926 const char* filename)
9927{
9928 xmlParserInputPtr input;
9929
9930 input = xmlNewInputStream(ctxt);
9931 if (input == NULL) {
9932 perror("malloc");
9933 xmlFree(ctxt);
9934 return;
9935 }
9936
9937 xmlClearParserCtxt(ctxt);
9938 if (filename != NULL)
9939 input->filename = xmlMemStrdup(filename);
9940 input->base = buffer;
9941 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009942 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009943 inputPush(ctxt, input);
9944}
9945
9946/**
9947 * xmlSAXUserParseFile:
9948 * @sax: a SAX handler
9949 * @user_data: The user data returned on SAX callbacks
9950 * @filename: a file name
9951 *
9952 * parse an XML file and call the given SAX handler routines.
9953 * Automatic support for ZLIB/Compress compressed document is provided
9954 *
9955 * Returns 0 in case of success or a error number otherwise
9956 */
9957int
9958xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9959 const char *filename) {
9960 int ret = 0;
9961 xmlParserCtxtPtr ctxt;
9962
9963 ctxt = xmlCreateFileParserCtxt(filename);
9964 if (ctxt == NULL) return -1;
9965 if (ctxt->sax != &xmlDefaultSAXHandler)
9966 xmlFree(ctxt->sax);
9967 ctxt->sax = sax;
9968 if (user_data != NULL)
9969 ctxt->userData = user_data;
9970
9971 xmlParseDocument(ctxt);
9972
9973 if (ctxt->wellFormed)
9974 ret = 0;
9975 else {
9976 if (ctxt->errNo != 0)
9977 ret = ctxt->errNo;
9978 else
9979 ret = -1;
9980 }
9981 if (sax != NULL)
9982 ctxt->sax = NULL;
9983 xmlFreeParserCtxt(ctxt);
9984
9985 return ret;
9986}
9987
9988/************************************************************************
9989 * *
9990 * Front ends when parsing from memory *
9991 * *
9992 ************************************************************************/
9993
9994/**
9995 * xmlCreateMemoryParserCtxt:
9996 * @buffer: a pointer to a char array
9997 * @size: the size of the array
9998 *
9999 * Create a parser context for an XML in-memory document.
10000 *
10001 * Returns the new parser context or NULL
10002 */
10003xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010004xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010005 xmlParserCtxtPtr ctxt;
10006 xmlParserInputPtr input;
10007 xmlParserInputBufferPtr buf;
10008
10009 if (buffer == NULL)
10010 return(NULL);
10011 if (size <= 0)
10012 return(NULL);
10013
10014 ctxt = xmlNewParserCtxt();
10015 if (ctxt == NULL)
10016 return(NULL);
10017
10018 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10019 if (buf == NULL) return(NULL);
10020
10021 input = xmlNewInputStream(ctxt);
10022 if (input == NULL) {
10023 xmlFreeParserCtxt(ctxt);
10024 return(NULL);
10025 }
10026
10027 input->filename = NULL;
10028 input->buf = buf;
10029 input->base = input->buf->buffer->content;
10030 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010031 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010032
10033 inputPush(ctxt, input);
10034 return(ctxt);
10035}
10036
10037/**
10038 * xmlSAXParseMemory:
10039 * @sax: the SAX handler block
10040 * @buffer: an pointer to a char array
10041 * @size: the size of the array
10042 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10043 * documents
10044 *
10045 * parse an XML in-memory block and use the given SAX function block
10046 * to handle the parsing callback. If sax is NULL, fallback to the default
10047 * DOM tree building routines.
10048 *
10049 * Returns the resulting document tree
10050 */
10051xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010052xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10053 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010054 xmlDocPtr ret;
10055 xmlParserCtxtPtr ctxt;
10056
10057 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10058 if (ctxt == NULL) return(NULL);
10059 if (sax != NULL) {
10060 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010061 }
10062
10063 xmlParseDocument(ctxt);
10064
10065 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10066 else {
10067 ret = NULL;
10068 xmlFreeDoc(ctxt->myDoc);
10069 ctxt->myDoc = NULL;
10070 }
10071 if (sax != NULL)
10072 ctxt->sax = NULL;
10073 xmlFreeParserCtxt(ctxt);
10074
10075 return(ret);
10076}
10077
10078/**
10079 * xmlParseMemory:
10080 * @buffer: an pointer to a char array
10081 * @size: the size of the array
10082 *
10083 * parse an XML in-memory block and build a tree.
10084 *
10085 * Returns the resulting document tree
10086 */
10087
Daniel Veillard50822cb2001-07-26 20:05:51 +000010088xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010089 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10090}
10091
10092/**
10093 * xmlRecoverMemory:
10094 * @buffer: an pointer to a char array
10095 * @size: the size of the array
10096 *
10097 * parse an XML in-memory block and build a tree.
10098 * In the case the document is not Well Formed, a tree is built anyway
10099 *
10100 * Returns the resulting document tree
10101 */
10102
Daniel Veillard50822cb2001-07-26 20:05:51 +000010103xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010104 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10105}
10106
10107/**
10108 * xmlSAXUserParseMemory:
10109 * @sax: a SAX handler
10110 * @user_data: The user data returned on SAX callbacks
10111 * @buffer: an in-memory XML document input
10112 * @size: the length of the XML document in bytes
10113 *
10114 * A better SAX parsing routine.
10115 * parse an XML in-memory buffer and call the given SAX handler routines.
10116 *
10117 * Returns 0 in case of success or a error number otherwise
10118 */
10119int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010120 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010121 int ret = 0;
10122 xmlParserCtxtPtr ctxt;
10123 xmlSAXHandlerPtr oldsax = NULL;
10124
10125 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10126 if (ctxt == NULL) return -1;
10127 if (sax != NULL) {
10128 oldsax = ctxt->sax;
10129 ctxt->sax = sax;
10130 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010131 if (user_data != NULL)
10132 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010133
10134 xmlParseDocument(ctxt);
10135
10136 if (ctxt->wellFormed)
10137 ret = 0;
10138 else {
10139 if (ctxt->errNo != 0)
10140 ret = ctxt->errNo;
10141 else
10142 ret = -1;
10143 }
10144 if (sax != NULL) {
10145 ctxt->sax = oldsax;
10146 }
10147 xmlFreeParserCtxt(ctxt);
10148
10149 return ret;
10150}
10151
10152/**
10153 * xmlCreateDocParserCtxt:
10154 * @cur: a pointer to an array of xmlChar
10155 *
10156 * Creates a parser context for an XML in-memory document.
10157 *
10158 * Returns the new parser context or NULL
10159 */
10160xmlParserCtxtPtr
10161xmlCreateDocParserCtxt(xmlChar *cur) {
10162 int len;
10163
10164 if (cur == NULL)
10165 return(NULL);
10166 len = xmlStrlen(cur);
10167 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10168}
10169
10170/**
10171 * xmlSAXParseDoc:
10172 * @sax: the SAX handler block
10173 * @cur: a pointer to an array of xmlChar
10174 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10175 * documents
10176 *
10177 * parse an XML in-memory document and build a tree.
10178 * It use the given SAX function block to handle the parsing callback.
10179 * If sax is NULL, fallback to the default DOM tree building routines.
10180 *
10181 * Returns the resulting document tree
10182 */
10183
10184xmlDocPtr
10185xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10186 xmlDocPtr ret;
10187 xmlParserCtxtPtr ctxt;
10188
10189 if (cur == NULL) return(NULL);
10190
10191
10192 ctxt = xmlCreateDocParserCtxt(cur);
10193 if (ctxt == NULL) return(NULL);
10194 if (sax != NULL) {
10195 ctxt->sax = sax;
10196 ctxt->userData = NULL;
10197 }
10198
10199 xmlParseDocument(ctxt);
10200 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10201 else {
10202 ret = NULL;
10203 xmlFreeDoc(ctxt->myDoc);
10204 ctxt->myDoc = NULL;
10205 }
10206 if (sax != NULL)
10207 ctxt->sax = NULL;
10208 xmlFreeParserCtxt(ctxt);
10209
10210 return(ret);
10211}
10212
10213/**
10214 * xmlParseDoc:
10215 * @cur: a pointer to an array of xmlChar
10216 *
10217 * parse an XML in-memory document and build a tree.
10218 *
10219 * Returns the resulting document tree
10220 */
10221
10222xmlDocPtr
10223xmlParseDoc(xmlChar *cur) {
10224 return(xmlSAXParseDoc(NULL, cur, 0));
10225}
10226
10227
10228/************************************************************************
10229 * *
10230 * Miscellaneous *
10231 * *
10232 ************************************************************************/
10233
10234#ifdef LIBXML_XPATH_ENABLED
10235#include <libxml/xpath.h>
10236#endif
10237
10238static int xmlParserInitialized = 0;
10239
10240/**
10241 * xmlInitParser:
10242 *
10243 * Initialization function for the XML parser.
10244 * This is not reentrant. Call once before processing in case of
10245 * use in multithreaded programs.
10246 */
10247
10248void
10249xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010250 if (xmlParserInitialized != 0)
10251 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010252
Daniel Veillard6f350292001-10-14 09:56:15 +000010253 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010254 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010255 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010256 xmlInitCharEncodingHandlers();
10257 xmlInitializePredefinedEntities();
10258 xmlDefaultSAXHandlerInit();
10259 xmlRegisterDefaultInputCallbacks();
10260 xmlRegisterDefaultOutputCallbacks();
10261#ifdef LIBXML_HTML_ENABLED
10262 htmlInitAutoClose();
10263 htmlDefaultSAXHandlerInit();
10264#endif
10265#ifdef LIBXML_XPATH_ENABLED
10266 xmlXPathInit();
10267#endif
10268 xmlParserInitialized = 1;
10269}
10270
10271/**
10272 * xmlCleanupParser:
10273 *
10274 * Cleanup function for the XML parser. It tries to reclaim all
10275 * parsing related global memory allocated for the parser processing.
10276 * It doesn't deallocate any document related memory. Calling this
10277 * function should not prevent reusing the parser.
10278 */
10279
10280void
10281xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010282 xmlCleanupCharEncodingHandlers();
10283 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010284#ifdef LIBXML_CATALOG_ENABLED
10285 xmlCatalogCleanup();
10286#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010287 xmlCleanupThreads();
10288 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010289}