blob: f68c4242e08d871aeac9ce29114b43eed7a33ea7 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
Owen Taylor3473f882001-02-23 17:55:21 +000085/*
Owen Taylor3473f882001-02-23 17:55:21 +000086 * List of XML prefixed PI allowed by W3C specs
87 */
88
Daniel Veillardb44025c2001-10-11 22:55:55 +000089static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000090 "xml-stylesheet",
91 NULL
92};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
336
/* Current byte of the active input; RAW and CUR expand identically here. */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* Byte located (val) positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current read pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the read pointer and the character counter by (val), then
 * hand a '%' to xmlParserHandlePEReference() and pop the input when it
 * is exhausted and cannot be grown.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (ctxt->input->cur[0] == '%')                                     \
        xmlParserHandlePEReference(ctxt);                               \
    if ((ctxt->input->cur[0] == 0) &&                                   \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Step over exactly one xmlChar, counting it, and try to grow the input
 * when the new current byte is 0.
 */
#define NEXT1 {                                                         \
    ctxt->input->cur++;                                                 \
    ctxt->nbChars++;                                                    \
    if (ctxt->input->cur[0] == 0)                                       \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  }

/*
 * Update line/column from the current byte, step over (l) xmlChars and
 * hand a '%' at the new position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

/* Current character of the context / of string s; its length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i, advancing i: a direct
 * store when l is 1, xmlCopyCharMultiByte() otherwise.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1)                                                         \
        b[i++] = (xmlChar) v;                                           \
    else                                                                \
        i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. The result of
 * xmlRealloc() is first stored in a temporary: on failure the previous
 * allocation is still reachable and is released here before the enclosing
 * function returns NULL, instead of being leaked by overwriting the only
 * pointer to it.
 *
 * The macro contains a return statement, so it may only be used inside
 * functions returning a pointer, and any other resource owned by the
 * caller at that point must be released by other means.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001772 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1773 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001774 ctxt->sax->error(ctxt->userData,
1775 "Name %s is not XML Namespace compliant\n",
1776 name);
1777 }
1778 }
1779 cur++;
1780
Owen Taylor3473f882001-02-23 17:55:21 +00001781 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1782 buf[len++] = c;
1783 c = *cur++;
1784 }
1785 if (len >= max) {
1786 /*
1787 * Okay someone managed to make a huge name, so he's ready to pay
1788 * for the processing speed.
1789 */
1790 max = len * 2;
1791
1792 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1793 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001794 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1795 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001796 ctxt->sax->error(ctxt->userData,
1797 "xmlSplitQName: out of memory\n");
1798 return(NULL);
1799 }
1800 memcpy(buffer, buf, len);
1801 while (c != 0) { /* tested bigname2.xml */
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001809 ctxt->sax->error(ctxt->userData,
1810 "xmlSplitQName: out of memory\n");
1811 return(NULL);
1812 }
1813 }
1814 buffer[len++] = c;
1815 c = *cur++;
1816 }
1817 buffer[len] = 0;
1818 }
1819
1820 if (buffer == NULL)
1821 ret = xmlStrndup(buf, len);
1822 else {
1823 ret = buffer;
1824 }
1825 }
1826
1827 return(ret);
1828}
1829
1830/************************************************************************
1831 * *
1832 * The parser itself *
1833 * Relates to http://www.w3.org/TR/REC-xml *
1834 * *
1835 ************************************************************************/
1836
Daniel Veillard76d66f42001-05-16 21:05:17 +00001837static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001838/**
1839 * xmlParseName:
1840 * @ctxt: an XML parser context
1841 *
1842 * parse an XML name.
1843 *
1844 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1845 * CombiningChar | Extender
1846 *
1847 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1848 *
1849 * [6] Names ::= Name (S Name)*
1850 *
1851 * Returns the Name parsed or NULL
1852 */
1853
1854xmlChar *
1855xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001856 const xmlChar *in;
1857 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001858 int count = 0;
1859
1860 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001861
1862 /*
1863 * Accelerator for simple ASCII names
1864 */
1865 in = ctxt->input->cur;
1866 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1867 ((*in >= 0x41) && (*in <= 0x5A)) ||
1868 (*in == '_') || (*in == ':')) {
1869 in++;
1870 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1871 ((*in >= 0x41) && (*in <= 0x5A)) ||
1872 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 (*in == '_') || (*in == '-') ||
1874 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001875 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001876 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001877 count = in - ctxt->input->cur;
1878 ret = xmlStrndup(ctxt->input->cur, count);
1879 ctxt->input->cur = in;
1880 return(ret);
1881 }
1882 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001883 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001884}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
Daniel Veillard46de64e2002-05-29 08:21:33 +00001886/**
1887 * xmlParseNameAndCompare:
1888 * @ctxt: an XML parser context
1889 *
1890 * parse an XML name and compares for match
1891 * (specialized for endtag parsing)
1892 *
1893 *
1894 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1895 * and the name for mismatch
1896 */
1897
Daniel Veillardf4862f02002-09-10 11:13:43 +00001898static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001899xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1900 const xmlChar *cmp = other;
1901 const xmlChar *in;
1902 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001903
1904 GROW;
1905
1906 in = ctxt->input->cur;
1907 while (*in != 0 && *in == *cmp) {
1908 ++in;
1909 ++cmp;
1910 }
1911 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1912 /* success */
1913 ctxt->input->cur = in;
1914 return (xmlChar*) 1;
1915 }
1916 /* failure (or end of input buffer), check with full function */
1917 ret = xmlParseName (ctxt);
1918 if (ret != 0 && xmlStrEqual (ret, other)) {
1919 xmlFree (ret);
1920 return (xmlChar*) 1;
1921 }
1922 return ret;
1923}
1924
Daniel Veillard76d66f42001-05-16 21:05:17 +00001925static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001926xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1927 xmlChar buf[XML_MAX_NAMELEN + 5];
1928 int len = 0, l;
1929 int c;
1930 int count = 0;
1931
1932 /*
1933 * Handler for more complex cases
1934 */
1935 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 c = CUR_CHAR(l);
1937 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1938 (!IS_LETTER(c) && (c != '_') &&
1939 (c != ':'))) {
1940 return(NULL);
1941 }
1942
1943 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1944 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1945 (c == '.') || (c == '-') ||
1946 (c == '_') || (c == ':') ||
1947 (IS_COMBINING(c)) ||
1948 (IS_EXTENDER(c)))) {
1949 if (count++ > 100) {
1950 count = 0;
1951 GROW;
1952 }
1953 COPY_BUF(l,buf,len,c);
1954 NEXTL(l);
1955 c = CUR_CHAR(l);
1956 if (len >= XML_MAX_NAMELEN) {
1957 /*
1958 * Okay someone managed to make a huge name, so he's ready to pay
1959 * for the processing speed.
1960 */
1961 xmlChar *buffer;
1962 int max = len * 2;
1963
1964 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1965 if (buffer == NULL) {
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001968 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001969 return(NULL);
1970 }
1971 memcpy(buffer, buf, len);
1972 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c))) {
1977 if (count++ > 100) {
1978 count = 0;
1979 GROW;
1980 }
1981 if (len + 10 > max) {
1982 max *= 2;
1983 buffer = (xmlChar *) xmlRealloc(buffer,
1984 max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 }
1992 COPY_BUF(l,buffer,len,c);
1993 NEXTL(l);
1994 c = CUR_CHAR(l);
1995 }
1996 buffer[len] = 0;
1997 return(buffer);
1998 }
1999 }
2000 return(xmlStrndup(buf, len));
2001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
2050 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
2130 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
2205 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
2366 SHRINK;
2367 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002368 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002369 if (*in != '"' && *in != '\'') {
2370 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2372 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002375 return(NULL);
2376 }
2377 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2378 limit = *in;
2379 ++in;
2380
2381 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2382 *in != '&' && *in != '<'
2383 ) {
2384 ++in;
2385 }
2386 if (*in != limit) {
2387 return xmlParseAttValueComplex(ctxt);
2388 }
2389 ++in;
2390 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2391 CUR_PTR = in;
2392 return ret;
2393}
2394
Daniel Veillard01c13b52002-12-10 15:19:08 +00002395/**
2396 * xmlParseAttValueComplex:
2397 * @ctxt: an XML parser context
2398 *
2399 * parse a value for an attribute, this is the fallback function
2400 * of xmlParseAttValue() when the attribute parsing requires handling
2401 * of non-ASCII characters.
2402 *
2403 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2404 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlChar *
2406xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2407 xmlChar limit = 0;
2408 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002409 int len = 0;
2410 int buf_size = 0;
2411 int c, l;
2412 xmlChar *current = NULL;
2413 xmlEntityPtr ent;
2414
2415
2416 SHRINK;
2417 if (NXT(0) == '"') {
2418 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2419 limit = '"';
2420 NEXT;
2421 } else if (NXT(0) == '\'') {
2422 limit = '\'';
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 NEXT;
2425 } else {
2426 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 return(NULL);
2432 }
2433
2434 /*
2435 * allocate a translation buffer.
2436 */
2437 buf_size = XML_PARSER_BUFFER_SIZE;
2438 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2439 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002440 xmlGenericError(xmlGenericErrorContext,
2441 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(NULL);
2443 }
2444
2445 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002446 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 */
2448 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 while ((NXT(0) != limit) && /* checked */
2450 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002452 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (NXT(1) == '#') {
2454 int val = xmlParseCharRef(ctxt);
2455 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (ctxt->replaceEntities) {
2457 if (len > buf_size - 10) {
2458 growBuffer(buf);
2459 }
2460 buf[len++] = '&';
2461 } else {
2462 /*
2463 * The reparsing will be done in xmlStringGetNodeList()
2464 * called by the attribute() function in SAX.c
2465 */
2466 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002467
Daniel Veillard319a7422001-09-11 09:27:09 +00002468 if (len > buf_size - 10) {
2469 growBuffer(buf);
2470 }
2471 current = &buffer[0];
2472 while (*current != 0) { /* non input consuming */
2473 buf[len++] = *current++;
2474 }
Owen Taylor3473f882001-02-23 17:55:21 +00002475 }
2476 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 len += xmlCopyChar(0, &buf[len], val);
2481 }
2482 } else {
2483 ent = xmlParseEntityRef(ctxt);
2484 if ((ent != NULL) &&
2485 (ctxt->replaceEntities != 0)) {
2486 xmlChar *rep;
2487
2488 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2489 rep = xmlStringDecodeEntities(ctxt, ent->content,
2490 XML_SUBSTITUTE_REF, 0, 0, 0);
2491 if (rep != NULL) {
2492 current = rep;
2493 while (*current != 0) { /* non input consuming */
2494 buf[len++] = *current++;
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 }
2499 xmlFree(rep);
2500 }
2501 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
Owen Taylor3473f882001-02-23 17:55:21 +00002505 if (ent->content != NULL)
2506 buf[len++] = ent->content[0];
2507 }
2508 } else if (ent != NULL) {
2509 int i = xmlStrlen(ent->name);
2510 const xmlChar *cur = ent->name;
2511
2512 /*
2513 * This may look absurd but is needed to detect
2514 * entities problems
2515 */
2516 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2517 (ent->content != NULL)) {
2518 xmlChar *rep;
2519 rep = xmlStringDecodeEntities(ctxt, ent->content,
2520 XML_SUBSTITUTE_REF, 0, 0, 0);
2521 if (rep != NULL)
2522 xmlFree(rep);
2523 }
2524
2525 /*
2526 * Just output the reference
2527 */
2528 buf[len++] = '&';
2529 if (len > buf_size - i - 10) {
2530 growBuffer(buf);
2531 }
2532 for (;i > 0;i--)
2533 buf[len++] = *cur++;
2534 buf[len++] = ';';
2535 }
2536 }
2537 } else {
2538 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2539 COPY_BUF(l,buf,len,0x20);
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 } else {
2544 COPY_BUF(l,buf,len,c);
2545 if (len > buf_size - 10) {
2546 growBuffer(buf);
2547 }
2548 }
2549 NEXTL(l);
2550 }
2551 GROW;
2552 c = CUR_CHAR(l);
2553 }
2554 buf[len++] = 0;
2555 if (RAW == '<') {
2556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2558 ctxt->sax->error(ctxt->userData,
2559 "Unescaped '<' not allowed in attributes values\n");
2560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 } else if (RAW != limit) {
2563 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else
2569 NEXT;
2570 return(buf);
2571}
2572
2573/**
2574 * xmlParseSystemLiteral:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML Literal
2578 *
2579 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2580 *
2581 * Returns the SystemLiteral parsed or NULL
2582 */
2583
2584xmlChar *
2585xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2586 xmlChar *buf = NULL;
2587 int len = 0;
2588 int size = XML_PARSER_BUFFER_SIZE;
2589 int cur, l;
2590 xmlChar stop;
2591 int state = ctxt->instate;
2592 int count = 0;
2593
2594 SHRINK;
2595 if (RAW == '"') {
2596 NEXT;
2597 stop = '"';
2598 } else if (RAW == '\'') {
2599 NEXT;
2600 stop = '\'';
2601 } else {
2602 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "SystemLiteral \" or ' expected\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
2609 }
2610
2611 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "malloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2618 cur = CUR_CHAR(l);
2619 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2620 if (len + 5 >= size) {
2621 size *= 2;
2622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2623 if (buf == NULL) {
2624 xmlGenericError(xmlGenericErrorContext,
2625 "realloc of %d byte failed\n", size);
2626 ctxt->instate = (xmlParserInputState) state;
2627 return(NULL);
2628 }
2629 }
2630 count++;
2631 if (count > 50) {
2632 GROW;
2633 count = 0;
2634 }
2635 COPY_BUF(l,buf,len,cur);
2636 NEXTL(l);
2637 cur = CUR_CHAR(l);
2638 if (cur == 0) {
2639 GROW;
2640 SHRINK;
2641 cur = CUR_CHAR(l);
2642 }
2643 }
2644 buf[len] = 0;
2645 ctxt->instate = (xmlParserInputState) state;
2646 if (!IS_CHAR(cur)) {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2650 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002651 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
2653 NEXT;
2654 }
2655 return(buf);
2656}
2657
2658/**
2659 * xmlParsePubidLiteral:
2660 * @ctxt: an XML parser context
2661 *
2662 * parse an XML public literal
2663 *
2664 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2665 *
2666 * Returns the PubidLiteral parsed or NULL.
2667 */
2668
2669xmlChar *
2670xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2671 xmlChar *buf = NULL;
2672 int len = 0;
2673 int size = XML_PARSER_BUFFER_SIZE;
2674 xmlChar cur;
2675 xmlChar stop;
2676 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002677 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
2679 SHRINK;
2680 if (RAW == '"') {
2681 NEXT;
2682 stop = '"';
2683 } else if (RAW == '\'') {
2684 NEXT;
2685 stop = '\'';
2686 } else {
2687 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2689 ctxt->sax->error(ctxt->userData,
2690 "SystemLiteral \" or ' expected\n");
2691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(NULL);
2694 }
2695 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2696 if (buf == NULL) {
2697 xmlGenericError(xmlGenericErrorContext,
2698 "malloc of %d byte failed\n", size);
2699 return(NULL);
2700 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002701 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 cur = CUR;
2703 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2704 if (len + 1 >= size) {
2705 size *= 2;
2706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2707 if (buf == NULL) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "realloc of %d byte failed\n", size);
2710 return(NULL);
2711 }
2712 }
2713 buf[len++] = cur;
2714 count++;
2715 if (count > 50) {
2716 GROW;
2717 count = 0;
2718 }
2719 NEXT;
2720 cur = CUR;
2721 if (cur == 0) {
2722 GROW;
2723 SHRINK;
2724 cur = CUR;
2725 }
2726 }
2727 buf[len] = 0;
2728 if (cur != stop) {
2729 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else {
2735 NEXT;
2736 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002737 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(buf);
2739}
2740
Daniel Veillard48b2f892001-02-25 16:11:03 +00002741void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002742/**
2743 * xmlParseCharData:
2744 * @ctxt: an XML parser context
2745 * @cdata: int indicating whether we are within a CDATA section
2746 *
2747 * parse a CharData section.
2748 * if we are within a CDATA section ']]>' marks an end of section.
2749 *
2750 * The right angle bracket (>) may be represented using the string "&gt;",
2751 * and must, for compatibility, be escaped using "&gt;" or a character
2752 * reference when it appears in the string "]]>" in content, when that
2753 * string is not marking the end of a CDATA section.
2754 *
2755 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2756 */
2757
2758void
2759xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002760 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002762 int line = ctxt->input->line;
2763 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 SHRINK;
2766 GROW;
2767 /*
2768 * Accelerated common case where input don't need to be
2769 * modified before passing it to the handler.
2770 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002771 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 in = ctxt->input->cur;
2773 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002774get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2776 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002780 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002781 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002782 ctxt->input->line++;
2783 in++;
2784 }
2785 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 }
2787 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002788 if ((in[1] == ']') && (in[2] == '>')) {
2789 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "Sequence ']]>' not allowed in content\n");
2793 ctxt->input->cur = in;
2794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002796 return;
2797 }
2798 in++;
2799 goto get_more;
2800 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002801 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002802 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002803 if (IS_BLANK(*ctxt->input->cur)) {
2804 const xmlChar *tmp = ctxt->input->cur;
2805 ctxt->input->cur = in;
2806 if (areBlanks(ctxt, tmp, nbchar)) {
2807 if (ctxt->sax->ignorableWhitespace != NULL)
2808 ctxt->sax->ignorableWhitespace(ctxt->userData,
2809 tmp, nbchar);
2810 } else {
2811 if (ctxt->sax->characters != NULL)
2812 ctxt->sax->characters(ctxt->userData,
2813 tmp, nbchar);
2814 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002815 line = ctxt->input->line;
2816 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData,
2820 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002821 line = ctxt->input->line;
2822 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002823 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 }
2825 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 if (*in == 0xD) {
2827 in++;
2828 if (*in == 0xA) {
2829 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 ctxt->input->line++;
2832 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002834 in--;
2835 }
2836 if (*in == '<') {
2837 return;
2838 }
2839 if (*in == '&') {
2840 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002841 }
2842 SHRINK;
2843 GROW;
2844 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002845 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = 0;
2847 }
Daniel Veillard50582112001-03-26 22:52:16 +00002848 ctxt->input->line = line;
2849 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 xmlParseCharDataComplex(ctxt, cdata);
2851}
2852
Daniel Veillard01c13b52002-12-10 15:19:08 +00002853/**
2854 * xmlParseCharDataComplex:
2855 * @ctxt: an XML parser context
2856 * @cdata: int indicating whether we are within a CDATA section
2857 *
2858 * parse a CharData section.this is the fallback function
2859 * of xmlParseCharData() when the parsing requires handling
2860 * of non-ASCII characters.
2861 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862void
2863xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002864 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2865 int nbchar = 0;
2866 int cur, l;
2867 int count = 0;
2868
2869 SHRINK;
2870 GROW;
2871 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002872 while ((cur != '<') && /* checked */
2873 (cur != '&') &&
2874 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if ((cur == ']') && (NXT(1) == ']') &&
2876 (NXT(2) == '>')) {
2877 if (cdata) break;
2878 else {
2879 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Sequence ']]>' not allowed in content\n");
2883 /* Should this be relaxed ??? I see a "must here */
2884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
2888 COPY_BUF(l,buf,nbchar,cur);
2889 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2894 if (areBlanks(ctxt, buf, nbchar)) {
2895 if (ctxt->sax->ignorableWhitespace != NULL)
2896 ctxt->sax->ignorableWhitespace(ctxt->userData,
2897 buf, nbchar);
2898 } else {
2899 if (ctxt->sax->characters != NULL)
2900 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2901 }
2902 }
2903 nbchar = 0;
2904 }
2905 count++;
2906 if (count > 50) {
2907 GROW;
2908 count = 0;
2909 }
2910 NEXTL(l);
2911 cur = CUR_CHAR(l);
2912 }
2913 if (nbchar != 0) {
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
2917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2918 if (areBlanks(ctxt, buf, nbchar)) {
2919 if (ctxt->sax->ignorableWhitespace != NULL)
2920 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2921 } else {
2922 if (ctxt->sax->characters != NULL)
2923 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2924 }
2925 }
2926 }
2927}
2928
2929/**
2930 * xmlParseExternalID:
2931 * @ctxt: an XML parser context
2932 * @publicID: a xmlChar** receiving PubidLiteral
2933 * @strict: indicate whether we should restrict parsing to only
2934 * production [75], see NOTE below
2935 *
2936 * Parse an External ID or a Public ID
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002939 * 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2942 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2943 *
2944 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2945 *
2946 * Returns the function returns SystemLiteral and in the second
2947 * case publicID receives PubidLiteral, is strict is off
2948 * it is possible to return NULL and have publicID set.
2949 */
2950
2951xmlChar *
2952xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2953 xmlChar *URI = NULL;
2954
2955 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002956
2957 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2959 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2960 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2961 SKIP(6);
2962 if (!IS_BLANK(CUR)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "Space required after 'SYSTEM'\n");
2967 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002968 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 SKIP_BLANKS;
2971 URI = xmlParseSystemLiteral(ctxt);
2972 if (URI == NULL) {
2973 ctxt->errNo = XML_ERR_URI_REQUIRED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParseExternalID: SYSTEM, no URI\n");
2977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002979 }
2980 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2981 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2982 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2983 SKIP(6);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after 'PUBLIC'\n");
2989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 SKIP_BLANKS;
2993 *publicID = xmlParsePubidLiteral(ctxt);
2994 if (*publicID == NULL) {
2995 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 if (strict) {
3003 /*
3004 * We don't handle [83] so "S SystemLiteral" is required.
3005 */
3006 if (!IS_BLANK(CUR)) {
3007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3009 ctxt->sax->error(ctxt->userData,
3010 "Space required after the Public Identifier\n");
3011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003013 }
3014 } else {
3015 /*
3016 * We handle [83] so we return immediately, if
3017 * "S SystemLiteral" is not detected. From a purely parsing
3018 * point of view that's a nice mess.
3019 */
3020 const xmlChar *ptr;
3021 GROW;
3022
3023 ptr = CUR_PTR;
3024 if (!IS_BLANK(*ptr)) return(NULL);
3025
3026 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3027 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3028 }
3029 SKIP_BLANKS;
3030 URI = xmlParseSystemLiteral(ctxt);
3031 if (URI == NULL) {
3032 ctxt->errNo = XML_ERR_URI_REQUIRED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "xmlParseExternalID: PUBLIC, no URI\n");
3036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 }
3040 return(URI);
3041}
3042
3043/**
3044 * xmlParseComment:
3045 * @ctxt: an XML parser context
3046 *
3047 * Skip an XML (SGML) comment <!-- .... -->
3048 * The spec says that "For compatibility, the string "--" (double-hyphen)
3049 * must not occur within comments. "
3050 *
3051 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3052 */
3053void
3054xmlParseComment(xmlParserCtxtPtr ctxt) {
3055 xmlChar *buf = NULL;
3056 int len;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int q, ql;
3059 int r, rl;
3060 int cur, l;
3061 xmlParserInputState state;
3062 xmlParserInputPtr input = ctxt->input;
3063 int count = 0;
3064
3065 /*
3066 * Check that there is a comment right here.
3067 */
3068 if ((RAW != '<') || (NXT(1) != '!') ||
3069 (NXT(2) != '-') || (NXT(3) != '-')) return;
3070
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_COMMENT;
3073 SHRINK;
3074 SKIP(4);
3075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3076 if (buf == NULL) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "malloc of %d byte failed\n", size);
3079 ctxt->instate = state;
3080 return;
3081 }
3082 q = CUR_CHAR(ql);
3083 NEXTL(ql);
3084 r = CUR_CHAR(rl);
3085 NEXTL(rl);
3086 cur = CUR_CHAR(l);
3087 len = 0;
3088 while (IS_CHAR(cur) && /* checked */
3089 ((cur != '>') ||
3090 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003091 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003092 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095 "Comment must not contain '--' (double-hyphen)`\n");
3096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 if (len + 5 >= size) {
3100 size *= 2;
3101 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "realloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 }
3109 COPY_BUF(ql,buf,len,q);
3110 q = r;
3111 ql = rl;
3112 r = cur;
3113 rl = l;
3114
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (!IS_CHAR(cur)) {
3130 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment not terminated \n<!--%.50s\n", buf);
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 xmlFree(buf);
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142"Comment doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 NEXT;
3147 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3148 (!ctxt->disableSAX))
3149 ctxt->sax->comment(ctxt->userData, buf);
3150 xmlFree(buf);
3151 }
3152 ctxt->instate = state;
3153}
3154
3155/**
3156 * xmlParsePITarget:
3157 * @ctxt: an XML parser context
3158 *
3159 * parse the name of a PI
3160 *
3161 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3162 *
3163 * Returns the PITarget name or NULL
3164 */
3165
3166xmlChar *
3167xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3168 xmlChar *name;
3169
3170 name = xmlParseName(ctxt);
3171 if ((name != NULL) &&
3172 ((name[0] == 'x') || (name[0] == 'X')) &&
3173 ((name[1] == 'm') || (name[1] == 'M')) &&
3174 ((name[2] == 'l') || (name[2] == 'L'))) {
3175 int i;
3176 if ((name[0] == 'x') && (name[1] == 'm') &&
3177 (name[2] == 'l') && (name[3] == 0)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "XML declaration allowed only at the start of the document\n");
3182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 return(name);
3185 } else if (name[3] == 0) {
3186 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(name);
3192 }
3193 for (i = 0;;i++) {
3194 if (xmlW3CPIs[i] == NULL) break;
3195 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3196 return(name);
3197 }
3198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3199 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3200 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003201 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003202 }
3203 }
3204 return(name);
3205}
3206
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003207#ifdef LIBXML_CATALOG_ENABLED
3208/**
3209 * xmlParseCatalogPI:
3210 * @ctxt: an XML parser context
3211 * @catalog: the PI value string
3212 *
3213 * parse an XML Catalog Processing Instruction.
3214 *
3215 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3216 *
3217 * Occurs only if allowed by the user and if happening in the Misc
3218 * part of the document before any doctype informations
3219 * This will add the given catalog to the parsing context in order
3220 * to be used if there is a resolution need further down in the document
3221 */
3222
3223static void
3224xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3225 xmlChar *URL = NULL;
3226 const xmlChar *tmp, *base;
3227 xmlChar marker;
3228
3229 tmp = catalog;
3230 while (IS_BLANK(*tmp)) tmp++;
3231 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3232 goto error;
3233 tmp += 7;
3234 while (IS_BLANK(*tmp)) tmp++;
3235 if (*tmp != '=') {
3236 return;
3237 }
3238 tmp++;
3239 while (IS_BLANK(*tmp)) tmp++;
3240 marker = *tmp;
3241 if ((marker != '\'') && (marker != '"'))
3242 goto error;
3243 tmp++;
3244 base = tmp;
3245 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3246 if (*tmp == 0)
3247 goto error;
3248 URL = xmlStrndup(base, tmp - base);
3249 tmp++;
3250 while (IS_BLANK(*tmp)) tmp++;
3251 if (*tmp != 0)
3252 goto error;
3253
3254 if (URL != NULL) {
3255 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3256 xmlFree(URL);
3257 }
3258 return;
3259
3260error:
3261 ctxt->errNo = XML_WAR_CATALOG_PI;
3262 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3263 ctxt->sax->warning(ctxt->userData,
3264 "Catalog PI syntax error: %s\n", catalog);
3265 if (URL != NULL)
3266 xmlFree(URL);
3267}
3268#endif
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParsePI:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML Processing Instruction.
3275 *
3276 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3277 *
3278 * The processing is transfered to SAX once parsed.
3279 */
3280
3281void
3282xmlParsePI(xmlParserCtxtPtr ctxt) {
3283 xmlChar *buf = NULL;
3284 int len = 0;
3285 int size = XML_PARSER_BUFFER_SIZE;
3286 int cur, l;
3287 xmlChar *target;
3288 xmlParserInputState state;
3289 int count = 0;
3290
3291 if ((RAW == '<') && (NXT(1) == '?')) {
3292 xmlParserInputPtr input = ctxt->input;
3293 state = ctxt->instate;
3294 ctxt->instate = XML_PARSER_PI;
3295 /*
3296 * this is a Processing Instruction.
3297 */
3298 SKIP(2);
3299 SHRINK;
3300
3301 /*
3302 * Parse the target name and check for special support like
3303 * namespace.
3304 */
3305 target = xmlParsePITarget(ctxt);
3306 if (target != NULL) {
3307 if ((RAW == '?') && (NXT(1) == '>')) {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "PI declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 }
3316 SKIP(2);
3317
3318 /*
3319 * SAX: PI detected.
3320 */
3321 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3322 (ctxt->sax->processingInstruction != NULL))
3323 ctxt->sax->processingInstruction(ctxt->userData,
3324 target, NULL);
3325 ctxt->instate = state;
3326 xmlFree(target);
3327 return;
3328 }
3329 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3330 if (buf == NULL) {
3331 xmlGenericError(xmlGenericErrorContext,
3332 "malloc of %d byte failed\n", size);
3333 ctxt->instate = state;
3334 return;
3335 }
3336 cur = CUR;
3337 if (!IS_BLANK(cur)) {
3338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "xmlParsePI: PI %s space expected\n", target);
3342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 }
3345 SKIP_BLANKS;
3346 cur = CUR_CHAR(l);
3347 while (IS_CHAR(cur) && /* checked */
3348 ((cur != '?') || (NXT(1) != '>'))) {
3349 if (len + 5 >= size) {
3350 size *= 2;
3351 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3352 if (buf == NULL) {
3353 xmlGenericError(xmlGenericErrorContext,
3354 "realloc of %d byte failed\n", size);
3355 ctxt->instate = state;
3356 return;
3357 }
3358 }
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 COPY_BUF(l,buf,len,cur);
3365 NEXTL(l);
3366 cur = CUR_CHAR(l);
3367 if (cur == 0) {
3368 SHRINK;
3369 GROW;
3370 cur = CUR_CHAR(l);
3371 }
3372 }
3373 buf[len] = 0;
3374 if (cur != '?') {
3375 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData,
3378 "xmlParsePI: PI %s never end ...\n", target);
3379 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else {
3382 if (input != ctxt->input) {
3383 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "PI declaration doesn't start and stop in the same entity\n");
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP(2);
3391
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003392#ifdef LIBXML_CATALOG_ENABLED
3393 if (((state == XML_PARSER_MISC) ||
3394 (state == XML_PARSER_START)) &&
3395 (xmlStrEqual(target, XML_CATALOG_PI))) {
3396 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3397 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3398 (allow == XML_CATA_ALLOW_ALL))
3399 xmlParseCatalogPI(ctxt, buf);
3400 }
3401#endif
3402
3403
Owen Taylor3473f882001-02-23 17:55:21 +00003404 /*
3405 * SAX: PI detected.
3406 */
3407 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3408 (ctxt->sax->processingInstruction != NULL))
3409 ctxt->sax->processingInstruction(ctxt->userData,
3410 target, buf);
3411 }
3412 xmlFree(buf);
3413 xmlFree(target);
3414 } else {
3415 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "xmlParsePI : no target name\n");
3419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 ctxt->instate = state;
3423 }
3424}
3425
3426/**
3427 * xmlParseNotationDecl:
3428 * @ctxt: an XML parser context
3429 *
3430 * parse a notation declaration
3431 *
3432 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3433 *
3434 * Hence there is actually 3 choices:
3435 * 'PUBLIC' S PubidLiteral
3436 * 'PUBLIC' S PubidLiteral S SystemLiteral
3437 * and 'SYSTEM' S SystemLiteral
3438 *
3439 * See the NOTE on xmlParseExternalID().
3440 */
3441
3442void
3443xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3444 xmlChar *name;
3445 xmlChar *Pubid;
3446 xmlChar *Systemid;
3447
3448 if ((RAW == '<') && (NXT(1) == '!') &&
3449 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3450 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3451 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3452 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3453 xmlParserInputPtr input = ctxt->input;
3454 SHRINK;
3455 SKIP(10);
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after '<!NOTATION'\n");
3461 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003462 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return;
3464 }
3465 SKIP_BLANKS;
3466
Daniel Veillard76d66f42001-05-16 21:05:17 +00003467 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 if (name == NULL) {
3469 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "NOTATION: Name expected here\n");
3473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return;
3476 }
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after the NOTATION name'\n");
3482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return;
3485 }
3486 SKIP_BLANKS;
3487
3488 /*
3489 * Parse the IDs.
3490 */
3491 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3492 SKIP_BLANKS;
3493
3494 if (RAW == '>') {
3495 if (input != ctxt->input) {
3496 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499"Notation declaration doesn't start and stop in the same entity\n");
3500 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003501 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003502 }
3503 NEXT;
3504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3505 (ctxt->sax->notationDecl != NULL))
3506 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3507 } else {
3508 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "'>' required to close NOTATION declaration\n");
3512 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003513 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 }
3515 xmlFree(name);
3516 if (Systemid != NULL) xmlFree(Systemid);
3517 if (Pubid != NULL) xmlFree(Pubid);
3518 }
3519}
3520
3521/**
3522 * xmlParseEntityDecl:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse <!ENTITY declarations
3526 *
3527 * [70] EntityDecl ::= GEDecl | PEDecl
3528 *
3529 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3530 *
3531 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3532 *
3533 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3534 *
3535 * [74] PEDef ::= EntityValue | ExternalID
3536 *
3537 * [76] NDataDecl ::= S 'NDATA' S Name
3538 *
3539 * [ VC: Notation Declared ]
3540 * The Name must match the declared name of a notation.
3541 */
3542
3543void
3544xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3545 xmlChar *name = NULL;
3546 xmlChar *value = NULL;
3547 xmlChar *URI = NULL, *literal = NULL;
3548 xmlChar *ndata = NULL;
3549 int isParameter = 0;
3550 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003551 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003552
3553 GROW;
3554 if ((RAW == '<') && (NXT(1) == '!') &&
3555 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3556 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3557 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3558 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 SHRINK;
3560 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003561 skipped = SKIP_BLANKS;
3562 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3565 ctxt->sax->error(ctxt->userData,
3566 "Space required after '<!ENTITY'\n");
3567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
Owen Taylor3473f882001-02-23 17:55:21 +00003570
3571 if (RAW == '%') {
3572 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003573 skipped = SKIP_BLANKS;
3574 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003575 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData,
3578 "Space required after '%'\n");
3579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
Owen Taylor3473f882001-02-23 17:55:21 +00003582 isParameter = 1;
3583 }
3584
Daniel Veillard76d66f42001-05-16 21:05:17 +00003585 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003586 if (name == NULL) {
3587 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003592 return;
3593 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003594 skipped = SKIP_BLANKS;
3595 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3598 ctxt->sax->error(ctxt->userData,
3599 "Space required after the entity name\n");
3600 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003601 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
Owen Taylor3473f882001-02-23 17:55:21 +00003603
Daniel Veillardf5582f12002-06-11 10:08:16 +00003604 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 /*
3606 * handle the various case of definitions...
3607 */
3608 if (isParameter) {
3609 if ((RAW == '"') || (RAW == '\'')) {
3610 value = xmlParseEntityValue(ctxt, &orig);
3611 if (value) {
3612 if ((ctxt->sax != NULL) &&
3613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3614 ctxt->sax->entityDecl(ctxt->userData, name,
3615 XML_INTERNAL_PARAMETER_ENTITY,
3616 NULL, NULL, value);
3617 }
3618 } else {
3619 URI = xmlParseExternalID(ctxt, &literal, 1);
3620 if ((URI == NULL) && (literal == NULL)) {
3621 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Entity value required\n");
3625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
3628 if (URI) {
3629 xmlURIPtr uri;
3630
3631 uri = xmlParseURI((const char *) URI);
3632 if (uri == NULL) {
3633 ctxt->errNo = XML_ERR_INVALID_URI;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * This really ought to be a well formedness error
3641 * but the XML Core WG decided otherwise c.f. issue
3642 * E26 of the XML erratas.
3643 */
Owen Taylor3473f882001-02-23 17:55:21 +00003644 } else {
3645 if (uri->fragment != NULL) {
3646 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3647 if ((ctxt->sax != NULL) &&
3648 (!ctxt->disableSAX) &&
3649 (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003652 /*
3653 * Okay this is foolish to block those but not
3654 * invalid URIs.
3655 */
Owen Taylor3473f882001-02-23 17:55:21 +00003656 ctxt->wellFormed = 0;
3657 } else {
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) &&
3660 (ctxt->sax->entityDecl != NULL))
3661 ctxt->sax->entityDecl(ctxt->userData, name,
3662 XML_EXTERNAL_PARAMETER_ENTITY,
3663 literal, URI, NULL);
3664 }
3665 xmlFreeURI(uri);
3666 }
3667 }
3668 }
3669 } else {
3670 if ((RAW == '"') || (RAW == '\'')) {
3671 value = xmlParseEntityValue(ctxt, &orig);
3672 if ((ctxt->sax != NULL) &&
3673 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3674 ctxt->sax->entityDecl(ctxt->userData, name,
3675 XML_INTERNAL_GENERAL_ENTITY,
3676 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003677 /*
3678 * For expat compatibility in SAX mode.
3679 */
3680 if ((ctxt->myDoc == NULL) ||
3681 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3682 if (ctxt->myDoc == NULL) {
3683 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3684 }
3685 if (ctxt->myDoc->intSubset == NULL)
3686 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3687 BAD_CAST "fake", NULL, NULL);
3688
3689 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3690 NULL, NULL, value);
3691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 } else {
3693 URI = xmlParseExternalID(ctxt, &literal, 1);
3694 if ((URI == NULL) && (literal == NULL)) {
3695 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3697 ctxt->sax->error(ctxt->userData,
3698 "Entity value required\n");
3699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003701 }
3702 if (URI) {
3703 xmlURIPtr uri;
3704
3705 uri = xmlParseURI((const char *)URI);
3706 if (uri == NULL) {
3707 ctxt->errNo = XML_ERR_INVALID_URI;
3708 if ((ctxt->sax != NULL) &&
3709 (!ctxt->disableSAX) &&
3710 (ctxt->sax->error != NULL))
3711 ctxt->sax->error(ctxt->userData,
3712 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003713 /*
3714 * This really ought to be a well formedness error
3715 * but the XML Core WG decided otherwise c.f. issue
3716 * E26 of the XML erratas.
3717 */
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 if (uri->fragment != NULL) {
3720 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3721 if ((ctxt->sax != NULL) &&
3722 (!ctxt->disableSAX) &&
3723 (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003726 /*
3727 * Okay this is foolish to block those but not
3728 * invalid URIs.
3729 */
Owen Taylor3473f882001-02-23 17:55:21 +00003730 ctxt->wellFormed = 0;
3731 }
3732 xmlFreeURI(uri);
3733 }
3734 }
3735 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3736 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt->userData,
3739 "Space required before 'NDATA'\n");
3740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 if ((RAW == 'N') && (NXT(1) == 'D') &&
3745 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3746 (NXT(4) == 'A')) {
3747 SKIP(5);
3748 if (!IS_BLANK(CUR)) {
3749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3751 ctxt->sax->error(ctxt->userData,
3752 "Space required after 'NDATA'\n");
3753 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003754 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003757 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3759 (ctxt->sax->unparsedEntityDecl != NULL))
3760 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3761 literal, URI, ndata);
3762 } else {
3763 if ((ctxt->sax != NULL) &&
3764 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3765 ctxt->sax->entityDecl(ctxt->userData, name,
3766 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3767 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003768 /*
3769 * For expat compatibility in SAX mode.
3770 * assuming the entity repalcement was asked for
3771 */
3772 if ((ctxt->replaceEntities != 0) &&
3773 ((ctxt->myDoc == NULL) ||
3774 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3775 if (ctxt->myDoc == NULL) {
3776 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3777 }
3778
3779 if (ctxt->myDoc->intSubset == NULL)
3780 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3781 BAD_CAST "fake", NULL, NULL);
3782 entityDecl(ctxt, name,
3783 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3784 literal, URI, NULL);
3785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 }
3787 }
3788 }
3789 SKIP_BLANKS;
3790 if (RAW != '>') {
3791 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3793 ctxt->sax->error(ctxt->userData,
3794 "xmlParseEntityDecl: entity %s not terminated\n", name);
3795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 } else {
3798 if (input != ctxt->input) {
3799 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801 ctxt->sax->error(ctxt->userData,
3802"Entity declaration doesn't start and stop in the same entity\n");
3803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
3806 NEXT;
3807 }
3808 if (orig != NULL) {
3809 /*
3810 * Ugly mechanism to save the raw entity value.
3811 */
3812 xmlEntityPtr cur = NULL;
3813
3814 if (isParameter) {
3815 if ((ctxt->sax != NULL) &&
3816 (ctxt->sax->getParameterEntity != NULL))
3817 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3818 } else {
3819 if ((ctxt->sax != NULL) &&
3820 (ctxt->sax->getEntity != NULL))
3821 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003822 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3823 cur = getEntity(ctxt, name);
3824 }
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (cur != NULL) {
3827 if (cur->orig != NULL)
3828 xmlFree(orig);
3829 else
3830 cur->orig = orig;
3831 } else
3832 xmlFree(orig);
3833 }
3834 if (name != NULL) xmlFree(name);
3835 if (value != NULL) xmlFree(value);
3836 if (URI != NULL) xmlFree(URI);
3837 if (literal != NULL) xmlFree(literal);
3838 if (ndata != NULL) xmlFree(ndata);
3839 }
3840}
3841
3842/**
3843 * xmlParseDefaultDecl:
3844 * @ctxt: an XML parser context
3845 * @value: Receive a possible fixed default value for the attribute
3846 *
3847 * Parse an attribute default declaration
3848 *
3849 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3850 *
3851 * [ VC: Required Attribute ]
3852 * if the default declaration is the keyword #REQUIRED, then the
3853 * attribute must be specified for all elements of the type in the
3854 * attribute-list declaration.
3855 *
3856 * [ VC: Attribute Default Legal ]
3857 * The declared default value must meet the lexical constraints of
3858 * the declared attribute type c.f. xmlValidateAttributeDecl()
3859 *
3860 * [ VC: Fixed Attribute Default ]
3861 * if an attribute has a default value declared with the #FIXED
3862 * keyword, instances of that attribute must match the default value.
3863 *
3864 * [ WFC: No < in Attribute Values ]
3865 * handled in xmlParseAttValue()
3866 *
3867 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3868 * or XML_ATTRIBUTE_FIXED.
3869 */
3870
3871int
3872xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3873 int val;
3874 xmlChar *ret;
3875
3876 *value = NULL;
3877 if ((RAW == '#') && (NXT(1) == 'R') &&
3878 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3879 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3880 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3881 (NXT(8) == 'D')) {
3882 SKIP(9);
3883 return(XML_ATTRIBUTE_REQUIRED);
3884 }
3885 if ((RAW == '#') && (NXT(1) == 'I') &&
3886 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3887 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3888 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3889 SKIP(8);
3890 return(XML_ATTRIBUTE_IMPLIED);
3891 }
3892 val = XML_ATTRIBUTE_NONE;
3893 if ((RAW == '#') && (NXT(1) == 'F') &&
3894 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3895 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3896 SKIP(6);
3897 val = XML_ATTRIBUTE_FIXED;
3898 if (!IS_BLANK(CUR)) {
3899 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3901 ctxt->sax->error(ctxt->userData,
3902 "Space required after '#FIXED'\n");
3903 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003904 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 SKIP_BLANKS;
3907 }
3908 ret = xmlParseAttValue(ctxt);
3909 ctxt->instate = XML_PARSER_DTD;
3910 if (ret == NULL) {
3911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3912 ctxt->sax->error(ctxt->userData,
3913 "Attribute default value declaration error\n");
3914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else
3917 *value = ret;
3918 return(val);
3919}
3920
3921/**
3922 * xmlParseNotationType:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse an Notation attribute type.
3926 *
3927 * Note: the leading 'NOTATION' S part has already being parsed...
3928 *
3929 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3930 *
3931 * [ VC: Notation Attributes ]
3932 * Values of this type must match one of the notation names included
3933 * in the declaration; all notation names in the declaration must be declared.
3934 *
3935 * Returns: the notation attribute tree built while parsing
3936 */
3937
3938xmlEnumerationPtr
3939xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3940 xmlChar *name;
3941 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3942
3943 if (RAW != '(') {
3944 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "'(' required to start 'NOTATION'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(NULL);
3951 }
3952 SHRINK;
3953 do {
3954 NEXT;
3955 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003956 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (name == NULL) {
3958 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "Name expected in NOTATION declaration\n");
3962 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003963 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return(ret);
3965 }
3966 cur = xmlCreateEnumeration(name);
3967 xmlFree(name);
3968 if (cur == NULL) return(ret);
3969 if (last == NULL) ret = last = cur;
3970 else {
3971 last->next = cur;
3972 last = cur;
3973 }
3974 SKIP_BLANKS;
3975 } while (RAW == '|');
3976 if (RAW != ')') {
3977 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3979 ctxt->sax->error(ctxt->userData,
3980 "')' required to finish NOTATION declaration\n");
3981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if ((last != NULL) && (last != ret))
3984 xmlFreeEnumeration(last);
3985 return(ret);
3986 }
3987 NEXT;
3988 return(ret);
3989}
3990
3991/**
3992 * xmlParseEnumerationType:
3993 * @ctxt: an XML parser context
3994 *
3995 * parse an Enumeration attribute type.
3996 *
3997 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3998 *
3999 * [ VC: Enumeration ]
4000 * Values of this type must match one of the Nmtoken tokens in
4001 * the declaration
4002 *
4003 * Returns: the enumeration attribute tree built while parsing
4004 */
4005
4006xmlEnumerationPtr
4007xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4008 xmlChar *name;
4009 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4010
4011 if (RAW != '(') {
4012 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4014 ctxt->sax->error(ctxt->userData,
4015 "'(' required to start ATTLIST enumeration\n");
4016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004018 return(NULL);
4019 }
4020 SHRINK;
4021 do {
4022 NEXT;
4023 SKIP_BLANKS;
4024 name = xmlParseNmtoken(ctxt);
4025 if (name == NULL) {
4026 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "NmToken expected in ATTLIST enumeration\n");
4030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 return(ret);
4033 }
4034 cur = xmlCreateEnumeration(name);
4035 xmlFree(name);
4036 if (cur == NULL) return(ret);
4037 if (last == NULL) ret = last = cur;
4038 else {
4039 last->next = cur;
4040 last = cur;
4041 }
4042 SKIP_BLANKS;
4043 } while (RAW == '|');
4044 if (RAW != ')') {
4045 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "')' required to finish ATTLIST enumeration\n");
4049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004051 return(ret);
4052 }
4053 NEXT;
4054 return(ret);
4055}
4056
4057/**
4058 * xmlParseEnumeratedType:
4059 * @ctxt: an XML parser context
4060 * @tree: the enumeration tree built while parsing
4061 *
4062 * parse an Enumerated attribute type.
4063 *
4064 * [57] EnumeratedType ::= NotationType | Enumeration
4065 *
4066 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4067 *
4068 *
4069 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4070 */
4071
4072int
4073xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4074 if ((RAW == 'N') && (NXT(1) == 'O') &&
4075 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4076 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4077 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4078 SKIP(8);
4079 if (!IS_BLANK(CUR)) {
4080 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4082 ctxt->sax->error(ctxt->userData,
4083 "Space required after 'NOTATION'\n");
4084 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 return(0);
4087 }
4088 SKIP_BLANKS;
4089 *tree = xmlParseNotationType(ctxt);
4090 if (*tree == NULL) return(0);
4091 return(XML_ATTRIBUTE_NOTATION);
4092 }
4093 *tree = xmlParseEnumerationType(ctxt);
4094 if (*tree == NULL) return(0);
4095 return(XML_ATTRIBUTE_ENUMERATION);
4096}
4097
4098/**
4099 * xmlParseAttributeType:
4100 * @ctxt: an XML parser context
4101 * @tree: the enumeration tree built while parsing
4102 *
4103 * parse the Attribute list def for an element
4104 *
4105 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4106 *
4107 * [55] StringType ::= 'CDATA'
4108 *
4109 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4110 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4111 *
4112 * Validity constraints for attribute values syntax are checked in
4113 * xmlValidateAttributeValue()
4114 *
4115 * [ VC: ID ]
4116 * Values of type ID must match the Name production. A name must not
4117 * appear more than once in an XML document as a value of this type;
4118 * i.e., ID values must uniquely identify the elements which bear them.
4119 *
4120 * [ VC: One ID per Element Type ]
4121 * No element type may have more than one ID attribute specified.
4122 *
4123 * [ VC: ID Attribute Default ]
4124 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4125 *
4126 * [ VC: IDREF ]
4127 * Values of type IDREF must match the Name production, and values
4128 * of type IDREFS must match Names; each IDREF Name must match the value
4129 * of an ID attribute on some element in the XML document; i.e. IDREF
4130 * values must match the value of some ID attribute.
4131 *
4132 * [ VC: Entity Name ]
4133 * Values of type ENTITY must match the Name production, values
4134 * of type ENTITIES must match Names; each Entity Name must match the
4135 * name of an unparsed entity declared in the DTD.
4136 *
4137 * [ VC: Name Token ]
4138 * Values of type NMTOKEN must match the Nmtoken production; values
4139 * of type NMTOKENS must match Nmtokens.
4140 *
4141 * Returns the attribute type
4142 */
4143int
4144xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4145 SHRINK;
4146 if ((RAW == 'C') && (NXT(1) == 'D') &&
4147 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4148 (NXT(4) == 'A')) {
4149 SKIP(5);
4150 return(XML_ATTRIBUTE_CDATA);
4151 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4152 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4153 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4154 SKIP(6);
4155 return(XML_ATTRIBUTE_IDREFS);
4156 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4157 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4158 (NXT(4) == 'F')) {
4159 SKIP(5);
4160 return(XML_ATTRIBUTE_IDREF);
4161 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4162 SKIP(2);
4163 return(XML_ATTRIBUTE_ID);
4164 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4165 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4166 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4167 SKIP(6);
4168 return(XML_ATTRIBUTE_ENTITY);
4169 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4170 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4171 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4172 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4173 SKIP(8);
4174 return(XML_ATTRIBUTE_ENTITIES);
4175 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4176 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4177 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4178 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4179 SKIP(8);
4180 return(XML_ATTRIBUTE_NMTOKENS);
4181 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4182 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4183 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4184 (NXT(6) == 'N')) {
4185 SKIP(7);
4186 return(XML_ATTRIBUTE_NMTOKEN);
4187 }
4188 return(xmlParseEnumeratedType(ctxt, tree));
4189}
4190
4191/**
4192 * xmlParseAttributeListDecl:
4193 * @ctxt: an XML parser context
4194 *
4195 * : parse the Attribute list def for an element
4196 *
4197 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4198 *
4199 * [53] AttDef ::= S Name S AttType S DefaultDecl
4200 *
4201 */
4202void
4203xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4204 xmlChar *elemName;
4205 xmlChar *attrName;
4206 xmlEnumerationPtr tree;
4207
4208 if ((RAW == '<') && (NXT(1) == '!') &&
4209 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4210 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4211 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4212 (NXT(8) == 'T')) {
4213 xmlParserInputPtr input = ctxt->input;
4214
4215 SKIP(9);
4216 if (!IS_BLANK(CUR)) {
4217 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4219 ctxt->sax->error(ctxt->userData,
4220 "Space required after '<!ATTLIST'\n");
4221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (elemName == NULL) {
4227 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4229 ctxt->sax->error(ctxt->userData,
4230 "ATTLIST: no name for Element\n");
4231 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004232 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236 GROW;
4237 while (RAW != '>') {
4238 const xmlChar *check = CUR_PTR;
4239 int type;
4240 int def;
4241 xmlChar *defaultValue = NULL;
4242
4243 GROW;
4244 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (attrName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Attribute\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 break;
4254 }
4255 GROW;
4256 if (!IS_BLANK(CUR)) {
4257 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4259 ctxt->sax->error(ctxt->userData,
4260 "Space required after the attribute name\n");
4261 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004262 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004263 if (attrName != NULL)
4264 xmlFree(attrName);
4265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 break;
4268 }
4269 SKIP_BLANKS;
4270
4271 type = xmlParseAttributeType(ctxt, &tree);
4272 if (type <= 0) {
4273 if (attrName != NULL)
4274 xmlFree(attrName);
4275 if (defaultValue != NULL)
4276 xmlFree(defaultValue);
4277 break;
4278 }
4279
4280 GROW;
4281 if (!IS_BLANK(CUR)) {
4282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285 "Space required after the attribute type\n");
4286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (attrName != NULL)
4289 xmlFree(attrName);
4290 if (defaultValue != NULL)
4291 xmlFree(defaultValue);
4292 if (tree != NULL)
4293 xmlFreeEnumeration(tree);
4294 break;
4295 }
4296 SKIP_BLANKS;
4297
4298 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4299 if (def <= 0) {
4300 if (attrName != NULL)
4301 xmlFree(attrName);
4302 if (defaultValue != NULL)
4303 xmlFree(defaultValue);
4304 if (tree != NULL)
4305 xmlFreeEnumeration(tree);
4306 break;
4307 }
4308
4309 GROW;
4310 if (RAW != '>') {
4311 if (!IS_BLANK(CUR)) {
4312 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "Space required after the attribute default value\n");
4316 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004317 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 if (tree != NULL)
4323 xmlFreeEnumeration(tree);
4324 break;
4325 }
4326 SKIP_BLANKS;
4327 }
4328 if (check == CUR_PTR) {
4329 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "xmlParseAttributeListDecl: detected internal error\n");
4333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4342 (ctxt->sax->attributeDecl != NULL))
4343 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4344 type, def, defaultValue, tree);
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 GROW;
4350 }
4351 if (RAW == '>') {
4352 if (input != ctxt->input) {
4353 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4355 ctxt->sax->error(ctxt->userData,
4356"Attribute list declaration doesn't start and stop in the same entity\n");
4357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 }
4360 NEXT;
4361 }
4362
4363 xmlFree(elemName);
4364 }
4365}
4366
4367/**
4368 * xmlParseElementMixedContentDecl:
4369 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004370 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004371 *
4372 * parse the declaration for a Mixed Element content
4373 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4374 *
4375 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4376 * '(' S? '#PCDATA' S? ')'
4377 *
4378 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4379 *
4380 * [ VC: No Duplicate Types ]
4381 * The same name must not appear more than once in a single
4382 * mixed-content declaration.
4383 *
4384 * returns: the list of the xmlElementContentPtr describing the element choices
4385 */
4386xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004387xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 xmlElementContentPtr ret = NULL, cur = NULL, n;
4389 xmlChar *elem = NULL;
4390
4391 GROW;
4392 if ((RAW == '#') && (NXT(1) == 'P') &&
4393 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4394 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4395 (NXT(6) == 'A')) {
4396 SKIP(7);
4397 SKIP_BLANKS;
4398 SHRINK;
4399 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004400 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if (ctxt->vctxt.error != NULL)
4403 ctxt->vctxt.error(ctxt->vctxt.userData,
4404"Element content declaration doesn't start and stop in the same entity\n");
4405 ctxt->valid = 0;
4406 }
Owen Taylor3473f882001-02-23 17:55:21 +00004407 NEXT;
4408 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4409 if (RAW == '*') {
4410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4411 NEXT;
4412 }
4413 return(ret);
4414 }
4415 if ((RAW == '(') || (RAW == '|')) {
4416 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4417 if (ret == NULL) return(NULL);
4418 }
4419 while (RAW == '|') {
4420 NEXT;
4421 if (elem == NULL) {
4422 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4423 if (ret == NULL) return(NULL);
4424 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004425 if (cur != NULL)
4426 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 cur = ret;
4428 } else {
4429 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4430 if (n == NULL) return(NULL);
4431 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004432 if (n->c1 != NULL)
4433 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004435 if (n != NULL)
4436 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 cur = n;
4438 xmlFree(elem);
4439 }
4440 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004441 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 if (elem == NULL) {
4443 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4445 ctxt->sax->error(ctxt->userData,
4446 "xmlParseElementMixedContentDecl : Name expected\n");
4447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(cur);
4450 return(NULL);
4451 }
4452 SKIP_BLANKS;
4453 GROW;
4454 }
4455 if ((RAW == ')') && (NXT(1) == '*')) {
4456 if (elem != NULL) {
4457 cur->c2 = xmlNewElementContent(elem,
4458 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004459 if (cur->c2 != NULL)
4460 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 xmlFree(elem);
4462 }
4463 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004464 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4465 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4466 if (ctxt->vctxt.error != NULL)
4467 ctxt->vctxt.error(ctxt->vctxt.userData,
4468"Element content declaration doesn't start and stop in the same entity\n");
4469 ctxt->valid = 0;
4470 }
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(2);
4472 } else {
4473 if (elem != NULL) xmlFree(elem);
4474 xmlFreeElementContent(ret);
4475 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(NULL);
4482 }
4483
4484 } else {
4485 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
4488 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4489 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004490 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 return(ret);
4493}
4494
4495/**
4496 * xmlParseElementChildrenContentDecl:
4497 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004498 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004499 *
4500 * parse the declaration for an Element children content
4501 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4502 *
4503 *
4504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4505 *
4506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4507 *
4508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4509 *
4510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4511 *
4512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4513 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004514 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004515 * opening or closing parentheses in a choice, seq, or Mixed
4516 * construct is contained in the replacement text for a parameter
4517 * entity, both must be contained in the same replacement text. For
4518 * interoperability, if a parameter-entity reference appears in a
4519 * choice, seq, or Mixed construct, its replacement text should not
4520 * be empty, and neither the first nor last non-blank character of
4521 * the replacement text should be a connector (| or ,).
4522 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004523 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004524 * hierarchy.
4525 */
4526xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004527xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004528(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004529 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4530 xmlChar *elem;
4531 xmlChar type = 0;
4532
4533 SKIP_BLANKS;
4534 GROW;
4535 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004536 xmlParserInputPtr input = ctxt->input;
4537
Owen Taylor3473f882001-02-23 17:55:21 +00004538 /* Recurse on first child */
4539 NEXT;
4540 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004541 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004542 SKIP_BLANKS;
4543 GROW;
4544 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004545 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004546 if (elem == NULL) {
4547 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(NULL);
4554 }
4555 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4556 GROW;
4557 if (RAW == '?') {
4558 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4559 NEXT;
4560 } else if (RAW == '*') {
4561 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4562 NEXT;
4563 } else if (RAW == '+') {
4564 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4565 NEXT;
4566 } else {
4567 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4568 }
4569 xmlFree(elem);
4570 GROW;
4571 }
4572 SKIP_BLANKS;
4573 SHRINK;
4574 while (RAW != ')') {
4575 /*
4576 * Each loop we parse one separator and one element.
4577 */
4578 if (RAW == ',') {
4579 if (type == 0) type = CUR;
4580
4581 /*
4582 * Detect "Name | Name , Name" error
4583 */
4584 else if (type != CUR) {
4585 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4589 type);
4590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004592 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004593 xmlFreeElementContent(last);
4594 if (ret != NULL)
4595 xmlFreeElementContent(ret);
4596 return(NULL);
4597 }
4598 NEXT;
4599
4600 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4601 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004602 if ((last != NULL) && (last != ret))
4603 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 xmlFreeElementContent(ret);
4605 return(NULL);
4606 }
4607 if (last == NULL) {
4608 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (ret != NULL)
4610 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 ret = cur = op;
4612 } else {
4613 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004614 if (op != NULL)
4615 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004617 if (last != NULL)
4618 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004619 cur =op;
4620 last = NULL;
4621 }
4622 } else if (RAW == '|') {
4623 if (type == 0) type = CUR;
4624
4625 /*
4626 * Detect "Name , Name | Name" error
4627 */
4628 else if (type != CUR) {
4629 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4631 ctxt->sax->error(ctxt->userData,
4632 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4633 type);
4634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004636 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004637 xmlFreeElementContent(last);
4638 if (ret != NULL)
4639 xmlFreeElementContent(ret);
4640 return(NULL);
4641 }
4642 NEXT;
4643
4644 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4645 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 if (last == NULL) {
4653 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004654 if (ret != NULL)
4655 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 ret = cur = op;
4657 } else {
4658 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004659 if (op != NULL)
4660 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004662 if (last != NULL)
4663 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 cur =op;
4665 last = NULL;
4666 }
4667 } else {
4668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 GROW;
4679 SKIP_BLANKS;
4680 GROW;
4681 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004682 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004683 /* Recurse on second child */
4684 NEXT;
4685 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004686 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 SKIP_BLANKS;
4688 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004689 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if (elem == NULL) {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 if (ret != NULL)
4698 xmlFreeElementContent(ret);
4699 return(NULL);
4700 }
4701 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4702 xmlFree(elem);
4703 if (RAW == '?') {
4704 last->ocur = XML_ELEMENT_CONTENT_OPT;
4705 NEXT;
4706 } else if (RAW == '*') {
4707 last->ocur = XML_ELEMENT_CONTENT_MULT;
4708 NEXT;
4709 } else if (RAW == '+') {
4710 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4711 NEXT;
4712 } else {
4713 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4714 }
4715 }
4716 SKIP_BLANKS;
4717 GROW;
4718 }
4719 if ((cur != NULL) && (last != NULL)) {
4720 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004721 if (last != NULL)
4722 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004724 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4725 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4726 if (ctxt->vctxt.error != NULL)
4727 ctxt->vctxt.error(ctxt->vctxt.userData,
4728"Element content declaration doesn't start and stop in the same entity\n");
4729 ctxt->valid = 0;
4730 }
Owen Taylor3473f882001-02-23 17:55:21 +00004731 NEXT;
4732 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004733 if (ret != NULL)
4734 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 NEXT;
4736 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004737 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004738 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 cur = ret;
4740 /*
4741 * Some normalization:
4742 * (a | b* | c?)* == (a | b | c)*
4743 */
4744 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4745 if ((cur->c1 != NULL) &&
4746 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4747 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4748 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4749 if ((cur->c2 != NULL) &&
4750 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4752 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 cur = cur->c2;
4754 }
4755 }
Owen Taylor3473f882001-02-23 17:55:21 +00004756 NEXT;
4757 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004758 if (ret != NULL) {
4759 int found = 0;
4760
Daniel Veillarde470df72001-04-18 21:41:07 +00004761 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004762 /*
4763 * Some normalization:
4764 * (a | b*)+ == (a | b)*
4765 * (a | b?)+ == (a | b)*
4766 */
4767 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4768 if ((cur->c1 != NULL) &&
4769 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4770 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4771 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4772 found = 1;
4773 }
4774 if ((cur->c2 != NULL) &&
4775 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4776 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4777 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4778 found = 1;
4779 }
4780 cur = cur->c2;
4781 }
4782 if (found)
4783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 }
4787 return(ret);
4788}
4789
4790/**
4791 * xmlParseElementContentDecl:
4792 * @ctxt: an XML parser context
4793 * @name: the name of the element being defined.
4794 * @result: the Element Content pointer will be stored here if any
4795 *
4796 * parse the declaration for an Element content either Mixed or Children,
4797 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4798 *
4799 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4800 *
4801 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4802 */
4803
4804int
4805xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4806 xmlElementContentPtr *result) {
4807
4808 xmlElementContentPtr tree = NULL;
4809 xmlParserInputPtr input = ctxt->input;
4810 int res;
4811
4812 *result = NULL;
4813
4814 if (RAW != '(') {
4815 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004818 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(-1);
4822 }
4823 NEXT;
4824 GROW;
4825 SKIP_BLANKS;
4826 if ((RAW == '#') && (NXT(1) == 'P') &&
4827 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4828 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4829 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004830 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_MIXED;
4832 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004833 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004834 res = XML_ELEMENT_TYPE_ELEMENT;
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP_BLANKS;
4837 *result = tree;
4838 return(res);
4839}
4840
4841/**
4842 * xmlParseElementDecl:
4843 * @ctxt: an XML parser context
4844 *
4845 * parse an Element declaration.
4846 *
4847 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4848 *
4849 * [ VC: Unique Element Type Declaration ]
4850 * No element type may be declared more than once
4851 *
4852 * Returns the type of the element, or -1 in case of error
4853 */
4854int
4855xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4856 xmlChar *name;
4857 int ret = -1;
4858 xmlElementContentPtr content = NULL;
4859
4860 GROW;
4861 if ((RAW == '<') && (NXT(1) == '!') &&
4862 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4863 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4864 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4865 (NXT(8) == 'T')) {
4866 xmlParserInputPtr input = ctxt->input;
4867
4868 SKIP(9);
4869 if (!IS_BLANK(CUR)) {
4870 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "Space required after 'ELEMENT'\n");
4874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004878 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (name == NULL) {
4880 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4882 ctxt->sax->error(ctxt->userData,
4883 "xmlParseElementDecl: no name for Element\n");
4884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(-1);
4887 }
4888 while ((RAW == 0) && (ctxt->inputNr > 1))
4889 xmlPopInput(ctxt);
4890 if (!IS_BLANK(CUR)) {
4891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4893 ctxt->sax->error(ctxt->userData,
4894 "Space required after the element name\n");
4895 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 }
4898 SKIP_BLANKS;
4899 if ((RAW == 'E') && (NXT(1) == 'M') &&
4900 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4901 (NXT(4) == 'Y')) {
4902 SKIP(5);
4903 /*
4904 * Element must always be empty.
4905 */
4906 ret = XML_ELEMENT_TYPE_EMPTY;
4907 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4908 (NXT(2) == 'Y')) {
4909 SKIP(3);
4910 /*
4911 * Element is a generic container.
4912 */
4913 ret = XML_ELEMENT_TYPE_ANY;
4914 } else if (RAW == '(') {
4915 ret = xmlParseElementContentDecl(ctxt, name, &content);
4916 } else {
4917 /*
4918 * [ WFC: PEs in Internal Subset ] error handling.
4919 */
4920 if ((RAW == '%') && (ctxt->external == 0) &&
4921 (ctxt->inputNr == 1)) {
4922 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4924 ctxt->sax->error(ctxt->userData,
4925 "PEReference: forbidden within markup decl in internal subset\n");
4926 } else {
4927 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4931 }
4932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name != NULL) xmlFree(name);
4935 return(-1);
4936 }
4937
4938 SKIP_BLANKS;
4939 /*
4940 * Pop-up of finished entities.
4941 */
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 SKIP_BLANKS;
4945
4946 if (RAW != '>') {
4947 ctxt->errNo = XML_ERR_GT_REQUIRED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: expected '>' at the end\n");
4951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004953 } else {
4954 if (input != ctxt->input) {
4955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4957 ctxt->sax->error(ctxt->userData,
4958"Element declaration doesn't start and stop in the same entity\n");
4959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962
4963 NEXT;
4964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4965 (ctxt->sax->elementDecl != NULL))
4966 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4967 content);
4968 }
4969 if (content != NULL) {
4970 xmlFreeElementContent(content);
4971 }
4972 if (name != NULL) {
4973 xmlFree(name);
4974 }
4975 }
4976 return(ret);
4977}
4978
4979/**
Owen Taylor3473f882001-02-23 17:55:21 +00004980 * xmlParseConditionalSections
4981 * @ctxt: an XML parser context
4982 *
4983 * [61] conditionalSect ::= includeSect | ignoreSect
4984 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4985 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4986 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4987 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4988 */
4989
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004990static void
Owen Taylor3473f882001-02-23 17:55:21 +00004991xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4992 SKIP(3);
4993 SKIP_BLANKS;
4994 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4995 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4996 (NXT(6) == 'E')) {
4997 SKIP(7);
4998 SKIP_BLANKS;
4999 if (RAW != '[') {
5000 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5002 ctxt->sax->error(ctxt->userData,
5003 "XML conditional section '[' expected\n");
5004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 } else {
5007 NEXT;
5008 }
5009 if (xmlParserDebugEntities) {
5010 if ((ctxt->input != NULL) && (ctxt->input->filename))
5011 xmlGenericError(xmlGenericErrorContext,
5012 "%s(%d): ", ctxt->input->filename,
5013 ctxt->input->line);
5014 xmlGenericError(xmlGenericErrorContext,
5015 "Entering INCLUDE Conditional Section\n");
5016 }
5017
5018 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5019 (NXT(2) != '>'))) {
5020 const xmlChar *check = CUR_PTR;
5021 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5024 xmlParseConditionalSections(ctxt);
5025 } else if (IS_BLANK(CUR)) {
5026 NEXT;
5027 } else if (RAW == '%') {
5028 xmlParsePEReference(ctxt);
5029 } else
5030 xmlParseMarkupDecl(ctxt);
5031
5032 /*
5033 * Pop-up of finished entities.
5034 */
5035 while ((RAW == 0) && (ctxt->inputNr > 1))
5036 xmlPopInput(ctxt);
5037
Daniel Veillardfdc91562002-07-01 21:52:03 +00005038 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "Content error in the external subset\n");
5043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 break;
5046 }
5047 }
5048 if (xmlParserDebugEntities) {
5049 if ((ctxt->input != NULL) && (ctxt->input->filename))
5050 xmlGenericError(xmlGenericErrorContext,
5051 "%s(%d): ", ctxt->input->filename,
5052 ctxt->input->line);
5053 xmlGenericError(xmlGenericErrorContext,
5054 "Leaving INCLUDE Conditional Section\n");
5055 }
5056
5057 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5058 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5059 int state;
5060 int instate;
5061 int depth = 0;
5062
5063 SKIP(6);
5064 SKIP_BLANKS;
5065 if (RAW != '[') {
5066 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5068 ctxt->sax->error(ctxt->userData,
5069 "XML conditional section '[' expected\n");
5070 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005071 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 } else {
5073 NEXT;
5074 }
5075 if (xmlParserDebugEntities) {
5076 if ((ctxt->input != NULL) && (ctxt->input->filename))
5077 xmlGenericError(xmlGenericErrorContext,
5078 "%s(%d): ", ctxt->input->filename,
5079 ctxt->input->line);
5080 xmlGenericError(xmlGenericErrorContext,
5081 "Entering IGNORE Conditional Section\n");
5082 }
5083
5084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005085 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * But disable SAX event generating DTD building in the meantime
5087 */
5088 state = ctxt->disableSAX;
5089 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005090 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 ctxt->instate = XML_PARSER_IGNORE;
5092
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005093 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5095 depth++;
5096 SKIP(3);
5097 continue;
5098 }
5099 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5100 if (--depth >= 0) SKIP(3);
5101 continue;
5102 }
5103 NEXT;
5104 continue;
5105 }
5106
5107 ctxt->disableSAX = state;
5108 ctxt->instate = instate;
5109
5110 if (xmlParserDebugEntities) {
5111 if ((ctxt->input != NULL) && (ctxt->input->filename))
5112 xmlGenericError(xmlGenericErrorContext,
5113 "%s(%d): ", ctxt->input->filename,
5114 ctxt->input->line);
5115 xmlGenericError(xmlGenericErrorContext,
5116 "Leaving IGNORE Conditional Section\n");
5117 }
5118
5119 } else {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 }
5127
5128 if (RAW == 0)
5129 SHRINK;
5130
5131 if (RAW == 0) {
5132 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5134 ctxt->sax->error(ctxt->userData,
5135 "XML conditional section not closed\n");
5136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 } else {
5139 SKIP(3);
5140 }
5141}
5142
5143/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005144 * xmlParseMarkupDecl:
5145 * @ctxt: an XML parser context
5146 *
5147 * parse Markup declarations
5148 *
5149 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5150 * NotationDecl | PI | Comment
5151 *
5152 * [ VC: Proper Declaration/PE Nesting ]
5153 * Parameter-entity replacement text must be properly nested with
5154 * markup declarations. That is to say, if either the first character
5155 * or the last character of a markup declaration (markupdecl above) is
5156 * contained in the replacement text for a parameter-entity reference,
5157 * both must be contained in the same replacement text.
5158 *
5159 * [ WFC: PEs in Internal Subset ]
5160 * In the internal DTD subset, parameter-entity references can occur
5161 * only where markup declarations can occur, not within markup declarations.
5162 * (This does not apply to references that occur in external parameter
5163 * entities or to the external subset.)
5164 */
5165void
5166xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5167 GROW;
5168 xmlParseElementDecl(ctxt);
5169 xmlParseAttributeListDecl(ctxt);
5170 xmlParseEntityDecl(ctxt);
5171 xmlParseNotationDecl(ctxt);
5172 xmlParsePI(ctxt);
5173 xmlParseComment(ctxt);
5174 /*
5175 * This is only for internal subset. On external entities,
5176 * the replacement is done before parsing stage
5177 */
5178 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5179 xmlParsePEReference(ctxt);
5180
5181 /*
5182 * Conditional sections are allowed from entities included
5183 * by PE References in the internal subset.
5184 */
5185 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5186 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5187 xmlParseConditionalSections(ctxt);
5188 }
5189 }
5190
5191 ctxt->instate = XML_PARSER_DTD;
5192}
5193
5194/**
5195 * xmlParseTextDecl:
5196 * @ctxt: an XML parser context
5197 *
5198 * parse an XML declaration header for external entities
5199 *
5200 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5201 *
5202 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5203 */
5204
5205void
5206xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5207 xmlChar *version;
5208
5209 /*
5210 * We know that '<?xml' is here.
5211 */
5212 if ((RAW == '<') && (NXT(1) == '?') &&
5213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5215 SKIP(5);
5216 } else {
5217 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5219 ctxt->sax->error(ctxt->userData,
5220 "Text declaration '<?xml' required\n");
5221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005223
5224 return;
5225 }
5226
5227 if (!IS_BLANK(CUR)) {
5228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Space needed after '<?xml'\n");
5232 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 }
5235 SKIP_BLANKS;
5236
5237 /*
5238 * We may have the VersionInfo here.
5239 */
5240 version = xmlParseVersionInfo(ctxt);
5241 if (version == NULL)
5242 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005243 else {
5244 if (!IS_BLANK(CUR)) {
5245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005250 }
5251 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 ctxt->input->version = version;
5253
5254 /*
5255 * We must have the encoding declaration
5256 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005257 xmlParseEncodingDecl(ctxt);
5258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5259 /*
5260 * The XML REC instructs us to stop parsing right here
5261 */
5262 return;
5263 }
5264
5265 SKIP_BLANKS;
5266 if ((RAW == '?') && (NXT(1) == '>')) {
5267 SKIP(2);
5268 } else if (RAW == '>') {
5269 /* Deprecated old WD ... */
5270 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "XML declaration must end-up with '?>'\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 NEXT;
5277 } else {
5278 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "parsing XML declaration: '?>' expected\n");
5282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005284 MOVETO_ENDTAG(CUR_PTR);
5285 NEXT;
5286 }
5287}
5288
5289/**
Owen Taylor3473f882001-02-23 17:55:21 +00005290 * xmlParseExternalSubset:
5291 * @ctxt: an XML parser context
5292 * @ExternalID: the external identifier
5293 * @SystemID: the system identifier (or URL)
5294 *
5295 * parse Markup declarations from an external subset
5296 *
5297 * [30] extSubset ::= textDecl? extSubsetDecl
5298 *
5299 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5300 */
5301void
5302xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5303 const xmlChar *SystemID) {
5304 GROW;
5305 if ((RAW == '<') && (NXT(1) == '?') &&
5306 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5307 (NXT(4) == 'l')) {
5308 xmlParseTextDecl(ctxt);
5309 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5310 /*
5311 * The XML REC instructs us to stop parsing right here
5312 */
5313 ctxt->instate = XML_PARSER_EOF;
5314 return;
5315 }
5316 }
5317 if (ctxt->myDoc == NULL) {
5318 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5319 }
5320 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5321 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5322
5323 ctxt->instate = XML_PARSER_DTD;
5324 ctxt->external = 1;
5325 while (((RAW == '<') && (NXT(1) == '?')) ||
5326 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005327 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005328 const xmlChar *check = CUR_PTR;
5329 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005330
5331 GROW;
5332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5333 xmlParseConditionalSections(ctxt);
5334 } else if (IS_BLANK(CUR)) {
5335 NEXT;
5336 } else if (RAW == '%') {
5337 xmlParsePEReference(ctxt);
5338 } else
5339 xmlParseMarkupDecl(ctxt);
5340
5341 /*
5342 * Pop-up of finished entities.
5343 */
5344 while ((RAW == 0) && (ctxt->inputNr > 1))
5345 xmlPopInput(ctxt);
5346
Daniel Veillardfdc91562002-07-01 21:52:03 +00005347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "Content error in the external subset\n");
5352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 break;
5355 }
5356 }
5357
5358 if (RAW != 0) {
5359 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "Extra content at the end of the document\n");
5363 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005364 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005365 }
5366
5367}
5368
5369/**
5370 * xmlParseReference:
5371 * @ctxt: an XML parser context
5372 *
 * Parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
5377 *
5378 * [67] Reference ::= EntityRef | CharRef
5379 */
5380void
5381xmlParseReference(xmlParserCtxtPtr ctxt) {
5382 xmlEntityPtr ent;
5383 xmlChar *val;
5384 if (RAW != '&') return;
5385
5386 if (NXT(1) == '#') {
5387 int i = 0;
5388 xmlChar out[10];
5389 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005390 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5393 /*
5394 * So we are using non-UTF-8 buffers
5395 * Check that the char fit on 8bits, if not
5396 * generate a CharRef.
5397 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005398 if (value <= 0xFF) {
5399 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 out[1] = 0;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5402 (!ctxt->disableSAX))
5403 ctxt->sax->characters(ctxt->userData, out, 1);
5404 } else {
5405 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005406 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005408 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5410 (!ctxt->disableSAX))
5411 ctxt->sax->reference(ctxt->userData, out);
5412 }
5413 } else {
5414 /*
5415 * Just encode the value in UTF-8
5416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005417 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 out[i] = 0;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, out, i);
5422 }
5423 } else {
5424 ent = xmlParseEntityRef(ctxt);
5425 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005426 if (!ctxt->wellFormed)
5427 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if ((ent->name != NULL) &&
5429 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5430 xmlNodePtr list = NULL;
5431 int ret;
5432
5433
5434 /*
5435 * The first reference to the entity trigger a parsing phase
5436 * where the ent->children is filled with the result from
5437 * the parsing.
5438 */
5439 if (ent->children == NULL) {
5440 xmlChar *value;
5441 value = ent->content;
5442
5443 /*
5444 * Check that this entity is well formed
5445 */
5446 if ((value != NULL) &&
5447 (value[1] == 0) && (value[0] == '<') &&
5448 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5449 /*
5450 * DONE: get definite answer on this !!!
5451 * Lots of entity decls are used to declare a single
5452 * char
5453 * <!ENTITY lt "<">
5454 * Which seems to be valid since
5455 * 2.4: The ampersand character (&) and the left angle
5456 * bracket (<) may appear in their literal form only
5457 * when used ... They are also legal within the literal
5458 * entity value of an internal entity declaration;i
5459 * see "4.3.2 Well-Formed Parsed Entities".
5460 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5461 * Looking at the OASIS test suite and James Clark
5462 * tests, this is broken. However the XML REC uses
5463 * it. Is the XML REC not well-formed ????
5464 * This is a hack to avoid this problem
5465 *
5466 * ANSWER: since lt gt amp .. are already defined,
5467 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005468 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005469 * is lousy but acceptable.
5470 */
5471 list = xmlNewDocText(ctxt->myDoc, value);
5472 if (list != NULL) {
5473 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5474 (ent->children == NULL)) {
5475 ent->children = list;
5476 ent->last = list;
5477 list->parent = (xmlNodePtr) ent;
5478 } else {
5479 xmlFreeNodeList(list);
5480 }
5481 } else if (list != NULL) {
5482 xmlFreeNodeList(list);
5483 }
5484 } else {
5485 /*
5486 * 4.3.2: An internal general parsed entity is well-formed
5487 * if its replacement text matches the production labeled
5488 * content.
5489 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005490
5491 void *user_data;
5492 /*
5493 * This is a bit hackish but this seems the best
5494 * way to make sure both SAX and DOM entity support
5495 * behaves okay.
5496 */
5497 if (ctxt->userData == ctxt)
5498 user_data = NULL;
5499 else
5500 user_data = ctxt->userData;
5501
Owen Taylor3473f882001-02-23 17:55:21 +00005502 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5503 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005504 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5505 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005506 ctxt->depth--;
5507 } else if (ent->etype ==
5508 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5509 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005510 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005511 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005512 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005513 ctxt->depth--;
5514 } else {
5515 ret = -1;
5516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "Internal: invalid entity type\n");
5519 }
5520 if (ret == XML_ERR_ENTITY_LOOP) {
5521 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5523 ctxt->sax->error(ctxt->userData,
5524 "Detected entity reference loop\n");
5525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005527 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005528 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005529 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5530 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005531 (ent->children == NULL)) {
5532 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005533 if (ctxt->replaceEntities) {
5534 /*
5535 * Prune it directly in the generated document
5536 * except for single text nodes.
5537 */
5538 if ((list->type == XML_TEXT_NODE) &&
5539 (list->next == NULL)) {
5540 list->parent = (xmlNodePtr) ent;
5541 list = NULL;
5542 } else {
5543 while (list != NULL) {
5544 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005545 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 if (list->next == NULL)
5547 ent->last = list;
5548 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005549 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005550 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005551 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5552 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005553 }
5554 } else {
5555 while (list != NULL) {
5556 list->parent = (xmlNodePtr) ent;
5557 if (list->next == NULL)
5558 ent->last = list;
5559 list = list->next;
5560 }
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
5562 } else {
5563 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 } else if (ret > 0) {
5567 ctxt->errNo = ret;
5568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5569 ctxt->sax->error(ctxt->userData,
5570 "Entity value required\n");
5571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005573 } else if (list != NULL) {
5574 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 }
5578 }
5579 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5580 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5581 /*
5582 * Create a node.
5583 */
5584 ctxt->sax->reference(ctxt->userData, ent->name);
5585 return;
5586 } else if (ctxt->replaceEntities) {
5587 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5588 /*
5589 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005591 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005592 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005593 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005594 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005595 cur = ent->children;
5596 while (cur != NULL) {
5597 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005598 if (firstChild == NULL){
5599 firstChild = new;
5600 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005601 xmlAddChild(ctxt->node, new);
5602 if (cur == ent->last)
5603 break;
5604 cur = cur->next;
5605 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5607 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005608 } else {
5609 /*
5610 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005611 * node with a possible previous text one which
5612 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005613 */
5614 if (ent->children->type == XML_TEXT_NODE)
5615 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5616 if ((ent->last != ent->children) &&
5617 (ent->last->type == XML_TEXT_NODE))
5618 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5619 xmlAddChildList(ctxt->node, ent->children);
5620 }
5621
Owen Taylor3473f882001-02-23 17:55:21 +00005622 /*
5623 * This is to avoid a nasty side effect, see
5624 * characters() in SAX.c
5625 */
5626 ctxt->nodemem = 0;
5627 ctxt->nodelen = 0;
5628 return;
5629 } else {
5630 /*
5631 * Probably running in SAX mode
5632 */
5633 xmlParserInputPtr input;
5634
5635 input = xmlNewEntityInputStream(ctxt, ent);
5636 xmlPushInput(ctxt, input);
5637 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5638 (RAW == '<') && (NXT(1) == '?') &&
5639 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5640 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5641 xmlParseTextDecl(ctxt);
5642 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5643 /*
5644 * The XML REC instructs us to stop parsing right here
5645 */
5646 ctxt->instate = XML_PARSER_EOF;
5647 return;
5648 }
5649 if (input->standalone == 1) {
5650 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5652 ctxt->sax->error(ctxt->userData,
5653 "external parsed entities cannot be standalone\n");
5654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005656 }
5657 }
5658 return;
5659 }
5660 }
5661 } else {
5662 val = ent->content;
5663 if (val == NULL) return;
5664 /*
5665 * inline the entity.
5666 */
5667 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5668 (!ctxt->disableSAX))
5669 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5670 }
5671 }
5672}
5673
5674/**
5675 * xmlParseEntityRef:
5676 * @ctxt: an XML parser context
5677 *
5678 * parse ENTITY references declarations
5679 *
5680 * [68] EntityRef ::= '&' Name ';'
5681 *
5682 * [ WFC: Entity Declared ]
5683 * In a document without any DTD, a document with only an internal DTD
5684 * subset which contains no parameter entity references, or a document
5685 * with "standalone='yes'", the Name given in the entity reference
5686 * must match that in an entity declaration, except that well-formed
5687 * documents need not declare any of the following entities: amp, lt,
5688 * gt, apos, quot. The declaration of a parameter entity must precede
5689 * any reference to it. Similarly, the declaration of a general entity
5690 * must precede any reference to it which appears in a default value in an
5691 * attribute-list declaration. Note that if entities are declared in the
5692 * external subset or in external parameter entities, a non-validating
5693 * processor is not obligated to read and process their declarations;
5694 * for such documents, the rule that an entity must be declared is a
5695 * well-formedness constraint only if standalone='yes'.
5696 *
5697 * [ WFC: Parsed Entity ]
5698 * An entity reference must not contain the name of an unparsed entity
5699 *
5700 * Returns the xmlEntityPtr if found, or NULL otherwise.
5701 */
5702xmlEntityPtr
5703xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5704 xmlChar *name;
5705 xmlEntityPtr ent = NULL;
5706
5707 GROW;
5708
5709 if (RAW == '&') {
5710 NEXT;
5711 name = xmlParseName(ctxt);
5712 if (name == NULL) {
5713 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
5716 "xmlParseEntityRef: no name\n");
5717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005719 } else {
5720 if (RAW == ';') {
5721 NEXT;
5722 /*
5723 * Ask first SAX for entity resolution, otherwise try the
5724 * predefined set.
5725 */
5726 if (ctxt->sax != NULL) {
5727 if (ctxt->sax->getEntity != NULL)
5728 ent = ctxt->sax->getEntity(ctxt->userData, name);
5729 if (ent == NULL)
5730 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5732 ent = getEntity(ctxt, name);
5733 }
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 /*
5736 * [ WFC: Entity Declared ]
5737 * In a document without any DTD, a document with only an
5738 * internal DTD subset which contains no parameter entity
5739 * references, or a document with "standalone='yes'", the
5740 * Name given in the entity reference must match that in an
5741 * entity declaration, except that well-formed documents
5742 * need not declare any of the following entities: amp, lt,
5743 * gt, apos, quot.
5744 * The declaration of a parameter entity must precede any
5745 * reference to it.
5746 * Similarly, the declaration of a general entity must
5747 * precede any reference to it which appears in a default
5748 * value in an attribute-list declaration. Note that if
5749 * entities are declared in the external subset or in
5750 * external parameter entities, a non-validating processor
5751 * is not obligated to read and process their declarations;
5752 * for such documents, the rule that an entity must be
5753 * declared is a well-formedness constraint only if
5754 * standalone='yes'.
5755 */
5756 if (ent == NULL) {
5757 if ((ctxt->standalone == 1) ||
5758 ((ctxt->hasExternalSubset == 0) &&
5759 (ctxt->hasPErefs == 0))) {
5760 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "Entity '%s' not defined\n", name);
5764 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005765 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005766 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005767 } else {
5768 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005770 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005771 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005772 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005773 }
5774 }
5775
5776 /*
5777 * [ WFC: Parsed Entity ]
5778 * An entity reference must not contain the name of an
5779 * unparsed entity
5780 */
5781 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5782 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5784 ctxt->sax->error(ctxt->userData,
5785 "Entity reference to unparsed entity %s\n", name);
5786 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005788 }
5789
5790 /*
5791 * [ WFC: No External Entity References ]
5792 * Attribute values cannot contain direct or indirect
5793 * entity references to external entities.
5794 */
5795 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5796 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5797 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799 ctxt->sax->error(ctxt->userData,
5800 "Attribute references external entity '%s'\n", name);
5801 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005802 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 /*
5805 * [ WFC: No < in Attribute Values ]
5806 * The replacement text of any entity referred to directly or
5807 * indirectly in an attribute value (other than "&lt;") must
5808 * not contain a <.
5809 */
5810 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5811 (ent != NULL) &&
5812 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5813 (ent->content != NULL) &&
5814 (xmlStrchr(ent->content, '<'))) {
5815 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData,
5818 "'<' in entity '%s' is not allowed in attributes values\n", name);
5819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 }
5822
5823 /*
5824 * Internal check, no parameter entities here ...
5825 */
5826 else {
5827 switch (ent->etype) {
5828 case XML_INTERNAL_PARAMETER_ENTITY:
5829 case XML_EXTERNAL_PARAMETER_ENTITY:
5830 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5832 ctxt->sax->error(ctxt->userData,
5833 "Attempt to reference the parameter entity '%s'\n", name);
5834 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005835 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005836 break;
5837 default:
5838 break;
5839 }
5840 }
5841
5842 /*
5843 * [ WFC: No Recursion ]
5844 * A parsed entity must not contain a recursive reference
5845 * to itself, either directly or indirectly.
5846 * Done somewhere else
5847 */
5848
5849 } else {
5850 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852 ctxt->sax->error(ctxt->userData,
5853 "xmlParseEntityRef: expecting ';'\n");
5854 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005855 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005856 }
5857 xmlFree(name);
5858 }
5859 }
5860 return(ent);
5861}
5862
5863/**
5864 * xmlParseStringEntityRef:
5865 * @ctxt: an XML parser context
5866 * @str: a pointer to an index in the string
5867 *
5868 * parse ENTITY references declarations, but this version parses it from
5869 * a string value.
5870 *
5871 * [68] EntityRef ::= '&' Name ';'
5872 *
5873 * [ WFC: Entity Declared ]
5874 * In a document without any DTD, a document with only an internal DTD
5875 * subset which contains no parameter entity references, or a document
5876 * with "standalone='yes'", the Name given in the entity reference
5877 * must match that in an entity declaration, except that well-formed
5878 * documents need not declare any of the following entities: amp, lt,
5879 * gt, apos, quot. The declaration of a parameter entity must precede
5880 * any reference to it. Similarly, the declaration of a general entity
5881 * must precede any reference to it which appears in a default value in an
5882 * attribute-list declaration. Note that if entities are declared in the
5883 * external subset or in external parameter entities, a non-validating
5884 * processor is not obligated to read and process their declarations;
5885 * for such documents, the rule that an entity must be declared is a
5886 * well-formedness constraint only if standalone='yes'.
5887 *
5888 * [ WFC: Parsed Entity ]
5889 * An entity reference must not contain the name of an unparsed entity
5890 *
5891 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5892 * is updated to the current location in the string.
5893 */
5894xmlEntityPtr
5895xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5896 xmlChar *name;
5897 const xmlChar *ptr;
5898 xmlChar cur;
5899 xmlEntityPtr ent = NULL;
5900
5901 if ((str == NULL) || (*str == NULL))
5902 return(NULL);
5903 ptr = *str;
5904 cur = *ptr;
5905 if (cur == '&') {
5906 ptr++;
5907 cur = *ptr;
5908 name = xmlParseStringName(ctxt, &ptr);
5909 if (name == NULL) {
5910 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005913 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 } else {
5917 if (*ptr == ';') {
5918 ptr++;
5919 /*
5920 * Ask first SAX for entity resolution, otherwise try the
5921 * predefined set.
5922 */
5923 if (ctxt->sax != NULL) {
5924 if (ctxt->sax->getEntity != NULL)
5925 ent = ctxt->sax->getEntity(ctxt->userData, name);
5926 if (ent == NULL)
5927 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005928 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5929 ent = getEntity(ctxt, name);
5930 }
Owen Taylor3473f882001-02-23 17:55:21 +00005931 }
5932 /*
5933 * [ WFC: Entity Declared ]
5934 * In a document without any DTD, a document with only an
5935 * internal DTD subset which contains no parameter entity
5936 * references, or a document with "standalone='yes'", the
5937 * Name given in the entity reference must match that in an
5938 * entity declaration, except that well-formed documents
5939 * need not declare any of the following entities: amp, lt,
5940 * gt, apos, quot.
5941 * The declaration of a parameter entity must precede any
5942 * reference to it.
5943 * Similarly, the declaration of a general entity must
5944 * precede any reference to it which appears in a default
5945 * value in an attribute-list declaration. Note that if
5946 * entities are declared in the external subset or in
5947 * external parameter entities, a non-validating processor
5948 * is not obligated to read and process their declarations;
5949 * for such documents, the rule that an entity must be
5950 * declared is a well-formedness constraint only if
5951 * standalone='yes'.
5952 */
5953 if (ent == NULL) {
5954 if ((ctxt->standalone == 1) ||
5955 ((ctxt->hasExternalSubset == 0) &&
5956 (ctxt->hasPErefs == 0))) {
5957 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "Entity '%s' not defined\n", name);
5961 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005962 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 } else {
5964 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5965 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5966 ctxt->sax->warning(ctxt->userData,
5967 "Entity '%s' not defined\n", name);
5968 }
5969 }
5970
5971 /*
5972 * [ WFC: Parsed Entity ]
5973 * An entity reference must not contain the name of an
5974 * unparsed entity
5975 */
5976 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5977 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
5980 "Entity reference to unparsed entity %s\n", name);
5981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 }
5984
5985 /*
5986 * [ WFC: No External Entity References ]
5987 * Attribute values cannot contain direct or indirect
5988 * entity references to external entities.
5989 */
5990 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5991 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5992 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5994 ctxt->sax->error(ctxt->userData,
5995 "Attribute references external entity '%s'\n", name);
5996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 /*
6000 * [ WFC: No < in Attribute Values ]
6001 * The replacement text of any entity referred to directly or
6002 * indirectly in an attribute value (other than "&lt;") must
6003 * not contain a <.
6004 */
6005 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6006 (ent != NULL) &&
6007 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6008 (ent->content != NULL) &&
6009 (xmlStrchr(ent->content, '<'))) {
6010 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6012 ctxt->sax->error(ctxt->userData,
6013 "'<' in entity '%s' is not allowed in attributes values\n", name);
6014 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006016 }
6017
6018 /*
6019 * Internal check, no parameter entities here ...
6020 */
6021 else {
6022 switch (ent->etype) {
6023 case XML_INTERNAL_PARAMETER_ENTITY:
6024 case XML_EXTERNAL_PARAMETER_ENTITY:
6025 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6027 ctxt->sax->error(ctxt->userData,
6028 "Attempt to reference the parameter entity '%s'\n", name);
6029 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006030 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006031 break;
6032 default:
6033 break;
6034 }
6035 }
6036
6037 /*
6038 * [ WFC: No Recursion ]
6039 * A parsed entity must not contain a recursive reference
6040 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006041 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006042 */
6043
6044 } else {
6045 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6047 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006048 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006051 }
6052 xmlFree(name);
6053 }
6054 }
6055 *str = ptr;
6056 return(ent);
6057}
6058
6059/**
6060 * xmlParsePEReference:
6061 * @ctxt: an XML parser context
6062 *
6063 * parse PEReference declarations
6064 * The entity content is handled directly by pushing it's content as
6065 * a new input stream.
6066 *
6067 * [69] PEReference ::= '%' Name ';'
6068 *
6069 * [ WFC: No Recursion ]
6070 * A parsed entity must not contain a recursive
6071 * reference to itself, either directly or indirectly.
6072 *
6073 * [ WFC: Entity Declared ]
6074 * In a document without any DTD, a document with only an internal DTD
6075 * subset which contains no parameter entity references, or a document
6076 * with "standalone='yes'", ... ... The declaration of a parameter
6077 * entity must precede any reference to it...
6078 *
6079 * [ VC: Entity Declared ]
6080 * In a document with an external subset or external parameter entities
6081 * with "standalone='no'", ... ... The declaration of a parameter entity
6082 * must precede any reference to it...
6083 *
6084 * [ WFC: In DTD ]
6085 * Parameter-entity references may only appear in the DTD.
6086 * NOTE: misleading but this is handled.
6087 */
6088void
6089xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6090 xmlChar *name;
6091 xmlEntityPtr entity = NULL;
6092 xmlParserInputPtr input;
6093
6094 if (RAW == '%') {
6095 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006096 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006097 if (name == NULL) {
6098 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6100 ctxt->sax->error(ctxt->userData,
6101 "xmlParsePEReference: no name\n");
6102 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 } else {
6105 if (RAW == ';') {
6106 NEXT;
6107 if ((ctxt->sax != NULL) &&
6108 (ctxt->sax->getParameterEntity != NULL))
6109 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6110 name);
6111 if (entity == NULL) {
6112 /*
6113 * [ WFC: Entity Declared ]
6114 * In a document without any DTD, a document with only an
6115 * internal DTD subset which contains no parameter entity
6116 * references, or a document with "standalone='yes'", ...
6117 * ... The declaration of a parameter entity must precede
6118 * any reference to it...
6119 */
6120 if ((ctxt->standalone == 1) ||
6121 ((ctxt->hasExternalSubset == 0) &&
6122 (ctxt->hasPErefs == 0))) {
6123 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6124 if ((!ctxt->disableSAX) &&
6125 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6126 ctxt->sax->error(ctxt->userData,
6127 "PEReference: %%%s; not found\n", name);
6128 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006129 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006130 } else {
6131 /*
6132 * [ VC: Entity Declared ]
6133 * In a document with an external subset or external
6134 * parameter entities with "standalone='no'", ...
6135 * ... The declaration of a parameter entity must precede
6136 * any reference to it...
6137 */
6138 if ((!ctxt->disableSAX) &&
6139 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6140 ctxt->sax->warning(ctxt->userData,
6141 "PEReference: %%%s; not found\n", name);
6142 ctxt->valid = 0;
6143 }
6144 } else {
6145 /*
6146 * Internal checking in case the entity quest barfed
6147 */
6148 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6149 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6150 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6151 ctxt->sax->warning(ctxt->userData,
6152 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006153 } else if (ctxt->input->free != deallocblankswrapper) {
6154 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6155 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 } else {
6157 /*
6158 * TODO !!!
6159 * handle the extra spaces added before and after
6160 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6161 */
6162 input = xmlNewEntityInputStream(ctxt, entity);
6163 xmlPushInput(ctxt, input);
6164 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6165 (RAW == '<') && (NXT(1) == '?') &&
6166 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6167 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6168 xmlParseTextDecl(ctxt);
6169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6170 /*
6171 * The XML REC instructs us to stop parsing
6172 * right here
6173 */
6174 ctxt->instate = XML_PARSER_EOF;
6175 xmlFree(name);
6176 return;
6177 }
6178 }
Owen Taylor3473f882001-02-23 17:55:21 +00006179 }
6180 }
6181 ctxt->hasPErefs = 1;
6182 } else {
6183 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185 ctxt->sax->error(ctxt->userData,
6186 "xmlParsePEReference: expecting ';'\n");
6187 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006188 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006189 }
6190 xmlFree(name);
6191 }
6192 }
6193}
6194
6195/**
6196 * xmlParseStringPEReference:
6197 * @ctxt: an XML parser context
6198 * @str: a pointer to an index in the string
6199 *
6200 * parse PEReference declarations
6201 *
6202 * [69] PEReference ::= '%' Name ';'
6203 *
6204 * [ WFC: No Recursion ]
6205 * A parsed entity must not contain a recursive
6206 * reference to itself, either directly or indirectly.
6207 *
6208 * [ WFC: Entity Declared ]
6209 * In a document without any DTD, a document with only an internal DTD
6210 * subset which contains no parameter entity references, or a document
6211 * with "standalone='yes'", ... ... The declaration of a parameter
6212 * entity must precede any reference to it...
6213 *
6214 * [ VC: Entity Declared ]
6215 * In a document with an external subset or external parameter entities
6216 * with "standalone='no'", ... ... The declaration of a parameter entity
6217 * must precede any reference to it...
6218 *
6219 * [ WFC: In DTD ]
6220 * Parameter-entity references may only appear in the DTD.
6221 * NOTE: misleading but this is handled.
6222 *
6223 * Returns the string of the entity content.
6224 * str is updated to the current value of the index
6225 */
6226xmlEntityPtr
6227xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6228 const xmlChar *ptr;
6229 xmlChar cur;
6230 xmlChar *name;
6231 xmlEntityPtr entity = NULL;
6232
6233 if ((str == NULL) || (*str == NULL)) return(NULL);
6234 ptr = *str;
6235 cur = *ptr;
6236 if (cur == '%') {
6237 ptr++;
6238 cur = *ptr;
6239 name = xmlParseStringName(ctxt, &ptr);
6240 if (name == NULL) {
6241 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243 ctxt->sax->error(ctxt->userData,
6244 "xmlParseStringPEReference: no name\n");
6245 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006246 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006247 } else {
6248 cur = *ptr;
6249 if (cur == ';') {
6250 ptr++;
6251 cur = *ptr;
6252 if ((ctxt->sax != NULL) &&
6253 (ctxt->sax->getParameterEntity != NULL))
6254 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6255 name);
6256 if (entity == NULL) {
6257 /*
6258 * [ WFC: Entity Declared ]
6259 * In a document without any DTD, a document with only an
6260 * internal DTD subset which contains no parameter entity
6261 * references, or a document with "standalone='yes'", ...
6262 * ... The declaration of a parameter entity must precede
6263 * any reference to it...
6264 */
6265 if ((ctxt->standalone == 1) ||
6266 ((ctxt->hasExternalSubset == 0) &&
6267 (ctxt->hasPErefs == 0))) {
6268 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "PEReference: %%%s; not found\n", name);
6272 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
6275 /*
6276 * [ VC: Entity Declared ]
6277 * In a document with an external subset or external
6278 * parameter entities with "standalone='no'", ...
6279 * ... The declaration of a parameter entity must
6280 * precede any reference to it...
6281 */
6282 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6283 ctxt->sax->warning(ctxt->userData,
6284 "PEReference: %%%s; not found\n", name);
6285 ctxt->valid = 0;
6286 }
6287 } else {
6288 /*
6289 * Internal checking in case the entity quest barfed
6290 */
6291 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6292 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6293 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6294 ctxt->sax->warning(ctxt->userData,
6295 "Internal: %%%s; is not a parameter entity\n", name);
6296 }
6297 }
6298 ctxt->hasPErefs = 1;
6299 } else {
6300 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6302 ctxt->sax->error(ctxt->userData,
6303 "xmlParseStringPEReference: expecting ';'\n");
6304 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006306 }
6307 xmlFree(name);
6308 }
6309 }
6310 *str = ptr;
6311 return(entity);
6312}
6313
6314/**
6315 * xmlParseDocTypeDecl:
6316 * @ctxt: an XML parser context
6317 *
6318 * parse a DOCTYPE declaration
6319 *
6320 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6321 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6322 *
6323 * [ VC: Root Element Type ]
6324 * The Name in the document type declaration must match the element
6325 * type of the root element.
6326 */
6327
6328void
6329xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6330 xmlChar *name = NULL;
6331 xmlChar *ExternalID = NULL;
6332 xmlChar *URI = NULL;
6333
6334 /*
6335 * We know that '<!DOCTYPE' has been detected.
6336 */
6337 SKIP(9);
6338
6339 SKIP_BLANKS;
6340
6341 /*
6342 * Parse the DOCTYPE name.
6343 */
6344 name = xmlParseName(ctxt);
6345 if (name == NULL) {
6346 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6348 ctxt->sax->error(ctxt->userData,
6349 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6350 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006351 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006352 }
6353 ctxt->intSubName = name;
6354
6355 SKIP_BLANKS;
6356
6357 /*
6358 * Check for SystemID and ExternalID
6359 */
6360 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6361
6362 if ((URI != NULL) || (ExternalID != NULL)) {
6363 ctxt->hasExternalSubset = 1;
6364 }
6365 ctxt->extSubURI = URI;
6366 ctxt->extSubSystem = ExternalID;
6367
6368 SKIP_BLANKS;
6369
6370 /*
6371 * Create and update the internal subset.
6372 */
6373 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6374 (!ctxt->disableSAX))
6375 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6376
6377 /*
6378 * Is there any internal subset declarations ?
6379 * they are handled separately in xmlParseInternalSubset()
6380 */
6381 if (RAW == '[')
6382 return;
6383
6384 /*
6385 * We should be at the end of the DOCTYPE declaration.
6386 */
6387 if (RAW != '>') {
6388 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006390 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006391 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006392 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394 NEXT;
6395}
6396
6397/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006398 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006399 * @ctxt: an XML parser context
6400 *
6401 * parse the internal subset declaration
6402 *
6403 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6404 */
6405
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006406static void
Owen Taylor3473f882001-02-23 17:55:21 +00006407xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6408 /*
6409 * Is there any DTD definition ?
6410 */
6411 if (RAW == '[') {
6412 ctxt->instate = XML_PARSER_DTD;
6413 NEXT;
6414 /*
6415 * Parse the succession of Markup declarations and
6416 * PEReferences.
6417 * Subsequence (markupdecl | PEReference | S)*
6418 */
6419 while (RAW != ']') {
6420 const xmlChar *check = CUR_PTR;
6421 int cons = ctxt->input->consumed;
6422
6423 SKIP_BLANKS;
6424 xmlParseMarkupDecl(ctxt);
6425 xmlParsePEReference(ctxt);
6426
6427 /*
6428 * Pop-up of finished entities.
6429 */
6430 while ((RAW == 0) && (ctxt->inputNr > 1))
6431 xmlPopInput(ctxt);
6432
6433 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6434 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6436 ctxt->sax->error(ctxt->userData,
6437 "xmlParseInternalSubset: error detected in Markup declaration\n");
6438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006440 break;
6441 }
6442 }
6443 if (RAW == ']') {
6444 NEXT;
6445 SKIP_BLANKS;
6446 }
6447 }
6448
6449 /*
6450 * We should be at the end of the DOCTYPE declaration.
6451 */
6452 if (RAW != '>') {
6453 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006455 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006456 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006457 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006458 }
6459 NEXT;
6460}
6461
6462/**
6463 * xmlParseAttribute:
6464 * @ctxt: an XML parser context
6465 * @value: a xmlChar ** used to store the value of the attribute
6466 *
6467 * parse an attribute
6468 *
6469 * [41] Attribute ::= Name Eq AttValue
6470 *
6471 * [ WFC: No External Entity References ]
6472 * Attribute values cannot contain direct or indirect entity references
6473 * to external entities.
6474 *
6475 * [ WFC: No < in Attribute Values ]
6476 * The replacement text of any entity referred to directly or indirectly in
6477 * an attribute value (other than "&lt;") must not contain a <.
6478 *
6479 * [ VC: Attribute Value Type ]
6480 * The attribute must have been declared; the value must be of the type
6481 * declared for it.
6482 *
6483 * [25] Eq ::= S? '=' S?
6484 *
6485 * With namespace:
6486 *
6487 * [NS 11] Attribute ::= QName Eq AttValue
6488 *
6489 * Also the case QName == xmlns:??? is handled independently as a namespace
6490 * definition.
6491 *
6492 * Returns the attribute name, and the value in *value.
6493 */
6494
6495xmlChar *
6496xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6497 xmlChar *name, *val;
6498
6499 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006500 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006501 name = xmlParseName(ctxt);
6502 if (name == NULL) {
6503 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6505 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006508 return(NULL);
6509 }
6510
6511 /*
6512 * read the value
6513 */
6514 SKIP_BLANKS;
6515 if (RAW == '=') {
6516 NEXT;
6517 SKIP_BLANKS;
6518 val = xmlParseAttValue(ctxt);
6519 ctxt->instate = XML_PARSER_CONTENT;
6520 } else {
6521 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6523 ctxt->sax->error(ctxt->userData,
6524 "Specification mandate value for attribute %s\n", name);
6525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006527 xmlFree(name);
6528 return(NULL);
6529 }
6530
6531 /*
6532 * Check that xml:lang conforms to the specification
6533 * No more registered as an error, just generate a warning now
6534 * since this was deprecated in XML second edition
6535 */
6536 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6537 if (!xmlCheckLanguageID(val)) {
6538 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6539 ctxt->sax->warning(ctxt->userData,
6540 "Malformed value for xml:lang : %s\n", val);
6541 }
6542 }
6543
6544 /*
6545 * Check that xml:space conforms to the specification
6546 */
6547 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6548 if (xmlStrEqual(val, BAD_CAST "default"))
6549 *(ctxt->space) = 0;
6550 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6551 *(ctxt->space) = 1;
6552 else {
6553 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6555 ctxt->sax->error(ctxt->userData,
6556"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6557 val);
6558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006560 }
6561 }
6562
6563 *value = val;
6564 return(name);
6565}
6566
6567/**
6568 * xmlParseStartTag:
6569 * @ctxt: an XML parser context
6570 *
6571 * parse a start of tag either for rule element or
6572 * EmptyElement. In both case we don't parse the tag closing chars.
6573 *
6574 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6575 *
6576 * [ WFC: Unique Att Spec ]
6577 * No attribute name may appear more than once in the same start-tag or
6578 * empty-element tag.
6579 *
6580 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6581 *
6582 * [ WFC: Unique Att Spec ]
6583 * No attribute name may appear more than once in the same start-tag or
6584 * empty-element tag.
6585 *
6586 * With namespace:
6587 *
6588 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6589 *
6590 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6591 *
6592 * Returns the element name parsed
6593 */
6594
6595xmlChar *
6596xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6597 xmlChar *name;
6598 xmlChar *attname;
6599 xmlChar *attvalue;
6600 const xmlChar **atts = NULL;
6601 int nbatts = 0;
6602 int maxatts = 0;
6603 int i;
6604
6605 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006606 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006607
6608 name = xmlParseName(ctxt);
6609 if (name == NULL) {
6610 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6612 ctxt->sax->error(ctxt->userData,
6613 "xmlParseStartTag: invalid element name\n");
6614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006616 return(NULL);
6617 }
6618
6619 /*
6620 * Now parse the attributes, it ends up with the ending
6621 *
6622 * (S Attribute)* S?
6623 */
6624 SKIP_BLANKS;
6625 GROW;
6626
Daniel Veillard21a0f912001-02-25 19:54:14 +00006627 while ((RAW != '>') &&
6628 ((RAW != '/') || (NXT(1) != '>')) &&
6629 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006630 const xmlChar *q = CUR_PTR;
6631 int cons = ctxt->input->consumed;
6632
6633 attname = xmlParseAttribute(ctxt, &attvalue);
6634 if ((attname != NULL) && (attvalue != NULL)) {
6635 /*
6636 * [ WFC: Unique Att Spec ]
6637 * No attribute name may appear more than once in the same
6638 * start-tag or empty-element tag.
6639 */
6640 for (i = 0; i < nbatts;i += 2) {
6641 if (xmlStrEqual(atts[i], attname)) {
6642 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6644 ctxt->sax->error(ctxt->userData,
6645 "Attribute %s redefined\n",
6646 attname);
6647 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006648 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006649 xmlFree(attname);
6650 xmlFree(attvalue);
6651 goto failed;
6652 }
6653 }
6654
6655 /*
6656 * Add the pair to atts
6657 */
6658 if (atts == NULL) {
6659 maxatts = 10;
6660 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6661 if (atts == NULL) {
6662 xmlGenericError(xmlGenericErrorContext,
6663 "malloc of %ld byte failed\n",
6664 maxatts * (long)sizeof(xmlChar *));
6665 return(NULL);
6666 }
6667 } else if (nbatts + 4 > maxatts) {
6668 maxatts *= 2;
6669 atts = (const xmlChar **) xmlRealloc((void *) atts,
6670 maxatts * sizeof(xmlChar *));
6671 if (atts == NULL) {
6672 xmlGenericError(xmlGenericErrorContext,
6673 "realloc of %ld byte failed\n",
6674 maxatts * (long)sizeof(xmlChar *));
6675 return(NULL);
6676 }
6677 }
6678 atts[nbatts++] = attname;
6679 atts[nbatts++] = attvalue;
6680 atts[nbatts] = NULL;
6681 atts[nbatts + 1] = NULL;
6682 } else {
6683 if (attname != NULL)
6684 xmlFree(attname);
6685 if (attvalue != NULL)
6686 xmlFree(attvalue);
6687 }
6688
6689failed:
6690
6691 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6692 break;
6693 if (!IS_BLANK(RAW)) {
6694 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6696 ctxt->sax->error(ctxt->userData,
6697 "attributes construct error\n");
6698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006700 }
6701 SKIP_BLANKS;
6702 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6703 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6705 ctxt->sax->error(ctxt->userData,
6706 "xmlParseStartTag: problem parsing attributes\n");
6707 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006708 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006709 break;
6710 }
6711 GROW;
6712 }
6713
6714 /*
6715 * SAX: Start of Element !
6716 */
6717 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6718 (!ctxt->disableSAX))
6719 ctxt->sax->startElement(ctxt->userData, name, atts);
6720
6721 if (atts != NULL) {
6722 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6723 xmlFree((void *) atts);
6724 }
6725 return(name);
6726}
6727
6728/**
6729 * xmlParseEndTag:
6730 * @ctxt: an XML parser context
6731 *
6732 * parse an end of tag
6733 *
6734 * [42] ETag ::= '</' Name S? '>'
6735 *
6736 * With namespace
6737 *
6738 * [NS 9] ETag ::= '</' QName S? '>'
6739 */
6740
6741void
6742xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6743 xmlChar *name;
6744 xmlChar *oldname;
6745
6746 GROW;
6747 if ((RAW != '<') || (NXT(1) != '/')) {
6748 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6750 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6751 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006752 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006753 return;
6754 }
6755 SKIP(2);
6756
Daniel Veillard46de64e2002-05-29 08:21:33 +00006757 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006758
6759 /*
6760 * We should definitely be at the ending "S? '>'" part
6761 */
6762 GROW;
6763 SKIP_BLANKS;
6764 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6765 ctxt->errNo = XML_ERR_GT_REQUIRED;
6766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6768 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006770 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006771 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006772
6773 /*
6774 * [ WFC: Element Type Match ]
6775 * The Name in an element's end-tag must match the element type in the
6776 * start-tag.
6777 *
6778 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006779 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006780 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006782 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006783 ctxt->sax->error(ctxt->userData,
6784 "Opening and ending tag mismatch: %s and %s\n",
6785 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006786 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006787 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006788 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006789 }
6790
6791 }
6792 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6794#if 0
6795 else {
6796 /*
6797 * Recover in case of one missing close
6798 */
6799 if ((ctxt->nameNr > 2) &&
6800 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6801 namePop(ctxt);
6802 spacePop(ctxt);
6803 }
6804 }
6805#endif
6806 if (name != NULL)
6807 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006808 }
6809
6810 /*
6811 * SAX: End of Tag
6812 */
6813 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6814 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006815 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006816
Owen Taylor3473f882001-02-23 17:55:21 +00006817 oldname = namePop(ctxt);
6818 spacePop(ctxt);
6819 if (oldname != NULL) {
6820#ifdef DEBUG_STACK
6821 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6822#endif
6823 xmlFree(oldname);
6824 }
6825 return;
6826}
6827
6828/**
6829 * xmlParseCDSect:
6830 * @ctxt: an XML parser context
6831 *
6832 * Parse escaped pure raw content.
6833 *
6834 * [18] CDSect ::= CDStart CData CDEnd
6835 *
6836 * [19] CDStart ::= '<![CDATA['
6837 *
6838 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6839 *
6840 * [21] CDEnd ::= ']]>'
6841 */
6842void
6843xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6844 xmlChar *buf = NULL;
6845 int len = 0;
6846 int size = XML_PARSER_BUFFER_SIZE;
6847 int r, rl;
6848 int s, sl;
6849 int cur, l;
6850 int count = 0;
6851
6852 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6853 (NXT(2) == '[') && (NXT(3) == 'C') &&
6854 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6855 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6856 (NXT(8) == '[')) {
6857 SKIP(9);
6858 } else
6859 return;
6860
6861 ctxt->instate = XML_PARSER_CDATA_SECTION;
6862 r = CUR_CHAR(rl);
6863 if (!IS_CHAR(r)) {
6864 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6866 ctxt->sax->error(ctxt->userData,
6867 "CData section not finished\n");
6868 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006869 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 ctxt->instate = XML_PARSER_CONTENT;
6871 return;
6872 }
6873 NEXTL(rl);
6874 s = CUR_CHAR(sl);
6875 if (!IS_CHAR(s)) {
6876 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6878 ctxt->sax->error(ctxt->userData,
6879 "CData section not finished\n");
6880 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006881 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006882 ctxt->instate = XML_PARSER_CONTENT;
6883 return;
6884 }
6885 NEXTL(sl);
6886 cur = CUR_CHAR(l);
6887 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6888 if (buf == NULL) {
6889 xmlGenericError(xmlGenericErrorContext,
6890 "malloc of %d byte failed\n", size);
6891 return;
6892 }
6893 while (IS_CHAR(cur) &&
6894 ((r != ']') || (s != ']') || (cur != '>'))) {
6895 if (len + 5 >= size) {
6896 size *= 2;
6897 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6898 if (buf == NULL) {
6899 xmlGenericError(xmlGenericErrorContext,
6900 "realloc of %d byte failed\n", size);
6901 return;
6902 }
6903 }
6904 COPY_BUF(rl,buf,len,r);
6905 r = s;
6906 rl = sl;
6907 s = cur;
6908 sl = l;
6909 count++;
6910 if (count > 50) {
6911 GROW;
6912 count = 0;
6913 }
6914 NEXTL(l);
6915 cur = CUR_CHAR(l);
6916 }
6917 buf[len] = 0;
6918 ctxt->instate = XML_PARSER_CONTENT;
6919 if (cur != '>') {
6920 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6922 ctxt->sax->error(ctxt->userData,
6923 "CData section not finished\n%.50s\n", buf);
6924 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006925 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006926 xmlFree(buf);
6927 return;
6928 }
6929 NEXTL(l);
6930
6931 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006932 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006933 */
6934 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6935 if (ctxt->sax->cdataBlock != NULL)
6936 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006937 else if (ctxt->sax->characters != NULL)
6938 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006939 }
6940 xmlFree(buf);
6941}
6942
6943/**
6944 * xmlParseContent:
6945 * @ctxt: an XML parser context
6946 *
6947 * Parse a content:
6948 *
6949 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6950 */
6951
6952void
6953xmlParseContent(xmlParserCtxtPtr ctxt) {
6954 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006955 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006956 ((RAW != '<') || (NXT(1) != '/'))) {
6957 const xmlChar *test = CUR_PTR;
6958 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006959 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006960
6961 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006962 * First case : a Processing Instruction.
6963 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006964 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006965 xmlParsePI(ctxt);
6966 }
6967
6968 /*
6969 * Second case : a CDSection
6970 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006971 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006972 (NXT(2) == '[') && (NXT(3) == 'C') &&
6973 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6974 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6975 (NXT(8) == '[')) {
6976 xmlParseCDSect(ctxt);
6977 }
6978
6979 /*
6980 * Third case : a comment
6981 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006982 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006983 (NXT(2) == '-') && (NXT(3) == '-')) {
6984 xmlParseComment(ctxt);
6985 ctxt->instate = XML_PARSER_CONTENT;
6986 }
6987
6988 /*
6989 * Fourth case : a sub-element.
6990 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006991 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006992 xmlParseElement(ctxt);
6993 }
6994
6995 /*
6996 * Fifth case : a reference. If if has not been resolved,
6997 * parsing returns it's Name, create the node
6998 */
6999
Daniel Veillard21a0f912001-02-25 19:54:14 +00007000 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007001 xmlParseReference(ctxt);
7002 }
7003
7004 /*
7005 * Last case, text. Note that References are handled directly.
7006 */
7007 else {
7008 xmlParseCharData(ctxt, 0);
7009 }
7010
7011 GROW;
7012 /*
7013 * Pop-up of finished entities.
7014 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007015 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007016 xmlPopInput(ctxt);
7017 SHRINK;
7018
Daniel Veillardfdc91562002-07-01 21:52:03 +00007019 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007020 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022 ctxt->sax->error(ctxt->userData,
7023 "detected an error in element content\n");
7024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007026 ctxt->instate = XML_PARSER_EOF;
7027 break;
7028 }
7029 }
7030}
7031
7032/**
7033 * xmlParseElement:
7034 * @ctxt: an XML parser context
7035 *
7036 * parse an XML element, this is highly recursive
7037 *
7038 * [39] element ::= EmptyElemTag | STag content ETag
7039 *
7040 * [ WFC: Element Type Match ]
7041 * The Name in an element's end-tag must match the element type in the
7042 * start-tag.
7043 *
7044 * [ VC: Element Valid ]
7045 * An element is valid if there is a declaration matching elementdecl
7046 * where the Name matches the element type and one of the following holds:
7047 * - The declaration matches EMPTY and the element has no content.
7048 * - The declaration matches children and the sequence of child elements
7049 * belongs to the language generated by the regular expression in the
7050 * content model, with optional white space (characters matching the
7051 * nonterminal S) between each pair of child elements.
7052 * - The declaration matches Mixed and the content consists of character
7053 * data and child elements whose types match names in the content model.
7054 * - The declaration matches ANY, and the types of any child elements have
7055 * been declared.
7056 */
7057
7058void
7059xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007060 xmlChar *name;
7061 xmlChar *oldname;
7062 xmlParserNodeInfo node_info;
7063 xmlNodePtr ret;
7064
7065 /* Capture start position */
7066 if (ctxt->record_info) {
7067 node_info.begin_pos = ctxt->input->consumed +
7068 (CUR_PTR - ctxt->input->base);
7069 node_info.begin_line = ctxt->input->line;
7070 }
7071
7072 if (ctxt->spaceNr == 0)
7073 spacePush(ctxt, -1);
7074 else
7075 spacePush(ctxt, *ctxt->space);
7076
7077 name = xmlParseStartTag(ctxt);
7078 if (name == NULL) {
7079 spacePop(ctxt);
7080 return;
7081 }
7082 namePush(ctxt, name);
7083 ret = ctxt->node;
7084
7085 /*
7086 * [ VC: Root Element Type ]
7087 * The Name in the document type declaration must match the element
7088 * type of the root element.
7089 */
7090 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7091 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7092 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7093
7094 /*
7095 * Check for an Empty Element.
7096 */
7097 if ((RAW == '/') && (NXT(1) == '>')) {
7098 SKIP(2);
7099 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7100 (!ctxt->disableSAX))
7101 ctxt->sax->endElement(ctxt->userData, name);
7102 oldname = namePop(ctxt);
7103 spacePop(ctxt);
7104 if (oldname != NULL) {
7105#ifdef DEBUG_STACK
7106 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7107#endif
7108 xmlFree(oldname);
7109 }
7110 if ( ret != NULL && ctxt->record_info ) {
7111 node_info.end_pos = ctxt->input->consumed +
7112 (CUR_PTR - ctxt->input->base);
7113 node_info.end_line = ctxt->input->line;
7114 node_info.node = ret;
7115 xmlParserAddNodeInfo(ctxt, &node_info);
7116 }
7117 return;
7118 }
7119 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007120 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007121 } else {
7122 ctxt->errNo = XML_ERR_GT_REQUIRED;
7123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7124 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007125 "Couldn't find end of Start Tag %s\n",
7126 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007127 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007128 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007129
7130 /*
7131 * end of parsing of this node.
7132 */
7133 nodePop(ctxt);
7134 oldname = namePop(ctxt);
7135 spacePop(ctxt);
7136 if (oldname != NULL) {
7137#ifdef DEBUG_STACK
7138 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7139#endif
7140 xmlFree(oldname);
7141 }
7142
7143 /*
7144 * Capture end position and add node
7145 */
7146 if ( ret != NULL && ctxt->record_info ) {
7147 node_info.end_pos = ctxt->input->consumed +
7148 (CUR_PTR - ctxt->input->base);
7149 node_info.end_line = ctxt->input->line;
7150 node_info.node = ret;
7151 xmlParserAddNodeInfo(ctxt, &node_info);
7152 }
7153 return;
7154 }
7155
7156 /*
7157 * Parse the content of the element:
7158 */
7159 xmlParseContent(ctxt);
7160 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007161 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7163 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007164 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007166 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007167
7168 /*
7169 * end of parsing of this node.
7170 */
7171 nodePop(ctxt);
7172 oldname = namePop(ctxt);
7173 spacePop(ctxt);
7174 if (oldname != NULL) {
7175#ifdef DEBUG_STACK
7176 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7177#endif
7178 xmlFree(oldname);
7179 }
7180 return;
7181 }
7182
7183 /*
7184 * parse the end of tag: '</' should be here.
7185 */
7186 xmlParseEndTag(ctxt);
7187
7188 /*
7189 * Capture end position and add node
7190 */
7191 if ( ret != NULL && ctxt->record_info ) {
7192 node_info.end_pos = ctxt->input->consumed +
7193 (CUR_PTR - ctxt->input->base);
7194 node_info.end_line = ctxt->input->line;
7195 node_info.node = ret;
7196 xmlParserAddNodeInfo(ctxt, &node_info);
7197 }
7198}
7199
7200/**
7201 * xmlParseVersionNum:
7202 * @ctxt: an XML parser context
7203 *
7204 * parse the XML version value.
7205 *
7206 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7207 *
7208 * Returns the string giving the XML version number, or NULL
7209 */
7210xmlChar *
7211xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7212 xmlChar *buf = NULL;
7213 int len = 0;
7214 int size = 10;
7215 xmlChar cur;
7216
7217 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7218 if (buf == NULL) {
7219 xmlGenericError(xmlGenericErrorContext,
7220 "malloc of %d byte failed\n", size);
7221 return(NULL);
7222 }
7223 cur = CUR;
7224 while (((cur >= 'a') && (cur <= 'z')) ||
7225 ((cur >= 'A') && (cur <= 'Z')) ||
7226 ((cur >= '0') && (cur <= '9')) ||
7227 (cur == '_') || (cur == '.') ||
7228 (cur == ':') || (cur == '-')) {
7229 if (len + 1 >= size) {
7230 size *= 2;
7231 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7232 if (buf == NULL) {
7233 xmlGenericError(xmlGenericErrorContext,
7234 "realloc of %d byte failed\n", size);
7235 return(NULL);
7236 }
7237 }
7238 buf[len++] = cur;
7239 NEXT;
7240 cur=CUR;
7241 }
7242 buf[len] = 0;
7243 return(buf);
7244}
7245
7246/**
7247 * xmlParseVersionInfo:
7248 * @ctxt: an XML parser context
7249 *
7250 * parse the XML version.
7251 *
7252 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7253 *
7254 * [25] Eq ::= S? '=' S?
7255 *
7256 * Returns the version string, e.g. "1.0"
7257 */
7258
7259xmlChar *
7260xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7261 xmlChar *version = NULL;
7262 const xmlChar *q;
7263
7264 if ((RAW == 'v') && (NXT(1) == 'e') &&
7265 (NXT(2) == 'r') && (NXT(3) == 's') &&
7266 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7267 (NXT(6) == 'n')) {
7268 SKIP(7);
7269 SKIP_BLANKS;
7270 if (RAW != '=') {
7271 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7273 ctxt->sax->error(ctxt->userData,
7274 "xmlParseVersionInfo : expected '='\n");
7275 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007276 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007277 return(NULL);
7278 }
7279 NEXT;
7280 SKIP_BLANKS;
7281 if (RAW == '"') {
7282 NEXT;
7283 q = CUR_PTR;
7284 version = xmlParseVersionNum(ctxt);
7285 if (RAW != '"') {
7286 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7288 ctxt->sax->error(ctxt->userData,
7289 "String not closed\n%.50s\n", q);
7290 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007291 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007292 } else
7293 NEXT;
7294 } else if (RAW == '\''){
7295 NEXT;
7296 q = CUR_PTR;
7297 version = xmlParseVersionNum(ctxt);
7298 if (RAW != '\'') {
7299 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7301 ctxt->sax->error(ctxt->userData,
7302 "String not closed\n%.50s\n", q);
7303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007305 } else
7306 NEXT;
7307 } else {
7308 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7310 ctxt->sax->error(ctxt->userData,
7311 "xmlParseVersionInfo : expected ' or \"\n");
7312 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007313 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007314 }
7315 }
7316 return(version);
7317}
7318
7319/**
7320 * xmlParseEncName:
7321 * @ctxt: an XML parser context
7322 *
7323 * parse the XML encoding name
7324 *
7325 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7326 *
7327 * Returns the encoding name value or NULL
7328 */
7329xmlChar *
7330xmlParseEncName(xmlParserCtxtPtr ctxt) {
7331 xmlChar *buf = NULL;
7332 int len = 0;
7333 int size = 10;
7334 xmlChar cur;
7335
7336 cur = CUR;
7337 if (((cur >= 'a') && (cur <= 'z')) ||
7338 ((cur >= 'A') && (cur <= 'Z'))) {
7339 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7340 if (buf == NULL) {
7341 xmlGenericError(xmlGenericErrorContext,
7342 "malloc of %d byte failed\n", size);
7343 return(NULL);
7344 }
7345
7346 buf[len++] = cur;
7347 NEXT;
7348 cur = CUR;
7349 while (((cur >= 'a') && (cur <= 'z')) ||
7350 ((cur >= 'A') && (cur <= 'Z')) ||
7351 ((cur >= '0') && (cur <= '9')) ||
7352 (cur == '.') || (cur == '_') ||
7353 (cur == '-')) {
7354 if (len + 1 >= size) {
7355 size *= 2;
7356 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7357 if (buf == NULL) {
7358 xmlGenericError(xmlGenericErrorContext,
7359 "realloc of %d byte failed\n", size);
7360 return(NULL);
7361 }
7362 }
7363 buf[len++] = cur;
7364 NEXT;
7365 cur = CUR;
7366 if (cur == 0) {
7367 SHRINK;
7368 GROW;
7369 cur = CUR;
7370 }
7371 }
7372 buf[len] = 0;
7373 } else {
7374 ctxt->errNo = XML_ERR_ENCODING_NAME;
7375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7376 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7377 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007378 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007379 }
7380 return(buf);
7381}
7382
7383/**
7384 * xmlParseEncodingDecl:
7385 * @ctxt: an XML parser context
7386 *
7387 * parse the XML encoding declaration
7388 *
7389 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7390 *
7391 * this setups the conversion filters.
7392 *
7393 * Returns the encoding value or NULL
7394 */
7395
7396xmlChar *
7397xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7398 xmlChar *encoding = NULL;
7399 const xmlChar *q;
7400
7401 SKIP_BLANKS;
7402 if ((RAW == 'e') && (NXT(1) == 'n') &&
7403 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7404 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7405 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7406 SKIP(8);
7407 SKIP_BLANKS;
7408 if (RAW != '=') {
7409 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7411 ctxt->sax->error(ctxt->userData,
7412 "xmlParseEncodingDecl : expected '='\n");
7413 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007414 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007415 return(NULL);
7416 }
7417 NEXT;
7418 SKIP_BLANKS;
7419 if (RAW == '"') {
7420 NEXT;
7421 q = CUR_PTR;
7422 encoding = xmlParseEncName(ctxt);
7423 if (RAW != '"') {
7424 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7426 ctxt->sax->error(ctxt->userData,
7427 "String not closed\n%.50s\n", q);
7428 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007429 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007430 } else
7431 NEXT;
7432 } else if (RAW == '\''){
7433 NEXT;
7434 q = CUR_PTR;
7435 encoding = xmlParseEncName(ctxt);
7436 if (RAW != '\'') {
7437 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7439 ctxt->sax->error(ctxt->userData,
7440 "String not closed\n%.50s\n", q);
7441 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007442 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007443 } else
7444 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007445 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007446 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7448 ctxt->sax->error(ctxt->userData,
7449 "xmlParseEncodingDecl : expected ' or \"\n");
7450 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007451 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007452 }
7453 if (encoding != NULL) {
7454 xmlCharEncoding enc;
7455 xmlCharEncodingHandlerPtr handler;
7456
7457 if (ctxt->input->encoding != NULL)
7458 xmlFree((xmlChar *) ctxt->input->encoding);
7459 ctxt->input->encoding = encoding;
7460
7461 enc = xmlParseCharEncoding((const char *) encoding);
7462 /*
7463 * registered set of known encodings
7464 */
7465 if (enc != XML_CHAR_ENCODING_ERROR) {
7466 xmlSwitchEncoding(ctxt, enc);
7467 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007468 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007469 xmlFree(encoding);
7470 return(NULL);
7471 }
7472 } else {
7473 /*
7474 * fallback for unknown encodings
7475 */
7476 handler = xmlFindCharEncodingHandler((const char *) encoding);
7477 if (handler != NULL) {
7478 xmlSwitchToEncoding(ctxt, handler);
7479 } else {
7480 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7482 ctxt->sax->error(ctxt->userData,
7483 "Unsupported encoding %s\n", encoding);
7484 return(NULL);
7485 }
7486 }
7487 }
7488 }
7489 return(encoding);
7490}
7491
7492/**
7493 * xmlParseSDDecl:
7494 * @ctxt: an XML parser context
7495 *
7496 * parse the XML standalone declaration
7497 *
7498 * [32] SDDecl ::= S 'standalone' Eq
7499 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7500 *
7501 * [ VC: Standalone Document Declaration ]
7502 * TODO The standalone document declaration must have the value "no"
7503 * if any external markup declarations contain declarations of:
7504 * - attributes with default values, if elements to which these
7505 * attributes apply appear in the document without specifications
7506 * of values for these attributes, or
7507 * - entities (other than amp, lt, gt, apos, quot), if references
7508 * to those entities appear in the document, or
7509 * - attributes with values subject to normalization, where the
7510 * attribute appears in the document with a value which will change
7511 * as a result of normalization, or
7512 * - element types with element content, if white space occurs directly
7513 * within any instance of those types.
7514 *
7515 * Returns 1 if standalone, 0 otherwise
7516 */
7517
7518int
7519xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7520 int standalone = -1;
7521
7522 SKIP_BLANKS;
7523 if ((RAW == 's') && (NXT(1) == 't') &&
7524 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7525 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7526 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7527 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7528 SKIP(10);
7529 SKIP_BLANKS;
7530 if (RAW != '=') {
7531 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7533 ctxt->sax->error(ctxt->userData,
7534 "XML standalone declaration : expected '='\n");
7535 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007536 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007537 return(standalone);
7538 }
7539 NEXT;
7540 SKIP_BLANKS;
7541 if (RAW == '\''){
7542 NEXT;
7543 if ((RAW == 'n') && (NXT(1) == 'o')) {
7544 standalone = 0;
7545 SKIP(2);
7546 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7547 (NXT(2) == 's')) {
7548 standalone = 1;
7549 SKIP(3);
7550 } else {
7551 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7553 ctxt->sax->error(ctxt->userData,
7554 "standalone accepts only 'yes' or 'no'\n");
7555 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007556 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007557 }
7558 if (RAW != '\'') {
7559 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7561 ctxt->sax->error(ctxt->userData, "String not closed\n");
7562 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007563 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007564 } else
7565 NEXT;
7566 } else if (RAW == '"'){
7567 NEXT;
7568 if ((RAW == 'n') && (NXT(1) == 'o')) {
7569 standalone = 0;
7570 SKIP(2);
7571 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7572 (NXT(2) == 's')) {
7573 standalone = 1;
7574 SKIP(3);
7575 } else {
7576 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7578 ctxt->sax->error(ctxt->userData,
7579 "standalone accepts only 'yes' or 'no'\n");
7580 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007581 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007582 }
7583 if (RAW != '"') {
7584 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7586 ctxt->sax->error(ctxt->userData, "String not closed\n");
7587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007589 } else
7590 NEXT;
7591 } else {
7592 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7594 ctxt->sax->error(ctxt->userData,
7595 "Standalone value not found\n");
7596 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007597 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007598 }
7599 }
7600 return(standalone);
7601}
7602
7603/**
7604 * xmlParseXMLDecl:
7605 * @ctxt: an XML parser context
7606 *
7607 * parse an XML declaration header
7608 *
7609 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7610 */
7611
7612void
7613xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7614 xmlChar *version;
7615
7616 /*
7617 * We know that '<?xml' is here.
7618 */
7619 SKIP(5);
7620
7621 if (!IS_BLANK(RAW)) {
7622 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7624 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007627 }
7628 SKIP_BLANKS;
7629
7630 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007631 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007632 */
7633 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007634 if (version == NULL) {
7635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7636 ctxt->sax->error(ctxt->userData,
7637 "Malformed declaration expecting version\n");
7638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007640 } else {
7641 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7642 /*
7643 * TODO: Blueberry should be detected here
7644 */
7645 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7646 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7647 version);
7648 }
7649 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007650 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007651 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007652 }
Owen Taylor3473f882001-02-23 17:55:21 +00007653
7654 /*
7655 * We may have the encoding declaration
7656 */
7657 if (!IS_BLANK(RAW)) {
7658 if ((RAW == '?') && (NXT(1) == '>')) {
7659 SKIP(2);
7660 return;
7661 }
7662 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7664 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7665 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007666 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007667 }
7668 xmlParseEncodingDecl(ctxt);
7669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7670 /*
7671 * The XML REC instructs us to stop parsing right here
7672 */
7673 return;
7674 }
7675
7676 /*
7677 * We may have the standalone status.
7678 */
7679 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7680 if ((RAW == '?') && (NXT(1) == '>')) {
7681 SKIP(2);
7682 return;
7683 }
7684 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7686 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7687 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007688 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007689 }
7690 SKIP_BLANKS;
7691 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7692
7693 SKIP_BLANKS;
7694 if ((RAW == '?') && (NXT(1) == '>')) {
7695 SKIP(2);
7696 } else if (RAW == '>') {
7697 /* Deprecated old WD ... */
7698 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7700 ctxt->sax->error(ctxt->userData,
7701 "XML declaration must end-up with '?>'\n");
7702 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007703 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007704 NEXT;
7705 } else {
7706 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7708 ctxt->sax->error(ctxt->userData,
7709 "parsing XML declaration: '?>' expected\n");
7710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007712 MOVETO_ENDTAG(CUR_PTR);
7713 NEXT;
7714 }
7715}
7716
7717/**
7718 * xmlParseMisc:
7719 * @ctxt: an XML parser context
7720 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007721 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007722 *
7723 * [27] Misc ::= Comment | PI | S
7724 */
7725
7726void
7727xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007728 while (((RAW == '<') && (NXT(1) == '?')) ||
7729 ((RAW == '<') && (NXT(1) == '!') &&
7730 (NXT(2) == '-') && (NXT(3) == '-')) ||
7731 IS_BLANK(CUR)) {
7732 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007733 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007734 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007735 NEXT;
7736 } else
7737 xmlParseComment(ctxt);
7738 }
7739}
7740
7741/**
7742 * xmlParseDocument:
7743 * @ctxt: an XML parser context
7744 *
7745 * parse an XML document (and build a tree if using the standard SAX
7746 * interface).
7747 *
7748 * [1] document ::= prolog element Misc*
7749 *
7750 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7751 *
7752 * Returns 0, -1 in case of error. the parser context is augmented
7753 * as a result of the parsing.
7754 */
7755
7756int
7757xmlParseDocument(xmlParserCtxtPtr ctxt) {
7758 xmlChar start[4];
7759 xmlCharEncoding enc;
7760
7761 xmlInitParser();
7762
7763 GROW;
7764
7765 /*
7766 * SAX: beginning of the document processing.
7767 */
7768 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7769 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7770
Daniel Veillard50f34372001-08-03 12:06:36 +00007771 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007772 /*
7773 * Get the 4 first bytes and decode the charset
7774 * if enc != XML_CHAR_ENCODING_NONE
7775 * plug some encoding conversion routines.
7776 */
7777 start[0] = RAW;
7778 start[1] = NXT(1);
7779 start[2] = NXT(2);
7780 start[3] = NXT(3);
7781 enc = xmlDetectCharEncoding(start, 4);
7782 if (enc != XML_CHAR_ENCODING_NONE) {
7783 xmlSwitchEncoding(ctxt, enc);
7784 }
Owen Taylor3473f882001-02-23 17:55:21 +00007785 }
7786
7787
7788 if (CUR == 0) {
7789 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7791 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7792 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007794 }
7795
7796 /*
7797 * Check for the XMLDecl in the Prolog.
7798 */
7799 GROW;
7800 if ((RAW == '<') && (NXT(1) == '?') &&
7801 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7802 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7803
7804 /*
7805 * Note that we will switch encoding on the fly.
7806 */
7807 xmlParseXMLDecl(ctxt);
7808 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7809 /*
7810 * The XML REC instructs us to stop parsing right here
7811 */
7812 return(-1);
7813 }
7814 ctxt->standalone = ctxt->input->standalone;
7815 SKIP_BLANKS;
7816 } else {
7817 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7818 }
7819 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7820 ctxt->sax->startDocument(ctxt->userData);
7821
7822 /*
7823 * The Misc part of the Prolog
7824 */
7825 GROW;
7826 xmlParseMisc(ctxt);
7827
7828 /*
7829 * Then possibly doc type declaration(s) and more Misc
7830 * (doctypedecl Misc*)?
7831 */
7832 GROW;
7833 if ((RAW == '<') && (NXT(1) == '!') &&
7834 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7835 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7836 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7837 (NXT(8) == 'E')) {
7838
7839 ctxt->inSubset = 1;
7840 xmlParseDocTypeDecl(ctxt);
7841 if (RAW == '[') {
7842 ctxt->instate = XML_PARSER_DTD;
7843 xmlParseInternalSubset(ctxt);
7844 }
7845
7846 /*
7847 * Create and update the external subset.
7848 */
7849 ctxt->inSubset = 2;
7850 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7851 (!ctxt->disableSAX))
7852 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7853 ctxt->extSubSystem, ctxt->extSubURI);
7854 ctxt->inSubset = 0;
7855
7856
7857 ctxt->instate = XML_PARSER_PROLOG;
7858 xmlParseMisc(ctxt);
7859 }
7860
7861 /*
7862 * Time to start parsing the tree itself
7863 */
7864 GROW;
7865 if (RAW != '<') {
7866 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7868 ctxt->sax->error(ctxt->userData,
7869 "Start tag expected, '<' not found\n");
7870 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007871 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007872 ctxt->instate = XML_PARSER_EOF;
7873 } else {
7874 ctxt->instate = XML_PARSER_CONTENT;
7875 xmlParseElement(ctxt);
7876 ctxt->instate = XML_PARSER_EPILOG;
7877
7878
7879 /*
7880 * The Misc part at the end
7881 */
7882 xmlParseMisc(ctxt);
7883
Daniel Veillard561b7f82002-03-20 21:55:57 +00007884 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007885 ctxt->errNo = XML_ERR_DOCUMENT_END;
7886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7887 ctxt->sax->error(ctxt->userData,
7888 "Extra content at the end of the document\n");
7889 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007890 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007891 }
7892 ctxt->instate = XML_PARSER_EOF;
7893 }
7894
7895 /*
7896 * SAX: end of the document processing.
7897 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007899 ctxt->sax->endDocument(ctxt->userData);
7900
Daniel Veillard5997aca2002-03-18 18:36:20 +00007901 /*
7902 * Remove locally kept entity definitions if the tree was not built
7903 */
7904 if ((ctxt->myDoc != NULL) &&
7905 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7906 xmlFreeDoc(ctxt->myDoc);
7907 ctxt->myDoc = NULL;
7908 }
7909
Daniel Veillardc7612992002-02-17 22:47:37 +00007910 if (! ctxt->wellFormed) {
7911 ctxt->valid = 0;
7912 return(-1);
7913 }
Owen Taylor3473f882001-02-23 17:55:21 +00007914 return(0);
7915}
7916
7917/**
7918 * xmlParseExtParsedEnt:
7919 * @ctxt: an XML parser context
7920 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007921 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007922 * An external general parsed entity is well-formed if it matches the
7923 * production labeled extParsedEnt.
7924 *
7925 * [78] extParsedEnt ::= TextDecl? content
7926 *
7927 * Returns 0, -1 in case of error. the parser context is augmented
7928 * as a result of the parsing.
7929 */
7930
7931int
7932xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7933 xmlChar start[4];
7934 xmlCharEncoding enc;
7935
7936 xmlDefaultSAXHandlerInit();
7937
7938 GROW;
7939
7940 /*
7941 * SAX: beginning of the document processing.
7942 */
7943 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7944 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7945
7946 /*
7947 * Get the 4 first bytes and decode the charset
7948 * if enc != XML_CHAR_ENCODING_NONE
7949 * plug some encoding conversion routines.
7950 */
7951 start[0] = RAW;
7952 start[1] = NXT(1);
7953 start[2] = NXT(2);
7954 start[3] = NXT(3);
7955 enc = xmlDetectCharEncoding(start, 4);
7956 if (enc != XML_CHAR_ENCODING_NONE) {
7957 xmlSwitchEncoding(ctxt, enc);
7958 }
7959
7960
7961 if (CUR == 0) {
7962 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7964 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7965 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007966 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007967 }
7968
7969 /*
7970 * Check for the XMLDecl in the Prolog.
7971 */
7972 GROW;
7973 if ((RAW == '<') && (NXT(1) == '?') &&
7974 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7975 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7976
7977 /*
7978 * Note that we will switch encoding on the fly.
7979 */
7980 xmlParseXMLDecl(ctxt);
7981 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7982 /*
7983 * The XML REC instructs us to stop parsing right here
7984 */
7985 return(-1);
7986 }
7987 SKIP_BLANKS;
7988 } else {
7989 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7990 }
7991 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7992 ctxt->sax->startDocument(ctxt->userData);
7993
7994 /*
7995 * Doing validity checking on chunk doesn't make sense
7996 */
7997 ctxt->instate = XML_PARSER_CONTENT;
7998 ctxt->validate = 0;
7999 ctxt->loadsubset = 0;
8000 ctxt->depth = 0;
8001
8002 xmlParseContent(ctxt);
8003
8004 if ((RAW == '<') && (NXT(1) == '/')) {
8005 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8007 ctxt->sax->error(ctxt->userData,
8008 "chunk is not well balanced\n");
8009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008011 } else if (RAW != 0) {
8012 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8014 ctxt->sax->error(ctxt->userData,
8015 "extra content at the end of well balanced chunk\n");
8016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008018 }
8019
8020 /*
8021 * SAX: end of the document processing.
8022 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008023 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008024 ctxt->sax->endDocument(ctxt->userData);
8025
8026 if (! ctxt->wellFormed) return(-1);
8027 return(0);
8028}
8029
8030/************************************************************************
8031 * *
8032 * Progressive parsing interfaces *
8033 * *
8034 ************************************************************************/
8035
8036/**
8037 * xmlParseLookupSequence:
8038 * @ctxt: an XML parser context
8039 * @first: the first char to lookup
8040 * @next: the next char to lookup or zero
8041 * @third: the next char to lookup or zero
8042 *
8043 * Try to find if a sequence (first, next, third) or just (first next) or
8044 * (first) is available in the input stream.
8045 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8046 * to avoid rescanning sequences of bytes, it DOES change the state of the
8047 * parser, do not use liberally.
8048 *
8049 * Returns the index to the current parsing point if the full sequence
8050 * is available, -1 otherwise.
8051 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008052static int
Owen Taylor3473f882001-02-23 17:55:21 +00008053xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8054 xmlChar next, xmlChar third) {
8055 int base, len;
8056 xmlParserInputPtr in;
8057 const xmlChar *buf;
8058
8059 in = ctxt->input;
8060 if (in == NULL) return(-1);
8061 base = in->cur - in->base;
8062 if (base < 0) return(-1);
8063 if (ctxt->checkIndex > base)
8064 base = ctxt->checkIndex;
8065 if (in->buf == NULL) {
8066 buf = in->base;
8067 len = in->length;
8068 } else {
8069 buf = in->buf->buffer->content;
8070 len = in->buf->buffer->use;
8071 }
8072 /* take into account the sequence length */
8073 if (third) len -= 2;
8074 else if (next) len --;
8075 for (;base < len;base++) {
8076 if (buf[base] == first) {
8077 if (third != 0) {
8078 if ((buf[base + 1] != next) ||
8079 (buf[base + 2] != third)) continue;
8080 } else if (next != 0) {
8081 if (buf[base + 1] != next) continue;
8082 }
8083 ctxt->checkIndex = 0;
8084#ifdef DEBUG_PUSH
8085 if (next == 0)
8086 xmlGenericError(xmlGenericErrorContext,
8087 "PP: lookup '%c' found at %d\n",
8088 first, base);
8089 else if (third == 0)
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: lookup '%c%c' found at %d\n",
8092 first, next, base);
8093 else
8094 xmlGenericError(xmlGenericErrorContext,
8095 "PP: lookup '%c%c%c' found at %d\n",
8096 first, next, third, base);
8097#endif
8098 return(base - (in->cur - in->base));
8099 }
8100 }
8101 ctxt->checkIndex = base;
8102#ifdef DEBUG_PUSH
8103 if (next == 0)
8104 xmlGenericError(xmlGenericErrorContext,
8105 "PP: lookup '%c' failed\n", first);
8106 else if (third == 0)
8107 xmlGenericError(xmlGenericErrorContext,
8108 "PP: lookup '%c%c' failed\n", first, next);
8109 else
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: lookup '%c%c%c' failed\n", first, next, third);
8112#endif
8113 return(-1);
8114}
8115
8116/**
8117 * xmlParseTryOrFinish:
8118 * @ctxt: an XML parser context
8119 * @terminate: last chunk indicator
8120 *
8121 * Try to progress on parsing
8122 *
8123 * Returns zero if no parsing was possible
8124 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008125static int
Owen Taylor3473f882001-02-23 17:55:21 +00008126xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8127 int ret = 0;
8128 int avail;
8129 xmlChar cur, next;
8130
8131#ifdef DEBUG_PUSH
8132 switch (ctxt->instate) {
8133 case XML_PARSER_EOF:
8134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: try EOF\n"); break;
8136 case XML_PARSER_START:
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: try START\n"); break;
8139 case XML_PARSER_MISC:
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: try MISC\n");break;
8142 case XML_PARSER_COMMENT:
8143 xmlGenericError(xmlGenericErrorContext,
8144 "PP: try COMMENT\n");break;
8145 case XML_PARSER_PROLOG:
8146 xmlGenericError(xmlGenericErrorContext,
8147 "PP: try PROLOG\n");break;
8148 case XML_PARSER_START_TAG:
8149 xmlGenericError(xmlGenericErrorContext,
8150 "PP: try START_TAG\n");break;
8151 case XML_PARSER_CONTENT:
8152 xmlGenericError(xmlGenericErrorContext,
8153 "PP: try CONTENT\n");break;
8154 case XML_PARSER_CDATA_SECTION:
8155 xmlGenericError(xmlGenericErrorContext,
8156 "PP: try CDATA_SECTION\n");break;
8157 case XML_PARSER_END_TAG:
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: try END_TAG\n");break;
8160 case XML_PARSER_ENTITY_DECL:
8161 xmlGenericError(xmlGenericErrorContext,
8162 "PP: try ENTITY_DECL\n");break;
8163 case XML_PARSER_ENTITY_VALUE:
8164 xmlGenericError(xmlGenericErrorContext,
8165 "PP: try ENTITY_VALUE\n");break;
8166 case XML_PARSER_ATTRIBUTE_VALUE:
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: try ATTRIBUTE_VALUE\n");break;
8169 case XML_PARSER_DTD:
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: try DTD\n");break;
8172 case XML_PARSER_EPILOG:
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: try EPILOG\n");break;
8175 case XML_PARSER_PI:
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: try PI\n");break;
8178 case XML_PARSER_IGNORE:
8179 xmlGenericError(xmlGenericErrorContext,
8180 "PP: try IGNORE\n");break;
8181 }
8182#endif
8183
8184 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008185 SHRINK;
8186
Owen Taylor3473f882001-02-23 17:55:21 +00008187 /*
8188 * Pop-up of finished entities.
8189 */
8190 while ((RAW == 0) && (ctxt->inputNr > 1))
8191 xmlPopInput(ctxt);
8192
8193 if (ctxt->input ==NULL) break;
8194 if (ctxt->input->buf == NULL)
8195 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008196 else {
8197 /*
8198 * If we are operating on converted input, try to flush
8199 * remainng chars to avoid them stalling in the non-converted
8200 * buffer.
8201 */
8202 if ((ctxt->input->buf->raw != NULL) &&
8203 (ctxt->input->buf->raw->use > 0)) {
8204 int base = ctxt->input->base -
8205 ctxt->input->buf->buffer->content;
8206 int current = ctxt->input->cur - ctxt->input->base;
8207
8208 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8209 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8210 ctxt->input->cur = ctxt->input->base + current;
8211 ctxt->input->end =
8212 &ctxt->input->buf->buffer->content[
8213 ctxt->input->buf->buffer->use];
8214 }
8215 avail = ctxt->input->buf->buffer->use -
8216 (ctxt->input->cur - ctxt->input->base);
8217 }
Owen Taylor3473f882001-02-23 17:55:21 +00008218 if (avail < 1)
8219 goto done;
8220 switch (ctxt->instate) {
8221 case XML_PARSER_EOF:
8222 /*
8223 * Document parsing is done !
8224 */
8225 goto done;
8226 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008227 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8228 xmlChar start[4];
8229 xmlCharEncoding enc;
8230
8231 /*
8232 * Very first chars read from the document flow.
8233 */
8234 if (avail < 4)
8235 goto done;
8236
8237 /*
8238 * Get the 4 first bytes and decode the charset
8239 * if enc != XML_CHAR_ENCODING_NONE
8240 * plug some encoding conversion routines.
8241 */
8242 start[0] = RAW;
8243 start[1] = NXT(1);
8244 start[2] = NXT(2);
8245 start[3] = NXT(3);
8246 enc = xmlDetectCharEncoding(start, 4);
8247 if (enc != XML_CHAR_ENCODING_NONE) {
8248 xmlSwitchEncoding(ctxt, enc);
8249 }
8250 break;
8251 }
Owen Taylor3473f882001-02-23 17:55:21 +00008252
8253 cur = ctxt->input->cur[0];
8254 next = ctxt->input->cur[1];
8255 if (cur == 0) {
8256 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8257 ctxt->sax->setDocumentLocator(ctxt->userData,
8258 &xmlDefaultSAXLocator);
8259 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8261 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8262 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008263 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008264 ctxt->instate = XML_PARSER_EOF;
8265#ifdef DEBUG_PUSH
8266 xmlGenericError(xmlGenericErrorContext,
8267 "PP: entering EOF\n");
8268#endif
8269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8270 ctxt->sax->endDocument(ctxt->userData);
8271 goto done;
8272 }
8273 if ((cur == '<') && (next == '?')) {
8274 /* PI or XML decl */
8275 if (avail < 5) return(ret);
8276 if ((!terminate) &&
8277 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8278 return(ret);
8279 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8280 ctxt->sax->setDocumentLocator(ctxt->userData,
8281 &xmlDefaultSAXLocator);
8282 if ((ctxt->input->cur[2] == 'x') &&
8283 (ctxt->input->cur[3] == 'm') &&
8284 (ctxt->input->cur[4] == 'l') &&
8285 (IS_BLANK(ctxt->input->cur[5]))) {
8286 ret += 5;
8287#ifdef DEBUG_PUSH
8288 xmlGenericError(xmlGenericErrorContext,
8289 "PP: Parsing XML Decl\n");
8290#endif
8291 xmlParseXMLDecl(ctxt);
8292 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8293 /*
8294 * The XML REC instructs us to stop parsing right
8295 * here
8296 */
8297 ctxt->instate = XML_PARSER_EOF;
8298 return(0);
8299 }
8300 ctxt->standalone = ctxt->input->standalone;
8301 if ((ctxt->encoding == NULL) &&
8302 (ctxt->input->encoding != NULL))
8303 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8304 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8305 (!ctxt->disableSAX))
8306 ctxt->sax->startDocument(ctxt->userData);
8307 ctxt->instate = XML_PARSER_MISC;
8308#ifdef DEBUG_PUSH
8309 xmlGenericError(xmlGenericErrorContext,
8310 "PP: entering MISC\n");
8311#endif
8312 } else {
8313 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8314 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8315 (!ctxt->disableSAX))
8316 ctxt->sax->startDocument(ctxt->userData);
8317 ctxt->instate = XML_PARSER_MISC;
8318#ifdef DEBUG_PUSH
8319 xmlGenericError(xmlGenericErrorContext,
8320 "PP: entering MISC\n");
8321#endif
8322 }
8323 } else {
8324 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8325 ctxt->sax->setDocumentLocator(ctxt->userData,
8326 &xmlDefaultSAXLocator);
8327 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8328 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8329 (!ctxt->disableSAX))
8330 ctxt->sax->startDocument(ctxt->userData);
8331 ctxt->instate = XML_PARSER_MISC;
8332#ifdef DEBUG_PUSH
8333 xmlGenericError(xmlGenericErrorContext,
8334 "PP: entering MISC\n");
8335#endif
8336 }
8337 break;
8338 case XML_PARSER_MISC:
8339 SKIP_BLANKS;
8340 if (ctxt->input->buf == NULL)
8341 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8342 else
8343 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8344 if (avail < 2)
8345 goto done;
8346 cur = ctxt->input->cur[0];
8347 next = ctxt->input->cur[1];
8348 if ((cur == '<') && (next == '?')) {
8349 if ((!terminate) &&
8350 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8351 goto done;
8352#ifdef DEBUG_PUSH
8353 xmlGenericError(xmlGenericErrorContext,
8354 "PP: Parsing PI\n");
8355#endif
8356 xmlParsePI(ctxt);
8357 } else if ((cur == '<') && (next == '!') &&
8358 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8359 if ((!terminate) &&
8360 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8361 goto done;
8362#ifdef DEBUG_PUSH
8363 xmlGenericError(xmlGenericErrorContext,
8364 "PP: Parsing Comment\n");
8365#endif
8366 xmlParseComment(ctxt);
8367 ctxt->instate = XML_PARSER_MISC;
8368 } else if ((cur == '<') && (next == '!') &&
8369 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8370 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8371 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8372 (ctxt->input->cur[8] == 'E')) {
8373 if ((!terminate) &&
8374 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8375 goto done;
8376#ifdef DEBUG_PUSH
8377 xmlGenericError(xmlGenericErrorContext,
8378 "PP: Parsing internal subset\n");
8379#endif
8380 ctxt->inSubset = 1;
8381 xmlParseDocTypeDecl(ctxt);
8382 if (RAW == '[') {
8383 ctxt->instate = XML_PARSER_DTD;
8384#ifdef DEBUG_PUSH
8385 xmlGenericError(xmlGenericErrorContext,
8386 "PP: entering DTD\n");
8387#endif
8388 } else {
8389 /*
8390 * Create and update the external subset.
8391 */
8392 ctxt->inSubset = 2;
8393 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8394 (ctxt->sax->externalSubset != NULL))
8395 ctxt->sax->externalSubset(ctxt->userData,
8396 ctxt->intSubName, ctxt->extSubSystem,
8397 ctxt->extSubURI);
8398 ctxt->inSubset = 0;
8399 ctxt->instate = XML_PARSER_PROLOG;
8400#ifdef DEBUG_PUSH
8401 xmlGenericError(xmlGenericErrorContext,
8402 "PP: entering PROLOG\n");
8403#endif
8404 }
8405 } else if ((cur == '<') && (next == '!') &&
8406 (avail < 9)) {
8407 goto done;
8408 } else {
8409 ctxt->instate = XML_PARSER_START_TAG;
8410#ifdef DEBUG_PUSH
8411 xmlGenericError(xmlGenericErrorContext,
8412 "PP: entering START_TAG\n");
8413#endif
8414 }
8415 break;
8416 case XML_PARSER_IGNORE:
8417 xmlGenericError(xmlGenericErrorContext,
8418 "PP: internal error, state == IGNORE");
8419 ctxt->instate = XML_PARSER_DTD;
8420#ifdef DEBUG_PUSH
8421 xmlGenericError(xmlGenericErrorContext,
8422 "PP: entering DTD\n");
8423#endif
8424 break;
8425 case XML_PARSER_PROLOG:
8426 SKIP_BLANKS;
8427 if (ctxt->input->buf == NULL)
8428 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8429 else
8430 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8431 if (avail < 2)
8432 goto done;
8433 cur = ctxt->input->cur[0];
8434 next = ctxt->input->cur[1];
8435 if ((cur == '<') && (next == '?')) {
8436 if ((!terminate) &&
8437 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8438 goto done;
8439#ifdef DEBUG_PUSH
8440 xmlGenericError(xmlGenericErrorContext,
8441 "PP: Parsing PI\n");
8442#endif
8443 xmlParsePI(ctxt);
8444 } else if ((cur == '<') && (next == '!') &&
8445 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8446 if ((!terminate) &&
8447 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8448 goto done;
8449#ifdef DEBUG_PUSH
8450 xmlGenericError(xmlGenericErrorContext,
8451 "PP: Parsing Comment\n");
8452#endif
8453 xmlParseComment(ctxt);
8454 ctxt->instate = XML_PARSER_PROLOG;
8455 } else if ((cur == '<') && (next == '!') &&
8456 (avail < 4)) {
8457 goto done;
8458 } else {
8459 ctxt->instate = XML_PARSER_START_TAG;
8460#ifdef DEBUG_PUSH
8461 xmlGenericError(xmlGenericErrorContext,
8462 "PP: entering START_TAG\n");
8463#endif
8464 }
8465 break;
8466 case XML_PARSER_EPILOG:
8467 SKIP_BLANKS;
8468 if (ctxt->input->buf == NULL)
8469 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8470 else
8471 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8472 if (avail < 2)
8473 goto done;
8474 cur = ctxt->input->cur[0];
8475 next = ctxt->input->cur[1];
8476 if ((cur == '<') && (next == '?')) {
8477 if ((!terminate) &&
8478 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8479 goto done;
8480#ifdef DEBUG_PUSH
8481 xmlGenericError(xmlGenericErrorContext,
8482 "PP: Parsing PI\n");
8483#endif
8484 xmlParsePI(ctxt);
8485 ctxt->instate = XML_PARSER_EPILOG;
8486 } else if ((cur == '<') && (next == '!') &&
8487 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8488 if ((!terminate) &&
8489 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8490 goto done;
8491#ifdef DEBUG_PUSH
8492 xmlGenericError(xmlGenericErrorContext,
8493 "PP: Parsing Comment\n");
8494#endif
8495 xmlParseComment(ctxt);
8496 ctxt->instate = XML_PARSER_EPILOG;
8497 } else if ((cur == '<') && (next == '!') &&
8498 (avail < 4)) {
8499 goto done;
8500 } else {
8501 ctxt->errNo = XML_ERR_DOCUMENT_END;
8502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8503 ctxt->sax->error(ctxt->userData,
8504 "Extra content at the end of the document\n");
8505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008507 ctxt->instate = XML_PARSER_EOF;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering EOF\n");
8511#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008512 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008513 ctxt->sax->endDocument(ctxt->userData);
8514 goto done;
8515 }
8516 break;
8517 case XML_PARSER_START_TAG: {
8518 xmlChar *name, *oldname;
8519
8520 if ((avail < 2) && (ctxt->inputNr == 1))
8521 goto done;
8522 cur = ctxt->input->cur[0];
8523 if (cur != '<') {
8524 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8526 ctxt->sax->error(ctxt->userData,
8527 "Start tag expect, '<' not found\n");
8528 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008530 ctxt->instate = XML_PARSER_EOF;
8531#ifdef DEBUG_PUSH
8532 xmlGenericError(xmlGenericErrorContext,
8533 "PP: entering EOF\n");
8534#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008535 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008536 ctxt->sax->endDocument(ctxt->userData);
8537 goto done;
8538 }
8539 if ((!terminate) &&
8540 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8541 goto done;
8542 if (ctxt->spaceNr == 0)
8543 spacePush(ctxt, -1);
8544 else
8545 spacePush(ctxt, *ctxt->space);
8546 name = xmlParseStartTag(ctxt);
8547 if (name == NULL) {
8548 spacePop(ctxt);
8549 ctxt->instate = XML_PARSER_EOF;
8550#ifdef DEBUG_PUSH
8551 xmlGenericError(xmlGenericErrorContext,
8552 "PP: entering EOF\n");
8553#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008554 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008555 ctxt->sax->endDocument(ctxt->userData);
8556 goto done;
8557 }
8558 namePush(ctxt, xmlStrdup(name));
8559
8560 /*
8561 * [ VC: Root Element Type ]
8562 * The Name in the document type declaration must match
8563 * the element type of the root element.
8564 */
8565 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8566 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8567 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8568
8569 /*
8570 * Check for an Empty Element.
8571 */
8572 if ((RAW == '/') && (NXT(1) == '>')) {
8573 SKIP(2);
8574 if ((ctxt->sax != NULL) &&
8575 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8576 ctxt->sax->endElement(ctxt->userData, name);
8577 xmlFree(name);
8578 oldname = namePop(ctxt);
8579 spacePop(ctxt);
8580 if (oldname != NULL) {
8581#ifdef DEBUG_STACK
8582 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8583#endif
8584 xmlFree(oldname);
8585 }
8586 if (ctxt->name == NULL) {
8587 ctxt->instate = XML_PARSER_EPILOG;
8588#ifdef DEBUG_PUSH
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: entering EPILOG\n");
8591#endif
8592 } else {
8593 ctxt->instate = XML_PARSER_CONTENT;
8594#ifdef DEBUG_PUSH
8595 xmlGenericError(xmlGenericErrorContext,
8596 "PP: entering CONTENT\n");
8597#endif
8598 }
8599 break;
8600 }
8601 if (RAW == '>') {
8602 NEXT;
8603 } else {
8604 ctxt->errNo = XML_ERR_GT_REQUIRED;
8605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8606 ctxt->sax->error(ctxt->userData,
8607 "Couldn't find end of Start Tag %s\n",
8608 name);
8609 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008610 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008611
8612 /*
8613 * end of parsing of this node.
8614 */
8615 nodePop(ctxt);
8616 oldname = namePop(ctxt);
8617 spacePop(ctxt);
8618 if (oldname != NULL) {
8619#ifdef DEBUG_STACK
8620 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8621#endif
8622 xmlFree(oldname);
8623 }
8624 }
8625 xmlFree(name);
8626 ctxt->instate = XML_PARSER_CONTENT;
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext,
8629 "PP: entering CONTENT\n");
8630#endif
8631 break;
8632 }
8633 case XML_PARSER_CONTENT: {
8634 const xmlChar *test;
8635 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008636 if ((avail < 2) && (ctxt->inputNr == 1))
8637 goto done;
8638 cur = ctxt->input->cur[0];
8639 next = ctxt->input->cur[1];
8640
8641 test = CUR_PTR;
8642 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008643 if ((cur == '<') && (next == '?')) {
8644 if ((!terminate) &&
8645 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8646 goto done;
8647#ifdef DEBUG_PUSH
8648 xmlGenericError(xmlGenericErrorContext,
8649 "PP: Parsing PI\n");
8650#endif
8651 xmlParsePI(ctxt);
8652 } else if ((cur == '<') && (next == '!') &&
8653 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8654 if ((!terminate) &&
8655 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8656 goto done;
8657#ifdef DEBUG_PUSH
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: Parsing Comment\n");
8660#endif
8661 xmlParseComment(ctxt);
8662 ctxt->instate = XML_PARSER_CONTENT;
8663 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8664 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8665 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8666 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8667 (ctxt->input->cur[8] == '[')) {
8668 SKIP(9);
8669 ctxt->instate = XML_PARSER_CDATA_SECTION;
8670#ifdef DEBUG_PUSH
8671 xmlGenericError(xmlGenericErrorContext,
8672 "PP: entering CDATA_SECTION\n");
8673#endif
8674 break;
8675 } else if ((cur == '<') && (next == '!') &&
8676 (avail < 9)) {
8677 goto done;
8678 } else if ((cur == '<') && (next == '/')) {
8679 ctxt->instate = XML_PARSER_END_TAG;
8680#ifdef DEBUG_PUSH
8681 xmlGenericError(xmlGenericErrorContext,
8682 "PP: entering END_TAG\n");
8683#endif
8684 break;
8685 } else if (cur == '<') {
8686 ctxt->instate = XML_PARSER_START_TAG;
8687#ifdef DEBUG_PUSH
8688 xmlGenericError(xmlGenericErrorContext,
8689 "PP: entering START_TAG\n");
8690#endif
8691 break;
8692 } else if (cur == '&') {
8693 if ((!terminate) &&
8694 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8695 goto done;
8696#ifdef DEBUG_PUSH
8697 xmlGenericError(xmlGenericErrorContext,
8698 "PP: Parsing Reference\n");
8699#endif
8700 xmlParseReference(ctxt);
8701 } else {
8702 /* TODO Avoid the extra copy, handle directly !!! */
8703 /*
8704 * Goal of the following test is:
8705 * - minimize calls to the SAX 'character' callback
8706 * when they are mergeable
8707 * - handle an problem for isBlank when we only parse
8708 * a sequence of blank chars and the next one is
8709 * not available to check against '<' presence.
8710 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008711 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008712 * of the parser.
8713 */
8714 if ((ctxt->inputNr == 1) &&
8715 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8716 if ((!terminate) &&
8717 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8718 goto done;
8719 }
8720 ctxt->checkIndex = 0;
8721#ifdef DEBUG_PUSH
8722 xmlGenericError(xmlGenericErrorContext,
8723 "PP: Parsing char data\n");
8724#endif
8725 xmlParseCharData(ctxt, 0);
8726 }
8727 /*
8728 * Pop-up of finished entities.
8729 */
8730 while ((RAW == 0) && (ctxt->inputNr > 1))
8731 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008732 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008733 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8735 ctxt->sax->error(ctxt->userData,
8736 "detected an error in element content\n");
8737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008739 ctxt->instate = XML_PARSER_EOF;
8740 break;
8741 }
8742 break;
8743 }
8744 case XML_PARSER_CDATA_SECTION: {
8745 /*
8746 * The Push mode need to have the SAX callback for
8747 * cdataBlock merge back contiguous callbacks.
8748 */
8749 int base;
8750
8751 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8752 if (base < 0) {
8753 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8755 if (ctxt->sax->cdataBlock != NULL)
8756 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8757 XML_PARSER_BIG_BUFFER_SIZE);
8758 }
8759 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8760 ctxt->checkIndex = 0;
8761 }
8762 goto done;
8763 } else {
8764 if ((ctxt->sax != NULL) && (base > 0) &&
8765 (!ctxt->disableSAX)) {
8766 if (ctxt->sax->cdataBlock != NULL)
8767 ctxt->sax->cdataBlock(ctxt->userData,
8768 ctxt->input->cur, base);
8769 }
8770 SKIP(base + 3);
8771 ctxt->checkIndex = 0;
8772 ctxt->instate = XML_PARSER_CONTENT;
8773#ifdef DEBUG_PUSH
8774 xmlGenericError(xmlGenericErrorContext,
8775 "PP: entering CONTENT\n");
8776#endif
8777 }
8778 break;
8779 }
8780 case XML_PARSER_END_TAG:
8781 if (avail < 2)
8782 goto done;
8783 if ((!terminate) &&
8784 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8785 goto done;
8786 xmlParseEndTag(ctxt);
8787 if (ctxt->name == NULL) {
8788 ctxt->instate = XML_PARSER_EPILOG;
8789#ifdef DEBUG_PUSH
8790 xmlGenericError(xmlGenericErrorContext,
8791 "PP: entering EPILOG\n");
8792#endif
8793 } else {
8794 ctxt->instate = XML_PARSER_CONTENT;
8795#ifdef DEBUG_PUSH
8796 xmlGenericError(xmlGenericErrorContext,
8797 "PP: entering CONTENT\n");
8798#endif
8799 }
8800 break;
8801 case XML_PARSER_DTD: {
8802 /*
8803 * Sorry but progressive parsing of the internal subset
8804 * is not expected to be supported. We first check that
8805 * the full content of the internal subset is available and
8806 * the parsing is launched only at that point.
8807 * Internal subset ends up with "']' S? '>'" in an unescaped
8808 * section and not in a ']]>' sequence which are conditional
8809 * sections (whoever argued to keep that crap in XML deserve
8810 * a place in hell !).
8811 */
8812 int base, i;
8813 xmlChar *buf;
8814 xmlChar quote = 0;
8815
8816 base = ctxt->input->cur - ctxt->input->base;
8817 if (base < 0) return(0);
8818 if (ctxt->checkIndex > base)
8819 base = ctxt->checkIndex;
8820 buf = ctxt->input->buf->buffer->content;
8821 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8822 base++) {
8823 if (quote != 0) {
8824 if (buf[base] == quote)
8825 quote = 0;
8826 continue;
8827 }
8828 if (buf[base] == '"') {
8829 quote = '"';
8830 continue;
8831 }
8832 if (buf[base] == '\'') {
8833 quote = '\'';
8834 continue;
8835 }
8836 if (buf[base] == ']') {
8837 if ((unsigned int) base +1 >=
8838 ctxt->input->buf->buffer->use)
8839 break;
8840 if (buf[base + 1] == ']') {
8841 /* conditional crap, skip both ']' ! */
8842 base++;
8843 continue;
8844 }
8845 for (i = 0;
8846 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8847 i++) {
8848 if (buf[base + i] == '>')
8849 goto found_end_int_subset;
8850 }
8851 break;
8852 }
8853 }
8854 /*
8855 * We didn't found the end of the Internal subset
8856 */
8857 if (quote == 0)
8858 ctxt->checkIndex = base;
8859#ifdef DEBUG_PUSH
8860 if (next == 0)
8861 xmlGenericError(xmlGenericErrorContext,
8862 "PP: lookup of int subset end filed\n");
8863#endif
8864 goto done;
8865
8866found_end_int_subset:
8867 xmlParseInternalSubset(ctxt);
8868 ctxt->inSubset = 2;
8869 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8870 (ctxt->sax->externalSubset != NULL))
8871 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8872 ctxt->extSubSystem, ctxt->extSubURI);
8873 ctxt->inSubset = 0;
8874 ctxt->instate = XML_PARSER_PROLOG;
8875 ctxt->checkIndex = 0;
8876#ifdef DEBUG_PUSH
8877 xmlGenericError(xmlGenericErrorContext,
8878 "PP: entering PROLOG\n");
8879#endif
8880 break;
8881 }
8882 case XML_PARSER_COMMENT:
8883 xmlGenericError(xmlGenericErrorContext,
8884 "PP: internal error, state == COMMENT\n");
8885 ctxt->instate = XML_PARSER_CONTENT;
8886#ifdef DEBUG_PUSH
8887 xmlGenericError(xmlGenericErrorContext,
8888 "PP: entering CONTENT\n");
8889#endif
8890 break;
8891 case XML_PARSER_PI:
8892 xmlGenericError(xmlGenericErrorContext,
8893 "PP: internal error, state == PI\n");
8894 ctxt->instate = XML_PARSER_CONTENT;
8895#ifdef DEBUG_PUSH
8896 xmlGenericError(xmlGenericErrorContext,
8897 "PP: entering CONTENT\n");
8898#endif
8899 break;
8900 case XML_PARSER_ENTITY_DECL:
8901 xmlGenericError(xmlGenericErrorContext,
8902 "PP: internal error, state == ENTITY_DECL\n");
8903 ctxt->instate = XML_PARSER_DTD;
8904#ifdef DEBUG_PUSH
8905 xmlGenericError(xmlGenericErrorContext,
8906 "PP: entering DTD\n");
8907#endif
8908 break;
8909 case XML_PARSER_ENTITY_VALUE:
8910 xmlGenericError(xmlGenericErrorContext,
8911 "PP: internal error, state == ENTITY_VALUE\n");
8912 ctxt->instate = XML_PARSER_CONTENT;
8913#ifdef DEBUG_PUSH
8914 xmlGenericError(xmlGenericErrorContext,
8915 "PP: entering DTD\n");
8916#endif
8917 break;
8918 case XML_PARSER_ATTRIBUTE_VALUE:
8919 xmlGenericError(xmlGenericErrorContext,
8920 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8921 ctxt->instate = XML_PARSER_START_TAG;
8922#ifdef DEBUG_PUSH
8923 xmlGenericError(xmlGenericErrorContext,
8924 "PP: entering START_TAG\n");
8925#endif
8926 break;
8927 case XML_PARSER_SYSTEM_LITERAL:
8928 xmlGenericError(xmlGenericErrorContext,
8929 "PP: internal error, state == SYSTEM_LITERAL\n");
8930 ctxt->instate = XML_PARSER_START_TAG;
8931#ifdef DEBUG_PUSH
8932 xmlGenericError(xmlGenericErrorContext,
8933 "PP: entering START_TAG\n");
8934#endif
8935 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008936 case XML_PARSER_PUBLIC_LITERAL:
8937 xmlGenericError(xmlGenericErrorContext,
8938 "PP: internal error, state == PUBLIC_LITERAL\n");
8939 ctxt->instate = XML_PARSER_START_TAG;
8940#ifdef DEBUG_PUSH
8941 xmlGenericError(xmlGenericErrorContext,
8942 "PP: entering START_TAG\n");
8943#endif
8944 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008945 }
8946 }
8947done:
8948#ifdef DEBUG_PUSH
8949 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8950#endif
8951 return(ret);
8952}
8953
8954/**
Owen Taylor3473f882001-02-23 17:55:21 +00008955 * xmlParseChunk:
8956 * @ctxt: an XML parser context
8957 * @chunk: an char array
8958 * @size: the size in byte of the chunk
8959 * @terminate: last chunk indicator
8960 *
8961 * Parse a Chunk of memory
8962 *
8963 * Returns zero if no error, the xmlParserErrors otherwise.
8964 */
8965int
8966xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8967 int terminate) {
8968 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8969 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8970 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8971 int cur = ctxt->input->cur - ctxt->input->base;
8972
8973 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8974 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8975 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008976 ctxt->input->end =
8977 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008978#ifdef DEBUG_PUSH
8979 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8980#endif
8981
8982 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8983 xmlParseTryOrFinish(ctxt, terminate);
8984 } else if (ctxt->instate != XML_PARSER_EOF) {
8985 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8986 xmlParserInputBufferPtr in = ctxt->input->buf;
8987 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8988 (in->raw != NULL)) {
8989 int nbchars;
8990
8991 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8992 if (nbchars < 0) {
8993 xmlGenericError(xmlGenericErrorContext,
8994 "xmlParseChunk: encoder error\n");
8995 return(XML_ERR_INVALID_ENCODING);
8996 }
8997 }
8998 }
8999 }
9000 xmlParseTryOrFinish(ctxt, terminate);
9001 if (terminate) {
9002 /*
9003 * Check for termination
9004 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009005 int avail = 0;
9006 if (ctxt->input->buf == NULL)
9007 avail = ctxt->input->length -
9008 (ctxt->input->cur - ctxt->input->base);
9009 else
9010 avail = ctxt->input->buf->buffer->use -
9011 (ctxt->input->cur - ctxt->input->base);
9012
Owen Taylor3473f882001-02-23 17:55:21 +00009013 if ((ctxt->instate != XML_PARSER_EOF) &&
9014 (ctxt->instate != XML_PARSER_EPILOG)) {
9015 ctxt->errNo = XML_ERR_DOCUMENT_END;
9016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9017 ctxt->sax->error(ctxt->userData,
9018 "Extra content at the end of the document\n");
9019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009021 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009022 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9023 ctxt->errNo = XML_ERR_DOCUMENT_END;
9024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9025 ctxt->sax->error(ctxt->userData,
9026 "Extra content at the end of the document\n");
9027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009029
9030 }
Owen Taylor3473f882001-02-23 17:55:21 +00009031 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009032 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009033 ctxt->sax->endDocument(ctxt->userData);
9034 }
9035 ctxt->instate = XML_PARSER_EOF;
9036 }
9037 return((xmlParserErrors) ctxt->errNo);
9038}
9039
9040/************************************************************************
9041 * *
9042 * I/O front end functions to the parser *
9043 * *
9044 ************************************************************************/
9045
9046/**
9047 * xmlStopParser:
9048 * @ctxt: an XML parser context
9049 *
9050 * Blocks further parser processing
9051 */
9052void
9053xmlStopParser(xmlParserCtxtPtr ctxt) {
9054 ctxt->instate = XML_PARSER_EOF;
9055 if (ctxt->input != NULL)
9056 ctxt->input->cur = BAD_CAST"";
9057}
9058
9059/**
9060 * xmlCreatePushParserCtxt:
9061 * @sax: a SAX handler
9062 * @user_data: The user data returned on SAX callbacks
9063 * @chunk: a pointer to an array of chars
9064 * @size: number of chars in the array
9065 * @filename: an optional file name or URI
9066 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009067 * Create a parser context for using the XML parser in push mode.
9068 * If @buffer and @size are non-NULL, the data is used to detect
9069 * the encoding. The remaining characters will be parsed so they
9070 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009071 * To allow content encoding detection, @size should be >= 4
9072 * The value of @filename is used for fetching external entities
9073 * and error/warning reports.
9074 *
9075 * Returns the new parser context or NULL
9076 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009077
Owen Taylor3473f882001-02-23 17:55:21 +00009078xmlParserCtxtPtr
9079xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9080 const char *chunk, int size, const char *filename) {
9081 xmlParserCtxtPtr ctxt;
9082 xmlParserInputPtr inputStream;
9083 xmlParserInputBufferPtr buf;
9084 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9085
9086 /*
9087 * plug some encoding conversion routines
9088 */
9089 if ((chunk != NULL) && (size >= 4))
9090 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9091
9092 buf = xmlAllocParserInputBuffer(enc);
9093 if (buf == NULL) return(NULL);
9094
9095 ctxt = xmlNewParserCtxt();
9096 if (ctxt == NULL) {
9097 xmlFree(buf);
9098 return(NULL);
9099 }
9100 if (sax != NULL) {
9101 if (ctxt->sax != &xmlDefaultSAXHandler)
9102 xmlFree(ctxt->sax);
9103 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9104 if (ctxt->sax == NULL) {
9105 xmlFree(buf);
9106 xmlFree(ctxt);
9107 return(NULL);
9108 }
9109 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9110 if (user_data != NULL)
9111 ctxt->userData = user_data;
9112 }
9113 if (filename == NULL) {
9114 ctxt->directory = NULL;
9115 } else {
9116 ctxt->directory = xmlParserGetDirectory(filename);
9117 }
9118
9119 inputStream = xmlNewInputStream(ctxt);
9120 if (inputStream == NULL) {
9121 xmlFreeParserCtxt(ctxt);
9122 return(NULL);
9123 }
9124
9125 if (filename == NULL)
9126 inputStream->filename = NULL;
9127 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009128 inputStream->filename = (char *)
9129 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009130 inputStream->buf = buf;
9131 inputStream->base = inputStream->buf->buffer->content;
9132 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009133 inputStream->end =
9134 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009135
9136 inputPush(ctxt, inputStream);
9137
9138 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9139 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009140 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9141 int cur = ctxt->input->cur - ctxt->input->base;
9142
Owen Taylor3473f882001-02-23 17:55:21 +00009143 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009144
9145 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9146 ctxt->input->cur = ctxt->input->base + cur;
9147 ctxt->input->end =
9148 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009149#ifdef DEBUG_PUSH
9150 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9151#endif
9152 }
9153
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009154 if (enc != XML_CHAR_ENCODING_NONE) {
9155 xmlSwitchEncoding(ctxt, enc);
9156 }
9157
Owen Taylor3473f882001-02-23 17:55:21 +00009158 return(ctxt);
9159}
9160
9161/**
9162 * xmlCreateIOParserCtxt:
9163 * @sax: a SAX handler
9164 * @user_data: The user data returned on SAX callbacks
9165 * @ioread: an I/O read function
9166 * @ioclose: an I/O close function
9167 * @ioctx: an I/O handler
9168 * @enc: the charset encoding if known
9169 *
9170 * Create a parser context for using the XML parser with an existing
9171 * I/O stream
9172 *
9173 * Returns the new parser context or NULL
9174 */
9175xmlParserCtxtPtr
9176xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9177 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9178 void *ioctx, xmlCharEncoding enc) {
9179 xmlParserCtxtPtr ctxt;
9180 xmlParserInputPtr inputStream;
9181 xmlParserInputBufferPtr buf;
9182
9183 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9184 if (buf == NULL) return(NULL);
9185
9186 ctxt = xmlNewParserCtxt();
9187 if (ctxt == NULL) {
9188 xmlFree(buf);
9189 return(NULL);
9190 }
9191 if (sax != NULL) {
9192 if (ctxt->sax != &xmlDefaultSAXHandler)
9193 xmlFree(ctxt->sax);
9194 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9195 if (ctxt->sax == NULL) {
9196 xmlFree(buf);
9197 xmlFree(ctxt);
9198 return(NULL);
9199 }
9200 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9201 if (user_data != NULL)
9202 ctxt->userData = user_data;
9203 }
9204
9205 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9206 if (inputStream == NULL) {
9207 xmlFreeParserCtxt(ctxt);
9208 return(NULL);
9209 }
9210 inputPush(ctxt, inputStream);
9211
9212 return(ctxt);
9213}
9214
9215/************************************************************************
9216 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009217 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009218 * *
9219 ************************************************************************/
9220
9221/**
9222 * xmlIOParseDTD:
9223 * @sax: the SAX handler block or NULL
9224 * @input: an Input Buffer
9225 * @enc: the charset encoding if known
9226 *
9227 * Load and parse a DTD
9228 *
9229 * Returns the resulting xmlDtdPtr or NULL in case of error.
9230 * @input will be freed at parsing end.
9231 */
9232
9233xmlDtdPtr
9234xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9235 xmlCharEncoding enc) {
9236 xmlDtdPtr ret = NULL;
9237 xmlParserCtxtPtr ctxt;
9238 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009239 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009240
9241 if (input == NULL)
9242 return(NULL);
9243
9244 ctxt = xmlNewParserCtxt();
9245 if (ctxt == NULL) {
9246 return(NULL);
9247 }
9248
9249 /*
9250 * Set-up the SAX context
9251 */
9252 if (sax != NULL) {
9253 if (ctxt->sax != NULL)
9254 xmlFree(ctxt->sax);
9255 ctxt->sax = sax;
9256 ctxt->userData = NULL;
9257 }
9258
9259 /*
9260 * generate a parser input from the I/O handler
9261 */
9262
9263 pinput = xmlNewIOInputStream(ctxt, input, enc);
9264 if (pinput == NULL) {
9265 if (sax != NULL) ctxt->sax = NULL;
9266 xmlFreeParserCtxt(ctxt);
9267 return(NULL);
9268 }
9269
9270 /*
9271 * plug some encoding conversion routines here.
9272 */
9273 xmlPushInput(ctxt, pinput);
9274
9275 pinput->filename = NULL;
9276 pinput->line = 1;
9277 pinput->col = 1;
9278 pinput->base = ctxt->input->cur;
9279 pinput->cur = ctxt->input->cur;
9280 pinput->free = NULL;
9281
9282 /*
9283 * let's parse that entity knowing it's an external subset.
9284 */
9285 ctxt->inSubset = 2;
9286 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9287 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9288 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009289
9290 if (enc == XML_CHAR_ENCODING_NONE) {
9291 /*
9292 * Get the 4 first bytes and decode the charset
9293 * if enc != XML_CHAR_ENCODING_NONE
9294 * plug some encoding conversion routines.
9295 */
9296 start[0] = RAW;
9297 start[1] = NXT(1);
9298 start[2] = NXT(2);
9299 start[3] = NXT(3);
9300 enc = xmlDetectCharEncoding(start, 4);
9301 if (enc != XML_CHAR_ENCODING_NONE) {
9302 xmlSwitchEncoding(ctxt, enc);
9303 }
9304 }
9305
Owen Taylor3473f882001-02-23 17:55:21 +00009306 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9307
9308 if (ctxt->myDoc != NULL) {
9309 if (ctxt->wellFormed) {
9310 ret = ctxt->myDoc->extSubset;
9311 ctxt->myDoc->extSubset = NULL;
9312 } else {
9313 ret = NULL;
9314 }
9315 xmlFreeDoc(ctxt->myDoc);
9316 ctxt->myDoc = NULL;
9317 }
9318 if (sax != NULL) ctxt->sax = NULL;
9319 xmlFreeParserCtxt(ctxt);
9320
9321 return(ret);
9322}
9323
9324/**
9325 * xmlSAXParseDTD:
9326 * @sax: the SAX handler block
9327 * @ExternalID: a NAME* containing the External ID of the DTD
9328 * @SystemID: a NAME* containing the URL to the DTD
9329 *
9330 * Load and parse an external subset.
9331 *
9332 * Returns the resulting xmlDtdPtr or NULL in case of error.
9333 */
9334
9335xmlDtdPtr
9336xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9337 const xmlChar *SystemID) {
9338 xmlDtdPtr ret = NULL;
9339 xmlParserCtxtPtr ctxt;
9340 xmlParserInputPtr input = NULL;
9341 xmlCharEncoding enc;
9342
9343 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9344
9345 ctxt = xmlNewParserCtxt();
9346 if (ctxt == NULL) {
9347 return(NULL);
9348 }
9349
9350 /*
9351 * Set-up the SAX context
9352 */
9353 if (sax != NULL) {
9354 if (ctxt->sax != NULL)
9355 xmlFree(ctxt->sax);
9356 ctxt->sax = sax;
9357 ctxt->userData = NULL;
9358 }
9359
9360 /*
9361 * Ask the Entity resolver to load the damn thing
9362 */
9363
9364 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9365 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9366 if (input == NULL) {
9367 if (sax != NULL) ctxt->sax = NULL;
9368 xmlFreeParserCtxt(ctxt);
9369 return(NULL);
9370 }
9371
9372 /*
9373 * plug some encoding conversion routines here.
9374 */
9375 xmlPushInput(ctxt, input);
9376 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9377 xmlSwitchEncoding(ctxt, enc);
9378
9379 if (input->filename == NULL)
9380 input->filename = (char *) xmlStrdup(SystemID);
9381 input->line = 1;
9382 input->col = 1;
9383 input->base = ctxt->input->cur;
9384 input->cur = ctxt->input->cur;
9385 input->free = NULL;
9386
9387 /*
9388 * let's parse that entity knowing it's an external subset.
9389 */
9390 ctxt->inSubset = 2;
9391 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9392 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9393 ExternalID, SystemID);
9394 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9395
9396 if (ctxt->myDoc != NULL) {
9397 if (ctxt->wellFormed) {
9398 ret = ctxt->myDoc->extSubset;
9399 ctxt->myDoc->extSubset = NULL;
9400 } else {
9401 ret = NULL;
9402 }
9403 xmlFreeDoc(ctxt->myDoc);
9404 ctxt->myDoc = NULL;
9405 }
9406 if (sax != NULL) ctxt->sax = NULL;
9407 xmlFreeParserCtxt(ctxt);
9408
9409 return(ret);
9410}
9411
9412/**
9413 * xmlParseDTD:
9414 * @ExternalID: a NAME* containing the External ID of the DTD
9415 * @SystemID: a NAME* containing the URL to the DTD
9416 *
9417 * Load and parse an external subset.
9418 *
9419 * Returns the resulting xmlDtdPtr or NULL in case of error.
9420 */
9421
9422xmlDtdPtr
9423xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9424 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9425}
9426
/************************************************************************
 *                                                                      *
 *              Front ends when parsing an Entity                       *
 *                                                                      *
 ************************************************************************/
9432
9433/**
Owen Taylor3473f882001-02-23 17:55:21 +00009434 * xmlParseCtxtExternalEntity:
9435 * @ctx: the existing parsing context
9436 * @URL: the URL for the entity to load
9437 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009438 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009439 *
9440 * Parse an external general entity within an existing parsing context
9441 * An external general parsed entity is well-formed if it matches the
9442 * production labeled extParsedEnt.
9443 *
9444 * [78] extParsedEnt ::= TextDecl? content
9445 *
9446 * Returns 0 if the entity is well formed, -1 in case of args problem and
9447 * the parser error code otherwise
9448 */
9449
9450int
9451xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009452 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009453 xmlParserCtxtPtr ctxt;
9454 xmlDocPtr newDoc;
9455 xmlSAXHandlerPtr oldsax = NULL;
9456 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009457 xmlChar start[4];
9458 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009459
9460 if (ctx->depth > 40) {
9461 return(XML_ERR_ENTITY_LOOP);
9462 }
9463
Daniel Veillardcda96922001-08-21 10:56:31 +00009464 if (lst != NULL)
9465 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009466 if ((URL == NULL) && (ID == NULL))
9467 return(-1);
9468 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9469 return(-1);
9470
9471
9472 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9473 if (ctxt == NULL) return(-1);
9474 ctxt->userData = ctxt;
9475 oldsax = ctxt->sax;
9476 ctxt->sax = ctx->sax;
9477 newDoc = xmlNewDoc(BAD_CAST "1.0");
9478 if (newDoc == NULL) {
9479 xmlFreeParserCtxt(ctxt);
9480 return(-1);
9481 }
9482 if (ctx->myDoc != NULL) {
9483 newDoc->intSubset = ctx->myDoc->intSubset;
9484 newDoc->extSubset = ctx->myDoc->extSubset;
9485 }
9486 if (ctx->myDoc->URL != NULL) {
9487 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9488 }
9489 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9490 if (newDoc->children == NULL) {
9491 ctxt->sax = oldsax;
9492 xmlFreeParserCtxt(ctxt);
9493 newDoc->intSubset = NULL;
9494 newDoc->extSubset = NULL;
9495 xmlFreeDoc(newDoc);
9496 return(-1);
9497 }
9498 nodePush(ctxt, newDoc->children);
9499 if (ctx->myDoc == NULL) {
9500 ctxt->myDoc = newDoc;
9501 } else {
9502 ctxt->myDoc = ctx->myDoc;
9503 newDoc->children->doc = ctx->myDoc;
9504 }
9505
Daniel Veillard87a764e2001-06-20 17:41:10 +00009506 /*
9507 * Get the 4 first bytes and decode the charset
9508 * if enc != XML_CHAR_ENCODING_NONE
9509 * plug some encoding conversion routines.
9510 */
9511 GROW
9512 start[0] = RAW;
9513 start[1] = NXT(1);
9514 start[2] = NXT(2);
9515 start[3] = NXT(3);
9516 enc = xmlDetectCharEncoding(start, 4);
9517 if (enc != XML_CHAR_ENCODING_NONE) {
9518 xmlSwitchEncoding(ctxt, enc);
9519 }
9520
Owen Taylor3473f882001-02-23 17:55:21 +00009521 /*
9522 * Parse a possible text declaration first
9523 */
Owen Taylor3473f882001-02-23 17:55:21 +00009524 if ((RAW == '<') && (NXT(1) == '?') &&
9525 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9526 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9527 xmlParseTextDecl(ctxt);
9528 }
9529
9530 /*
9531 * Doing validity checking on chunk doesn't make sense
9532 */
9533 ctxt->instate = XML_PARSER_CONTENT;
9534 ctxt->validate = ctx->validate;
9535 ctxt->loadsubset = ctx->loadsubset;
9536 ctxt->depth = ctx->depth + 1;
9537 ctxt->replaceEntities = ctx->replaceEntities;
9538 if (ctxt->validate) {
9539 ctxt->vctxt.error = ctx->vctxt.error;
9540 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009541 } else {
9542 ctxt->vctxt.error = NULL;
9543 ctxt->vctxt.warning = NULL;
9544 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009545 ctxt->vctxt.nodeTab = NULL;
9546 ctxt->vctxt.nodeNr = 0;
9547 ctxt->vctxt.nodeMax = 0;
9548 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009549
9550 xmlParseContent(ctxt);
9551
9552 if ((RAW == '<') && (NXT(1) == '/')) {
9553 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9555 ctxt->sax->error(ctxt->userData,
9556 "chunk is not well balanced\n");
9557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009559 } else if (RAW != 0) {
9560 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9562 ctxt->sax->error(ctxt->userData,
9563 "extra content at the end of well balanced chunk\n");
9564 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009566 }
9567 if (ctxt->node != newDoc->children) {
9568 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9570 ctxt->sax->error(ctxt->userData,
9571 "chunk is not well balanced\n");
9572 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009573 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009574 }
9575
9576 if (!ctxt->wellFormed) {
9577 if (ctxt->errNo == 0)
9578 ret = 1;
9579 else
9580 ret = ctxt->errNo;
9581 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009582 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009583 xmlNodePtr cur;
9584
9585 /*
9586 * Return the newly created nodeset after unlinking it from
9587 * they pseudo parent.
9588 */
9589 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009590 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009591 while (cur != NULL) {
9592 cur->parent = NULL;
9593 cur = cur->next;
9594 }
9595 newDoc->children->children = NULL;
9596 }
9597 ret = 0;
9598 }
9599 ctxt->sax = oldsax;
9600 xmlFreeParserCtxt(ctxt);
9601 newDoc->intSubset = NULL;
9602 newDoc->extSubset = NULL;
9603 xmlFreeDoc(newDoc);
9604
9605 return(ret);
9606}
9607
9608/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009609 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009610 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009611 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009612 * @sax: the SAX handler bloc (possibly NULL)
9613 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9614 * @depth: Used for loop detection, use 0
9615 * @URL: the URL for the entity to load
9616 * @ID: the System ID for the entity to load
9617 * @list: the return value for the set of parsed nodes
9618 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009619 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009620 *
9621 * Returns 0 if the entity is well formed, -1 in case of args problem and
9622 * the parser error code otherwise
9623 */
9624
Daniel Veillard257d9102001-05-08 10:41:44 +00009625static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009626xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9627 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009628 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009629 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009630 xmlParserCtxtPtr ctxt;
9631 xmlDocPtr newDoc;
9632 xmlSAXHandlerPtr oldsax = NULL;
9633 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009634 xmlChar start[4];
9635 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009636
9637 if (depth > 40) {
9638 return(XML_ERR_ENTITY_LOOP);
9639 }
9640
9641
9642
9643 if (list != NULL)
9644 *list = NULL;
9645 if ((URL == NULL) && (ID == NULL))
9646 return(-1);
9647 if (doc == NULL) /* @@ relax but check for dereferences */
9648 return(-1);
9649
9650
9651 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9652 if (ctxt == NULL) return(-1);
9653 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009654 if (oldctxt != NULL) {
9655 ctxt->_private = oldctxt->_private;
9656 ctxt->loadsubset = oldctxt->loadsubset;
9657 ctxt->validate = oldctxt->validate;
9658 ctxt->external = oldctxt->external;
9659 } else {
9660 /*
9661 * Doing validity checking on chunk without context
9662 * doesn't make sense
9663 */
9664 ctxt->_private = NULL;
9665 ctxt->validate = 0;
9666 ctxt->external = 2;
9667 ctxt->loadsubset = 0;
9668 }
Owen Taylor3473f882001-02-23 17:55:21 +00009669 if (sax != NULL) {
9670 oldsax = ctxt->sax;
9671 ctxt->sax = sax;
9672 if (user_data != NULL)
9673 ctxt->userData = user_data;
9674 }
9675 newDoc = xmlNewDoc(BAD_CAST "1.0");
9676 if (newDoc == NULL) {
9677 xmlFreeParserCtxt(ctxt);
9678 return(-1);
9679 }
9680 if (doc != NULL) {
9681 newDoc->intSubset = doc->intSubset;
9682 newDoc->extSubset = doc->extSubset;
9683 }
9684 if (doc->URL != NULL) {
9685 newDoc->URL = xmlStrdup(doc->URL);
9686 }
9687 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9688 if (newDoc->children == NULL) {
9689 if (sax != NULL)
9690 ctxt->sax = oldsax;
9691 xmlFreeParserCtxt(ctxt);
9692 newDoc->intSubset = NULL;
9693 newDoc->extSubset = NULL;
9694 xmlFreeDoc(newDoc);
9695 return(-1);
9696 }
9697 nodePush(ctxt, newDoc->children);
9698 if (doc == NULL) {
9699 ctxt->myDoc = newDoc;
9700 } else {
9701 ctxt->myDoc = doc;
9702 newDoc->children->doc = doc;
9703 }
9704
Daniel Veillard87a764e2001-06-20 17:41:10 +00009705 /*
9706 * Get the 4 first bytes and decode the charset
9707 * if enc != XML_CHAR_ENCODING_NONE
9708 * plug some encoding conversion routines.
9709 */
9710 GROW;
9711 start[0] = RAW;
9712 start[1] = NXT(1);
9713 start[2] = NXT(2);
9714 start[3] = NXT(3);
9715 enc = xmlDetectCharEncoding(start, 4);
9716 if (enc != XML_CHAR_ENCODING_NONE) {
9717 xmlSwitchEncoding(ctxt, enc);
9718 }
9719
Owen Taylor3473f882001-02-23 17:55:21 +00009720 /*
9721 * Parse a possible text declaration first
9722 */
Owen Taylor3473f882001-02-23 17:55:21 +00009723 if ((RAW == '<') && (NXT(1) == '?') &&
9724 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9725 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9726 xmlParseTextDecl(ctxt);
9727 }
9728
Owen Taylor3473f882001-02-23 17:55:21 +00009729 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009730 ctxt->depth = depth;
9731
9732 xmlParseContent(ctxt);
9733
Daniel Veillard561b7f82002-03-20 21:55:57 +00009734 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009735 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9737 ctxt->sax->error(ctxt->userData,
9738 "chunk is not well balanced\n");
9739 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009740 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009741 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009742 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9744 ctxt->sax->error(ctxt->userData,
9745 "extra content at the end of well balanced chunk\n");
9746 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009747 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009748 }
9749 if (ctxt->node != newDoc->children) {
9750 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9752 ctxt->sax->error(ctxt->userData,
9753 "chunk is not well balanced\n");
9754 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009755 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009756 }
9757
9758 if (!ctxt->wellFormed) {
9759 if (ctxt->errNo == 0)
9760 ret = 1;
9761 else
9762 ret = ctxt->errNo;
9763 } else {
9764 if (list != NULL) {
9765 xmlNodePtr cur;
9766
9767 /*
9768 * Return the newly created nodeset after unlinking it from
9769 * they pseudo parent.
9770 */
9771 cur = newDoc->children->children;
9772 *list = cur;
9773 while (cur != NULL) {
9774 cur->parent = NULL;
9775 cur = cur->next;
9776 }
9777 newDoc->children->children = NULL;
9778 }
9779 ret = 0;
9780 }
9781 if (sax != NULL)
9782 ctxt->sax = oldsax;
9783 xmlFreeParserCtxt(ctxt);
9784 newDoc->intSubset = NULL;
9785 newDoc->extSubset = NULL;
9786 xmlFreeDoc(newDoc);
9787
9788 return(ret);
9789}
9790
9791/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009792 * xmlParseExternalEntity:
9793 * @doc: the document the chunk pertains to
9794 * @sax: the SAX handler bloc (possibly NULL)
9795 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9796 * @depth: Used for loop detection, use 0
9797 * @URL: the URL for the entity to load
9798 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009799 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009800 *
9801 * Parse an external general entity
9802 * An external general parsed entity is well-formed if it matches the
9803 * production labeled extParsedEnt.
9804 *
9805 * [78] extParsedEnt ::= TextDecl? content
9806 *
9807 * Returns 0 if the entity is well formed, -1 in case of args problem and
9808 * the parser error code otherwise
9809 */
9810
9811int
9812xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009813 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009814 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009815 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009816}
9817
9818/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009819 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009820 * @doc: the document the chunk pertains to
9821 * @sax: the SAX handler bloc (possibly NULL)
9822 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9823 * @depth: Used for loop detection, use 0
9824 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009825 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009826 *
9827 * Parse a well-balanced chunk of an XML document
9828 * called by the parser
9829 * The allowed sequence for the Well Balanced Chunk is the one defined by
9830 * the content production in the XML grammar:
9831 *
9832 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9833 *
9834 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9835 * the parser error code otherwise
9836 */
9837
9838int
9839xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009840 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009841 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9842 depth, string, lst, 0 );
9843}
9844
9845/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009846 * xmlParseBalancedChunkMemoryInternal:
9847 * @oldctxt: the existing parsing context
9848 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9849 * @user_data: the user data field for the parser context
9850 * @lst: the return value for the set of parsed nodes
9851 *
9852 *
9853 * Parse a well-balanced chunk of an XML document
9854 * called by the parser
9855 * The allowed sequence for the Well Balanced Chunk is the one defined by
9856 * the content production in the XML grammar:
9857 *
9858 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9859 *
9860 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9861 * the parser error code otherwise
9862 *
9863 * In case recover is set to 1, the nodelist will not be empty even if
9864 * the parsed chunk is not well balanced.
9865 */
9866static int
9867xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9868 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9869 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009870 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009871 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009872 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009873 int size;
9874 int ret = 0;
9875
9876 if (oldctxt->depth > 40) {
9877 return(XML_ERR_ENTITY_LOOP);
9878 }
9879
9880
9881 if (lst != NULL)
9882 *lst = NULL;
9883 if (string == NULL)
9884 return(-1);
9885
9886 size = xmlStrlen(string);
9887
9888 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9889 if (ctxt == NULL) return(-1);
9890 if (user_data != NULL)
9891 ctxt->userData = user_data;
9892 else
9893 ctxt->userData = ctxt;
9894
9895 oldsax = ctxt->sax;
9896 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009897 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009898 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009899 newDoc = xmlNewDoc(BAD_CAST "1.0");
9900 if (newDoc == NULL) {
9901 ctxt->sax = oldsax;
9902 xmlFreeParserCtxt(ctxt);
9903 return(-1);
9904 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009905 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009906 } else {
9907 ctxt->myDoc = oldctxt->myDoc;
9908 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009909 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009910 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009911 BAD_CAST "pseudoroot", NULL);
9912 if (ctxt->myDoc->children == NULL) {
9913 ctxt->sax = oldsax;
9914 xmlFreeParserCtxt(ctxt);
9915 if (newDoc != NULL)
9916 xmlFreeDoc(newDoc);
9917 return(-1);
9918 }
9919 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009920 ctxt->instate = XML_PARSER_CONTENT;
9921 ctxt->depth = oldctxt->depth + 1;
9922
9923 /*
9924 * Doing validity checking on chunk doesn't make sense
9925 */
9926 ctxt->validate = 0;
9927 ctxt->loadsubset = oldctxt->loadsubset;
9928
Daniel Veillard68e9e742002-11-16 15:35:11 +00009929 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009930 if ((RAW == '<') && (NXT(1) == '/')) {
9931 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9933 ctxt->sax->error(ctxt->userData,
9934 "chunk is not well balanced\n");
9935 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009936 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009937 } else if (RAW != 0) {
9938 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9940 ctxt->sax->error(ctxt->userData,
9941 "extra content at the end of well balanced chunk\n");
9942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009944 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009945 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009946 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9948 ctxt->sax->error(ctxt->userData,
9949 "chunk is not well balanced\n");
9950 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009951 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009952 }
9953
9954 if (!ctxt->wellFormed) {
9955 if (ctxt->errNo == 0)
9956 ret = 1;
9957 else
9958 ret = ctxt->errNo;
9959 } else {
9960 ret = 0;
9961 }
9962
9963 if ((lst != NULL) && (ret == 0)) {
9964 xmlNodePtr cur;
9965
9966 /*
9967 * Return the newly created nodeset after unlinking it from
9968 * they pseudo parent.
9969 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009970 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009971 *lst = cur;
9972 while (cur != NULL) {
9973 cur->parent = NULL;
9974 cur = cur->next;
9975 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009976 ctxt->myDoc->children->children = NULL;
9977 }
9978 if (ctxt->myDoc != NULL) {
9979 xmlFreeNode(ctxt->myDoc->children);
9980 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009981 }
9982
9983 ctxt->sax = oldsax;
9984 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009985 if (newDoc != NULL)
9986 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009987
9988 return(ret);
9989}
9990
9991/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009992 * xmlParseBalancedChunkMemoryRecover:
9993 * @doc: the document the chunk pertains to
9994 * @sax: the SAX handler bloc (possibly NULL)
9995 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9996 * @depth: Used for loop detection, use 0
9997 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9998 * @lst: the return value for the set of parsed nodes
9999 * @recover: return nodes even if the data is broken (use 0)
10000 *
10001 *
10002 * Parse a well-balanced chunk of an XML document
10003 * called by the parser
10004 * The allowed sequence for the Well Balanced Chunk is the one defined by
10005 * the content production in the XML grammar:
10006 *
10007 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10008 *
10009 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10010 * the parser error code otherwise
10011 *
10012 * In case recover is set to 1, the nodelist will not be empty even if
10013 * the parsed chunk is not well balanced.
10014 */
10015int
10016xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10017 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10018 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010019 xmlParserCtxtPtr ctxt;
10020 xmlDocPtr newDoc;
10021 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010022 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010023 int size;
10024 int ret = 0;
10025
10026 if (depth > 40) {
10027 return(XML_ERR_ENTITY_LOOP);
10028 }
10029
10030
Daniel Veillardcda96922001-08-21 10:56:31 +000010031 if (lst != NULL)
10032 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010033 if (string == NULL)
10034 return(-1);
10035
10036 size = xmlStrlen(string);
10037
10038 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10039 if (ctxt == NULL) return(-1);
10040 ctxt->userData = ctxt;
10041 if (sax != NULL) {
10042 oldsax = ctxt->sax;
10043 ctxt->sax = sax;
10044 if (user_data != NULL)
10045 ctxt->userData = user_data;
10046 }
10047 newDoc = xmlNewDoc(BAD_CAST "1.0");
10048 if (newDoc == NULL) {
10049 xmlFreeParserCtxt(ctxt);
10050 return(-1);
10051 }
10052 if (doc != NULL) {
10053 newDoc->intSubset = doc->intSubset;
10054 newDoc->extSubset = doc->extSubset;
10055 }
10056 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10057 if (newDoc->children == NULL) {
10058 if (sax != NULL)
10059 ctxt->sax = oldsax;
10060 xmlFreeParserCtxt(ctxt);
10061 newDoc->intSubset = NULL;
10062 newDoc->extSubset = NULL;
10063 xmlFreeDoc(newDoc);
10064 return(-1);
10065 }
10066 nodePush(ctxt, newDoc->children);
10067 if (doc == NULL) {
10068 ctxt->myDoc = newDoc;
10069 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010070 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010071 newDoc->children->doc = doc;
10072 }
10073 ctxt->instate = XML_PARSER_CONTENT;
10074 ctxt->depth = depth;
10075
10076 /*
10077 * Doing validity checking on chunk doesn't make sense
10078 */
10079 ctxt->validate = 0;
10080 ctxt->loadsubset = 0;
10081
Daniel Veillardb39bc392002-10-26 19:29:51 +000010082 if ( doc != NULL ){
10083 content = doc->children;
10084 doc->children = NULL;
10085 xmlParseContent(ctxt);
10086 doc->children = content;
10087 }
10088 else {
10089 xmlParseContent(ctxt);
10090 }
Owen Taylor3473f882001-02-23 17:55:21 +000010091 if ((RAW == '<') && (NXT(1) == '/')) {
10092 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10094 ctxt->sax->error(ctxt->userData,
10095 "chunk is not well balanced\n");
10096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010098 } else if (RAW != 0) {
10099 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10101 ctxt->sax->error(ctxt->userData,
10102 "extra content at the end of well balanced chunk\n");
10103 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010104 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010105 }
10106 if (ctxt->node != newDoc->children) {
10107 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10109 ctxt->sax->error(ctxt->userData,
10110 "chunk is not well balanced\n");
10111 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010112 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010113 }
10114
10115 if (!ctxt->wellFormed) {
10116 if (ctxt->errNo == 0)
10117 ret = 1;
10118 else
10119 ret = ctxt->errNo;
10120 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010121 ret = 0;
10122 }
10123
10124 if (lst != NULL && (ret == 0 || recover == 1)) {
10125 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010126
10127 /*
10128 * Return the newly created nodeset after unlinking it from
10129 * they pseudo parent.
10130 */
10131 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010132 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010133 while (cur != NULL) {
10134 cur->parent = NULL;
10135 cur = cur->next;
10136 }
10137 newDoc->children->children = NULL;
10138 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010139
Owen Taylor3473f882001-02-23 17:55:21 +000010140 if (sax != NULL)
10141 ctxt->sax = oldsax;
10142 xmlFreeParserCtxt(ctxt);
10143 newDoc->intSubset = NULL;
10144 newDoc->extSubset = NULL;
10145 xmlFreeDoc(newDoc);
10146
10147 return(ret);
10148}
10149
10150/**
10151 * xmlSAXParseEntity:
10152 * @sax: the SAX handler block
10153 * @filename: the filename
10154 *
10155 * parse an XML external entity out of context and build a tree.
10156 * It use the given SAX function block to handle the parsing callback.
10157 * If sax is NULL, fallback to the default DOM tree building routines.
10158 *
10159 * [78] extParsedEnt ::= TextDecl? content
10160 *
10161 * This correspond to a "Well Balanced" chunk
10162 *
10163 * Returns the resulting document tree
10164 */
10165
10166xmlDocPtr
10167xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10168 xmlDocPtr ret;
10169 xmlParserCtxtPtr ctxt;
10170 char *directory = NULL;
10171
10172 ctxt = xmlCreateFileParserCtxt(filename);
10173 if (ctxt == NULL) {
10174 return(NULL);
10175 }
10176 if (sax != NULL) {
10177 if (ctxt->sax != NULL)
10178 xmlFree(ctxt->sax);
10179 ctxt->sax = sax;
10180 ctxt->userData = NULL;
10181 }
10182
10183 if ((ctxt->directory == NULL) && (directory == NULL))
10184 directory = xmlParserGetDirectory(filename);
10185
10186 xmlParseExtParsedEnt(ctxt);
10187
10188 if (ctxt->wellFormed)
10189 ret = ctxt->myDoc;
10190 else {
10191 ret = NULL;
10192 xmlFreeDoc(ctxt->myDoc);
10193 ctxt->myDoc = NULL;
10194 }
10195 if (sax != NULL)
10196 ctxt->sax = NULL;
10197 xmlFreeParserCtxt(ctxt);
10198
10199 return(ret);
10200}
10201
10202/**
10203 * xmlParseEntity:
10204 * @filename: the filename
10205 *
10206 * parse an XML external entity out of context and build a tree.
10207 *
10208 * [78] extParsedEnt ::= TextDecl? content
10209 *
10210 * This correspond to a "Well Balanced" chunk
10211 *
10212 * Returns the resulting document tree
10213 */
10214
10215xmlDocPtr
10216xmlParseEntity(const char *filename) {
10217 return(xmlSAXParseEntity(NULL, filename));
10218}
10219
10220/**
10221 * xmlCreateEntityParserCtxt:
10222 * @URL: the entity URL
10223 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010224 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010225 *
10226 * Create a parser context for an external entity
10227 * Automatic support for ZLIB/Compress compressed document is provided
10228 * by default if found at compile-time.
10229 *
10230 * Returns the new parser context or NULL
10231 */
10232xmlParserCtxtPtr
10233xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10234 const xmlChar *base) {
10235 xmlParserCtxtPtr ctxt;
10236 xmlParserInputPtr inputStream;
10237 char *directory = NULL;
10238 xmlChar *uri;
10239
10240 ctxt = xmlNewParserCtxt();
10241 if (ctxt == NULL) {
10242 return(NULL);
10243 }
10244
10245 uri = xmlBuildURI(URL, base);
10246
10247 if (uri == NULL) {
10248 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10249 if (inputStream == NULL) {
10250 xmlFreeParserCtxt(ctxt);
10251 return(NULL);
10252 }
10253
10254 inputPush(ctxt, inputStream);
10255
10256 if ((ctxt->directory == NULL) && (directory == NULL))
10257 directory = xmlParserGetDirectory((char *)URL);
10258 if ((ctxt->directory == NULL) && (directory != NULL))
10259 ctxt->directory = directory;
10260 } else {
10261 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10262 if (inputStream == NULL) {
10263 xmlFree(uri);
10264 xmlFreeParserCtxt(ctxt);
10265 return(NULL);
10266 }
10267
10268 inputPush(ctxt, inputStream);
10269
10270 if ((ctxt->directory == NULL) && (directory == NULL))
10271 directory = xmlParserGetDirectory((char *)uri);
10272 if ((ctxt->directory == NULL) && (directory != NULL))
10273 ctxt->directory = directory;
10274 xmlFree(uri);
10275 }
10276
10277 return(ctxt);
10278}
10279
10280/************************************************************************
10281 * *
10282 * Front ends when parsing from a file *
10283 * *
10284 ************************************************************************/
10285
10286/**
10287 * xmlCreateFileParserCtxt:
10288 * @filename: the filename
10289 *
10290 * Create a parser context for a file content.
10291 * Automatic support for ZLIB/Compress compressed document is provided
10292 * by default if found at compile-time.
10293 *
10294 * Returns the new parser context or NULL
10295 */
10296xmlParserCtxtPtr
10297xmlCreateFileParserCtxt(const char *filename)
10298{
10299 xmlParserCtxtPtr ctxt;
10300 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010301 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010302 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010303
Owen Taylor3473f882001-02-23 17:55:21 +000010304 ctxt = xmlNewParserCtxt();
10305 if (ctxt == NULL) {
10306 if (xmlDefaultSAXHandler.error != NULL) {
10307 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10308 }
10309 return(NULL);
10310 }
10311
Daniel Veillardf4862f02002-09-10 11:13:43 +000010312 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10313 if (normalized == NULL) {
10314 xmlFreeParserCtxt(ctxt);
10315 return(NULL);
10316 }
10317 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010318 if (inputStream == NULL) {
10319 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010320 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010321 return(NULL);
10322 }
10323
Owen Taylor3473f882001-02-23 17:55:21 +000010324 inputPush(ctxt, inputStream);
10325 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010326 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010327 if ((ctxt->directory == NULL) && (directory != NULL))
10328 ctxt->directory = directory;
10329
Daniel Veillardf4862f02002-09-10 11:13:43 +000010330 xmlFree(normalized);
10331
Owen Taylor3473f882001-02-23 17:55:21 +000010332 return(ctxt);
10333}
10334
10335/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010336 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010337 * @sax: the SAX handler block
10338 * @filename: the filename
10339 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10340 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010341 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010342 *
10343 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10344 * compressed document is provided by default if found at compile-time.
10345 * It use the given SAX function block to handle the parsing callback.
10346 * If sax is NULL, fallback to the default DOM tree building routines.
10347 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010348 * User data (void *) is stored within the parser context in the
10349 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010350 *
Owen Taylor3473f882001-02-23 17:55:21 +000010351 * Returns the resulting document tree
10352 */
10353
10354xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010355xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10356 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010357 xmlDocPtr ret;
10358 xmlParserCtxtPtr ctxt;
10359 char *directory = NULL;
10360
Daniel Veillard635ef722001-10-29 11:48:19 +000010361 xmlInitParser();
10362
Owen Taylor3473f882001-02-23 17:55:21 +000010363 ctxt = xmlCreateFileParserCtxt(filename);
10364 if (ctxt == NULL) {
10365 return(NULL);
10366 }
10367 if (sax != NULL) {
10368 if (ctxt->sax != NULL)
10369 xmlFree(ctxt->sax);
10370 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010371 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010372 if (data!=NULL) {
10373 ctxt->_private=data;
10374 }
Owen Taylor3473f882001-02-23 17:55:21 +000010375
10376 if ((ctxt->directory == NULL) && (directory == NULL))
10377 directory = xmlParserGetDirectory(filename);
10378 if ((ctxt->directory == NULL) && (directory != NULL))
10379 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10380
Daniel Veillarddad3f682002-11-17 16:47:27 +000010381 ctxt->recovery = recovery;
10382
Owen Taylor3473f882001-02-23 17:55:21 +000010383 xmlParseDocument(ctxt);
10384
10385 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10386 else {
10387 ret = NULL;
10388 xmlFreeDoc(ctxt->myDoc);
10389 ctxt->myDoc = NULL;
10390 }
10391 if (sax != NULL)
10392 ctxt->sax = NULL;
10393 xmlFreeParserCtxt(ctxt);
10394
10395 return(ret);
10396}
10397
10398/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010399 * xmlSAXParseFile:
10400 * @sax: the SAX handler block
10401 * @filename: the filename
10402 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10403 * documents
10404 *
10405 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10406 * compressed document is provided by default if found at compile-time.
10407 * It use the given SAX function block to handle the parsing callback.
10408 * If sax is NULL, fallback to the default DOM tree building routines.
10409 *
10410 * Returns the resulting document tree
10411 */
10412
10413xmlDocPtr
10414xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10415 int recovery) {
10416 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10417}
10418
10419/**
Owen Taylor3473f882001-02-23 17:55:21 +000010420 * xmlRecoverDoc:
10421 * @cur: a pointer to an array of xmlChar
10422 *
10423 * parse an XML in-memory document and build a tree.
10424 * In the case the document is not Well Formed, a tree is built anyway
10425 *
10426 * Returns the resulting document tree
10427 */
10428
10429xmlDocPtr
10430xmlRecoverDoc(xmlChar *cur) {
10431 return(xmlSAXParseDoc(NULL, cur, 1));
10432}
10433
10434/**
10435 * xmlParseFile:
10436 * @filename: the filename
10437 *
10438 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10439 * compressed document is provided by default if found at compile-time.
10440 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010441 * Returns the resulting document tree if the file was wellformed,
10442 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010443 */
10444
10445xmlDocPtr
10446xmlParseFile(const char *filename) {
10447 return(xmlSAXParseFile(NULL, filename, 0));
10448}
10449
10450/**
10451 * xmlRecoverFile:
10452 * @filename: the filename
10453 *
10454 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10455 * compressed document is provided by default if found at compile-time.
10456 * In the case the document is not Well Formed, a tree is built anyway
10457 *
10458 * Returns the resulting document tree
10459 */
10460
10461xmlDocPtr
10462xmlRecoverFile(const char *filename) {
10463 return(xmlSAXParseFile(NULL, filename, 1));
10464}
10465
10466
10467/**
10468 * xmlSetupParserForBuffer:
10469 * @ctxt: an XML parser context
10470 * @buffer: a xmlChar * buffer
10471 * @filename: a file name
10472 *
10473 * Setup the parser context to parse a new buffer; Clears any prior
10474 * contents from the parser context. The buffer parameter must not be
10475 * NULL, but the filename parameter can be
10476 */
10477void
10478xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10479 const char* filename)
10480{
10481 xmlParserInputPtr input;
10482
10483 input = xmlNewInputStream(ctxt);
10484 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010485 xmlGenericError(xmlGenericErrorContext,
10486 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010487 xmlFree(ctxt);
10488 return;
10489 }
10490
10491 xmlClearParserCtxt(ctxt);
10492 if (filename != NULL)
10493 input->filename = xmlMemStrdup(filename);
10494 input->base = buffer;
10495 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010496 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010497 inputPush(ctxt, input);
10498}
10499
10500/**
10501 * xmlSAXUserParseFile:
10502 * @sax: a SAX handler
10503 * @user_data: The user data returned on SAX callbacks
10504 * @filename: a file name
10505 *
10506 * parse an XML file and call the given SAX handler routines.
10507 * Automatic support for ZLIB/Compress compressed document is provided
10508 *
10509 * Returns 0 in case of success or a error number otherwise
10510 */
10511int
10512xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10513 const char *filename) {
10514 int ret = 0;
10515 xmlParserCtxtPtr ctxt;
10516
10517 ctxt = xmlCreateFileParserCtxt(filename);
10518 if (ctxt == NULL) return -1;
10519 if (ctxt->sax != &xmlDefaultSAXHandler)
10520 xmlFree(ctxt->sax);
10521 ctxt->sax = sax;
10522 if (user_data != NULL)
10523 ctxt->userData = user_data;
10524
10525 xmlParseDocument(ctxt);
10526
10527 if (ctxt->wellFormed)
10528 ret = 0;
10529 else {
10530 if (ctxt->errNo != 0)
10531 ret = ctxt->errNo;
10532 else
10533 ret = -1;
10534 }
10535 if (sax != NULL)
10536 ctxt->sax = NULL;
10537 xmlFreeParserCtxt(ctxt);
10538
10539 return ret;
10540}
10541
10542/************************************************************************
10543 * *
10544 * Front ends when parsing from memory *
10545 * *
10546 ************************************************************************/
10547
10548/**
10549 * xmlCreateMemoryParserCtxt:
10550 * @buffer: a pointer to a char array
10551 * @size: the size of the array
10552 *
10553 * Create a parser context for an XML in-memory document.
10554 *
10555 * Returns the new parser context or NULL
10556 */
10557xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010558xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010559 xmlParserCtxtPtr ctxt;
10560 xmlParserInputPtr input;
10561 xmlParserInputBufferPtr buf;
10562
10563 if (buffer == NULL)
10564 return(NULL);
10565 if (size <= 0)
10566 return(NULL);
10567
10568 ctxt = xmlNewParserCtxt();
10569 if (ctxt == NULL)
10570 return(NULL);
10571
10572 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010573 if (buf == NULL) {
10574 xmlFreeParserCtxt(ctxt);
10575 return(NULL);
10576 }
Owen Taylor3473f882001-02-23 17:55:21 +000010577
10578 input = xmlNewInputStream(ctxt);
10579 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010580 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010581 xmlFreeParserCtxt(ctxt);
10582 return(NULL);
10583 }
10584
10585 input->filename = NULL;
10586 input->buf = buf;
10587 input->base = input->buf->buffer->content;
10588 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010589 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010590
10591 inputPush(ctxt, input);
10592 return(ctxt);
10593}
10594
10595/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010596 * xmlSAXParseMemoryWithData:
10597 * @sax: the SAX handler block
10598 * @buffer: an pointer to a char array
10599 * @size: the size of the array
10600 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10601 * documents
10602 * @data: the userdata
10603 *
10604 * parse an XML in-memory block and use the given SAX function block
10605 * to handle the parsing callback. If sax is NULL, fallback to the default
10606 * DOM tree building routines.
10607 *
10608 * User data (void *) is stored within the parser context in the
10609 * context's _private member, so it is available nearly everywhere in libxml
10610 *
10611 * Returns the resulting document tree
10612 */
10613
10614xmlDocPtr
10615xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10616 int size, int recovery, void *data) {
10617 xmlDocPtr ret;
10618 xmlParserCtxtPtr ctxt;
10619
10620 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10621 if (ctxt == NULL) return(NULL);
10622 if (sax != NULL) {
10623 if (ctxt->sax != NULL)
10624 xmlFree(ctxt->sax);
10625 ctxt->sax = sax;
10626 }
10627 if (data!=NULL) {
10628 ctxt->_private=data;
10629 }
10630
10631 xmlParseDocument(ctxt);
10632
10633 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10634 else {
10635 ret = NULL;
10636 xmlFreeDoc(ctxt->myDoc);
10637 ctxt->myDoc = NULL;
10638 }
10639 if (sax != NULL)
10640 ctxt->sax = NULL;
10641 xmlFreeParserCtxt(ctxt);
10642
10643 return(ret);
10644}
10645
10646/**
Owen Taylor3473f882001-02-23 17:55:21 +000010647 * xmlSAXParseMemory:
10648 * @sax: the SAX handler block
10649 * @buffer: an pointer to a char array
10650 * @size: the size of the array
10651 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10652 * documents
10653 *
10654 * parse an XML in-memory block and use the given SAX function block
10655 * to handle the parsing callback. If sax is NULL, fallback to the default
10656 * DOM tree building routines.
10657 *
10658 * Returns the resulting document tree
10659 */
10660xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010661xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10662 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010663 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010664}
10665
10666/**
10667 * xmlParseMemory:
10668 * @buffer: an pointer to a char array
10669 * @size: the size of the array
10670 *
10671 * parse an XML in-memory block and build a tree.
10672 *
10673 * Returns the resulting document tree
10674 */
10675
Daniel Veillard50822cb2001-07-26 20:05:51 +000010676xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010677 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10678}
10679
10680/**
10681 * xmlRecoverMemory:
10682 * @buffer: an pointer to a char array
10683 * @size: the size of the array
10684 *
10685 * parse an XML in-memory block and build a tree.
10686 * In the case the document is not Well Formed, a tree is built anyway
10687 *
10688 * Returns the resulting document tree
10689 */
10690
Daniel Veillard50822cb2001-07-26 20:05:51 +000010691xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010692 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10693}
10694
10695/**
10696 * xmlSAXUserParseMemory:
10697 * @sax: a SAX handler
10698 * @user_data: The user data returned on SAX callbacks
10699 * @buffer: an in-memory XML document input
10700 * @size: the length of the XML document in bytes
10701 *
10702 * A better SAX parsing routine.
10703 * parse an XML in-memory buffer and call the given SAX handler routines.
10704 *
10705 * Returns 0 in case of success or a error number otherwise
10706 */
10707int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010708 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010709 int ret = 0;
10710 xmlParserCtxtPtr ctxt;
10711 xmlSAXHandlerPtr oldsax = NULL;
10712
Daniel Veillard9e923512002-08-14 08:48:52 +000010713 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010714 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10715 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010716 oldsax = ctxt->sax;
10717 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010718 if (user_data != NULL)
10719 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010720
10721 xmlParseDocument(ctxt);
10722
10723 if (ctxt->wellFormed)
10724 ret = 0;
10725 else {
10726 if (ctxt->errNo != 0)
10727 ret = ctxt->errNo;
10728 else
10729 ret = -1;
10730 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010731 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010732 xmlFreeParserCtxt(ctxt);
10733
10734 return ret;
10735}
10736
10737/**
10738 * xmlCreateDocParserCtxt:
10739 * @cur: a pointer to an array of xmlChar
10740 *
10741 * Creates a parser context for an XML in-memory document.
10742 *
10743 * Returns the new parser context or NULL
10744 */
10745xmlParserCtxtPtr
10746xmlCreateDocParserCtxt(xmlChar *cur) {
10747 int len;
10748
10749 if (cur == NULL)
10750 return(NULL);
10751 len = xmlStrlen(cur);
10752 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10753}
10754
10755/**
10756 * xmlSAXParseDoc:
10757 * @sax: the SAX handler block
10758 * @cur: a pointer to an array of xmlChar
10759 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10760 * documents
10761 *
10762 * parse an XML in-memory document and build a tree.
10763 * It use the given SAX function block to handle the parsing callback.
10764 * If sax is NULL, fallback to the default DOM tree building routines.
10765 *
10766 * Returns the resulting document tree
10767 */
10768
10769xmlDocPtr
10770xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10771 xmlDocPtr ret;
10772 xmlParserCtxtPtr ctxt;
10773
10774 if (cur == NULL) return(NULL);
10775
10776
10777 ctxt = xmlCreateDocParserCtxt(cur);
10778 if (ctxt == NULL) return(NULL);
10779 if (sax != NULL) {
10780 ctxt->sax = sax;
10781 ctxt->userData = NULL;
10782 }
10783
10784 xmlParseDocument(ctxt);
10785 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10786 else {
10787 ret = NULL;
10788 xmlFreeDoc(ctxt->myDoc);
10789 ctxt->myDoc = NULL;
10790 }
10791 if (sax != NULL)
10792 ctxt->sax = NULL;
10793 xmlFreeParserCtxt(ctxt);
10794
10795 return(ret);
10796}
10797
10798/**
10799 * xmlParseDoc:
10800 * @cur: a pointer to an array of xmlChar
10801 *
10802 * parse an XML in-memory document and build a tree.
10803 *
10804 * Returns the resulting document tree
10805 */
10806
10807xmlDocPtr
10808xmlParseDoc(xmlChar *cur) {
10809 return(xmlSAXParseDoc(NULL, cur, 0));
10810}
10811
Daniel Veillard8107a222002-01-13 14:10:10 +000010812/************************************************************************
10813 * *
10814 * Specific function to keep track of entities references *
10815 * and used by the XSLT debugger *
10816 * *
10817 ************************************************************************/
10818
10819static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10820
10821/**
10822 * xmlAddEntityReference:
10823 * @ent : A valid entity
10824 * @firstNode : A valid first node for children of entity
10825 * @lastNode : A valid last node of children entity
10826 *
10827 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10828 */
10829static void
10830xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10831 xmlNodePtr lastNode)
10832{
10833 if (xmlEntityRefFunc != NULL) {
10834 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10835 }
10836}
10837
10838
10839/**
10840 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010841 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010842 *
10843 * Set the function to call call back when a xml reference has been made
10844 */
10845void
10846xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10847{
10848 xmlEntityRefFunc = func;
10849}
Owen Taylor3473f882001-02-23 17:55:21 +000010850
10851/************************************************************************
10852 * *
10853 * Miscellaneous *
10854 * *
10855 ************************************************************************/
10856
10857#ifdef LIBXML_XPATH_ENABLED
10858#include <libxml/xpath.h>
10859#endif
10860
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010861extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010862static int xmlParserInitialized = 0;
10863
10864/**
10865 * xmlInitParser:
10866 *
10867 * Initialization function for the XML parser.
10868 * This is not reentrant. Call once before processing in case of
10869 * use in multithreaded programs.
10870 */
10871
10872void
10873xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010874 if (xmlParserInitialized != 0)
10875 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010876
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010877 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10878 (xmlGenericError == NULL))
10879 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010880 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010881 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010882 xmlInitCharEncodingHandlers();
10883 xmlInitializePredefinedEntities();
10884 xmlDefaultSAXHandlerInit();
10885 xmlRegisterDefaultInputCallbacks();
10886 xmlRegisterDefaultOutputCallbacks();
10887#ifdef LIBXML_HTML_ENABLED
10888 htmlInitAutoClose();
10889 htmlDefaultSAXHandlerInit();
10890#endif
10891#ifdef LIBXML_XPATH_ENABLED
10892 xmlXPathInit();
10893#endif
10894 xmlParserInitialized = 1;
10895}
10896
10897/**
10898 * xmlCleanupParser:
10899 *
10900 * Cleanup function for the XML parser. It tries to reclaim all
10901 * parsing related global memory allocated for the parser processing.
10902 * It doesn't deallocate any document related memory. Calling this
10903 * function should not prevent reusing the parser.
10904 */
10905
10906void
10907xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010908 xmlCleanupCharEncodingHandlers();
10909 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010910#ifdef LIBXML_CATALOG_ENABLED
10911 xmlCatalogCleanup();
10912#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010913 xmlCleanupThreads();
10914 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010915}