blob: cb2be96ad0dcebd7720801f6e7ac55c1cce15f47 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * Names of the processing instructions that carry the reserved "xml"
 * prefix and are nevertheless allowed by W3C specifications.
 * The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
303/*
304 * Macros for accessing the content. Those should be used only by the parser,
305 * and not exported.
306 *
307 * Dirty macros, i.e. one often need to make assumption on the context to
308 * use them
309 *
310 * CUR_PTR return the current pointer to the xmlChar to be parsed.
311 * To be used with extreme caution since operations consuming
312 * characters may move the input buffer to a different location !
313 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
314 * This should be used internally by the parser
315 * only to compare to ASCII values otherwise it would break when
316 * running with UTF-8 encoding.
317 * RAW same as CUR but in the input buffer, bypass any token
318 * extraction that may have been done
319 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
320 * to compare on ASCII based substring.
321 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
322 * strings within the parser.
323 *
324 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
325 *
326 * NEXT Skip to the next character, this does the proper decoding
327 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000328 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000329 * CUR_CHAR(l) returns the current unicode character (int), set l
330 * to the number of xmlChars used for the encoding [0-5].
331 * CUR_SCHAR same but operate on a string instead of the context
332 * COPY_BUF copy the current unicode char to the target buffer, increment
333 * the index
334 * GROW, SHRINK handling of input buffers
335 */
336
/* Raw access to the current input; all of these expect a local 'ctxt' */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by 'val' xmlChars, then give a parameter-entity reference a
 * chance to be handled and pop the input if it is exhausted and cannot
 * be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Shorthands for the out-of-line helpers; both expect a local 'ctxt' */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Step over exactly one xmlChar and try to grow the input when the
 * terminating 0 is reached.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
380
/*
 * NEXTL(l): account for the current character in line/col, skip l
 * xmlChars in the input buffer and give a parameter-entity reference
 * a chance to be handled.
 * Every macro argument below is parenthesized so that arbitrary
 * expressions can be passed safely.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context / of string s; its length goes to l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v (encoded on l xmlChars) to
 * buffer b at index i and advance i.
 * Deliberately left as a bare if/else statement (no do/while wrapper)
 * so that existing call sites keep compiling unchanged.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. It expects
 * both an xmlChar pointer named <buffer> and an int named <buffer>_size to
 * be in scope. On allocation failure the previous block is released (it
 * used to be leaked because the only pointer to it was overwritten with
 * NULL), an error is reported and the *enclosing function* returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (buffer##_grown == NULL) {                                       \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001772 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1773 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001774 ctxt->sax->error(ctxt->userData,
1775 "Name %s is not XML Namespace compliant\n",
1776 name);
1777 }
1778 }
1779 cur++;
1780
Owen Taylor3473f882001-02-23 17:55:21 +00001781 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1782 buf[len++] = c;
1783 c = *cur++;
1784 }
1785 if (len >= max) {
1786 /*
1787 * Okay someone managed to make a huge name, so he's ready to pay
1788 * for the processing speed.
1789 */
1790 max = len * 2;
1791
1792 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1793 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001794 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1795 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001796 ctxt->sax->error(ctxt->userData,
1797 "xmlSplitQName: out of memory\n");
1798 return(NULL);
1799 }
1800 memcpy(buffer, buf, len);
1801 while (c != 0) { /* tested bigname2.xml */
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001809 ctxt->sax->error(ctxt->userData,
1810 "xmlSplitQName: out of memory\n");
1811 return(NULL);
1812 }
1813 }
1814 buffer[len++] = c;
1815 c = *cur++;
1816 }
1817 buffer[len] = 0;
1818 }
1819
1820 if (buffer == NULL)
1821 ret = xmlStrndup(buf, len);
1822 else {
1823 ret = buffer;
1824 }
1825 }
1826
1827 return(ret);
1828}
1829
1830/************************************************************************
1831 * *
1832 * The parser itself *
1833 * Relates to http://www.w3.org/TR/REC-xml *
1834 * *
1835 ************************************************************************/
1836
Daniel Veillard76d66f42001-05-16 21:05:17 +00001837static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001838/**
1839 * xmlParseName:
1840 * @ctxt: an XML parser context
1841 *
1842 * parse an XML name.
1843 *
1844 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1845 * CombiningChar | Extender
1846 *
1847 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1848 *
1849 * [6] Names ::= Name (S Name)*
1850 *
1851 * Returns the Name parsed or NULL
1852 */
1853
1854xmlChar *
1855xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001856 const xmlChar *in;
1857 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001858 int count = 0;
1859
1860 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001861
1862 /*
1863 * Accelerator for simple ASCII names
1864 */
1865 in = ctxt->input->cur;
1866 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1867 ((*in >= 0x41) && (*in <= 0x5A)) ||
1868 (*in == '_') || (*in == ':')) {
1869 in++;
1870 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1871 ((*in >= 0x41) && (*in <= 0x5A)) ||
1872 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 (*in == '_') || (*in == '-') ||
1874 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001875 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001876 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001877 count = in - ctxt->input->cur;
1878 ret = xmlStrndup(ctxt->input->cur, count);
1879 ctxt->input->cur = in;
1880 return(ret);
1881 }
1882 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001883 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001884}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
Daniel Veillard46de64e2002-05-29 08:21:33 +00001886/**
1887 * xmlParseNameAndCompare:
1888 * @ctxt: an XML parser context
1889 *
1890 * parse an XML name and compares for match
1891 * (specialized for endtag parsing)
1892 *
1893 *
1894 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1895 * and the name for mismatch
1896 */
1897
Daniel Veillardf4862f02002-09-10 11:13:43 +00001898static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001899xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1900 const xmlChar *cmp = other;
1901 const xmlChar *in;
1902 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001903
1904 GROW;
1905
1906 in = ctxt->input->cur;
1907 while (*in != 0 && *in == *cmp) {
1908 ++in;
1909 ++cmp;
1910 }
1911 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1912 /* success */
1913 ctxt->input->cur = in;
1914 return (xmlChar*) 1;
1915 }
1916 /* failure (or end of input buffer), check with full function */
1917 ret = xmlParseName (ctxt);
1918 if (ret != 0 && xmlStrEqual (ret, other)) {
1919 xmlFree (ret);
1920 return (xmlChar*) 1;
1921 }
1922 return ret;
1923}
1924
Daniel Veillard76d66f42001-05-16 21:05:17 +00001925static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001926xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1927 xmlChar buf[XML_MAX_NAMELEN + 5];
1928 int len = 0, l;
1929 int c;
1930 int count = 0;
1931
1932 /*
1933 * Handler for more complex cases
1934 */
1935 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 c = CUR_CHAR(l);
1937 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1938 (!IS_LETTER(c) && (c != '_') &&
1939 (c != ':'))) {
1940 return(NULL);
1941 }
1942
1943 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1944 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1945 (c == '.') || (c == '-') ||
1946 (c == '_') || (c == ':') ||
1947 (IS_COMBINING(c)) ||
1948 (IS_EXTENDER(c)))) {
1949 if (count++ > 100) {
1950 count = 0;
1951 GROW;
1952 }
1953 COPY_BUF(l,buf,len,c);
1954 NEXTL(l);
1955 c = CUR_CHAR(l);
1956 if (len >= XML_MAX_NAMELEN) {
1957 /*
1958 * Okay someone managed to make a huge name, so he's ready to pay
1959 * for the processing speed.
1960 */
1961 xmlChar *buffer;
1962 int max = len * 2;
1963
1964 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1965 if (buffer == NULL) {
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001968 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001969 return(NULL);
1970 }
1971 memcpy(buffer, buf, len);
1972 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c))) {
1977 if (count++ > 100) {
1978 count = 0;
1979 GROW;
1980 }
1981 if (len + 10 > max) {
1982 max *= 2;
1983 buffer = (xmlChar *) xmlRealloc(buffer,
1984 max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 }
1992 COPY_BUF(l,buffer,len,c);
1993 NEXTL(l);
1994 c = CUR_CHAR(l);
1995 }
1996 buffer[len] = 0;
1997 return(buffer);
1998 }
1999 }
2000 return(xmlStrndup(buf, len));
2001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
2050 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
2130 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
2205 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
2366 SHRINK;
2367 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002368 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002369 if (*in != '"' && *in != '\'') {
2370 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2372 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002375 return(NULL);
2376 }
2377 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2378 limit = *in;
2379 ++in;
2380
2381 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2382 *in != '&' && *in != '<'
2383 ) {
2384 ++in;
2385 }
2386 if (*in != limit) {
2387 return xmlParseAttValueComplex(ctxt);
2388 }
2389 ++in;
2390 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2391 CUR_PTR = in;
2392 return ret;
2393}
2394
Daniel Veillard01c13b52002-12-10 15:19:08 +00002395/**
2396 * xmlParseAttValueComplex:
2397 * @ctxt: an XML parser context
2398 *
2399 * parse a value for an attribute, this is the fallback function
2400 * of xmlParseAttValue() when the attribute parsing requires handling
2401 * of non-ASCII characters.
2402 *
2403 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2404 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlChar *
2406xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2407 xmlChar limit = 0;
2408 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002409 int len = 0;
2410 int buf_size = 0;
2411 int c, l;
2412 xmlChar *current = NULL;
2413 xmlEntityPtr ent;
2414
2415
2416 SHRINK;
2417 if (NXT(0) == '"') {
2418 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2419 limit = '"';
2420 NEXT;
2421 } else if (NXT(0) == '\'') {
2422 limit = '\'';
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 NEXT;
2425 } else {
2426 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 return(NULL);
2432 }
2433
2434 /*
2435 * allocate a translation buffer.
2436 */
2437 buf_size = XML_PARSER_BUFFER_SIZE;
2438 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2439 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002440 xmlGenericError(xmlGenericErrorContext,
2441 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(NULL);
2443 }
2444
2445 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002446 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 */
2448 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 while ((NXT(0) != limit) && /* checked */
2450 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002452 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (NXT(1) == '#') {
2454 int val = xmlParseCharRef(ctxt);
2455 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (ctxt->replaceEntities) {
2457 if (len > buf_size - 10) {
2458 growBuffer(buf);
2459 }
2460 buf[len++] = '&';
2461 } else {
2462 /*
2463 * The reparsing will be done in xmlStringGetNodeList()
2464 * called by the attribute() function in SAX.c
2465 */
2466 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002467
Daniel Veillard319a7422001-09-11 09:27:09 +00002468 if (len > buf_size - 10) {
2469 growBuffer(buf);
2470 }
2471 current = &buffer[0];
2472 while (*current != 0) { /* non input consuming */
2473 buf[len++] = *current++;
2474 }
Owen Taylor3473f882001-02-23 17:55:21 +00002475 }
2476 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 len += xmlCopyChar(0, &buf[len], val);
2481 }
2482 } else {
2483 ent = xmlParseEntityRef(ctxt);
2484 if ((ent != NULL) &&
2485 (ctxt->replaceEntities != 0)) {
2486 xmlChar *rep;
2487
2488 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2489 rep = xmlStringDecodeEntities(ctxt, ent->content,
2490 XML_SUBSTITUTE_REF, 0, 0, 0);
2491 if (rep != NULL) {
2492 current = rep;
2493 while (*current != 0) { /* non input consuming */
2494 buf[len++] = *current++;
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 }
2499 xmlFree(rep);
2500 }
2501 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
Owen Taylor3473f882001-02-23 17:55:21 +00002505 if (ent->content != NULL)
2506 buf[len++] = ent->content[0];
2507 }
2508 } else if (ent != NULL) {
2509 int i = xmlStrlen(ent->name);
2510 const xmlChar *cur = ent->name;
2511
2512 /*
2513 * This may look absurd but is needed to detect
2514 * entities problems
2515 */
2516 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2517 (ent->content != NULL)) {
2518 xmlChar *rep;
2519 rep = xmlStringDecodeEntities(ctxt, ent->content,
2520 XML_SUBSTITUTE_REF, 0, 0, 0);
2521 if (rep != NULL)
2522 xmlFree(rep);
2523 }
2524
2525 /*
2526 * Just output the reference
2527 */
2528 buf[len++] = '&';
2529 if (len > buf_size - i - 10) {
2530 growBuffer(buf);
2531 }
2532 for (;i > 0;i--)
2533 buf[len++] = *cur++;
2534 buf[len++] = ';';
2535 }
2536 }
2537 } else {
2538 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2539 COPY_BUF(l,buf,len,0x20);
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 } else {
2544 COPY_BUF(l,buf,len,c);
2545 if (len > buf_size - 10) {
2546 growBuffer(buf);
2547 }
2548 }
2549 NEXTL(l);
2550 }
2551 GROW;
2552 c = CUR_CHAR(l);
2553 }
2554 buf[len++] = 0;
2555 if (RAW == '<') {
2556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2558 ctxt->sax->error(ctxt->userData,
2559 "Unescaped '<' not allowed in attributes values\n");
2560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 } else if (RAW != limit) {
2563 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else
2569 NEXT;
2570 return(buf);
2571}
2572
2573/**
2574 * xmlParseSystemLiteral:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML Literal
2578 *
2579 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2580 *
2581 * Returns the SystemLiteral parsed or NULL
2582 */
2583
2584xmlChar *
2585xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2586 xmlChar *buf = NULL;
2587 int len = 0;
2588 int size = XML_PARSER_BUFFER_SIZE;
2589 int cur, l;
2590 xmlChar stop;
2591 int state = ctxt->instate;
2592 int count = 0;
2593
2594 SHRINK;
2595 if (RAW == '"') {
2596 NEXT;
2597 stop = '"';
2598 } else if (RAW == '\'') {
2599 NEXT;
2600 stop = '\'';
2601 } else {
2602 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "SystemLiteral \" or ' expected\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
2609 }
2610
2611 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "malloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2618 cur = CUR_CHAR(l);
2619 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2620 if (len + 5 >= size) {
2621 size *= 2;
2622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2623 if (buf == NULL) {
2624 xmlGenericError(xmlGenericErrorContext,
2625 "realloc of %d byte failed\n", size);
2626 ctxt->instate = (xmlParserInputState) state;
2627 return(NULL);
2628 }
2629 }
2630 count++;
2631 if (count > 50) {
2632 GROW;
2633 count = 0;
2634 }
2635 COPY_BUF(l,buf,len,cur);
2636 NEXTL(l);
2637 cur = CUR_CHAR(l);
2638 if (cur == 0) {
2639 GROW;
2640 SHRINK;
2641 cur = CUR_CHAR(l);
2642 }
2643 }
2644 buf[len] = 0;
2645 ctxt->instate = (xmlParserInputState) state;
2646 if (!IS_CHAR(cur)) {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2650 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002651 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
2653 NEXT;
2654 }
2655 return(buf);
2656}
2657
2658/**
2659 * xmlParsePubidLiteral:
2660 * @ctxt: an XML parser context
2661 *
2662 * parse an XML public literal
2663 *
2664 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2665 *
2666 * Returns the PubidLiteral parsed or NULL.
2667 */
2668
2669xmlChar *
2670xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2671 xmlChar *buf = NULL;
2672 int len = 0;
2673 int size = XML_PARSER_BUFFER_SIZE;
2674 xmlChar cur;
2675 xmlChar stop;
2676 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002677 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
2679 SHRINK;
2680 if (RAW == '"') {
2681 NEXT;
2682 stop = '"';
2683 } else if (RAW == '\'') {
2684 NEXT;
2685 stop = '\'';
2686 } else {
2687 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2689 ctxt->sax->error(ctxt->userData,
2690 "SystemLiteral \" or ' expected\n");
2691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(NULL);
2694 }
2695 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2696 if (buf == NULL) {
2697 xmlGenericError(xmlGenericErrorContext,
2698 "malloc of %d byte failed\n", size);
2699 return(NULL);
2700 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002701 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 cur = CUR;
2703 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2704 if (len + 1 >= size) {
2705 size *= 2;
2706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2707 if (buf == NULL) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "realloc of %d byte failed\n", size);
2710 return(NULL);
2711 }
2712 }
2713 buf[len++] = cur;
2714 count++;
2715 if (count > 50) {
2716 GROW;
2717 count = 0;
2718 }
2719 NEXT;
2720 cur = CUR;
2721 if (cur == 0) {
2722 GROW;
2723 SHRINK;
2724 cur = CUR;
2725 }
2726 }
2727 buf[len] = 0;
2728 if (cur != stop) {
2729 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else {
2735 NEXT;
2736 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002737 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(buf);
2739}
2740
Daniel Veillard48b2f892001-02-25 16:11:03 +00002741void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002742/**
2743 * xmlParseCharData:
2744 * @ctxt: an XML parser context
2745 * @cdata: int indicating whether we are within a CDATA section
2746 *
2747 * parse a CharData section.
2748 * if we are within a CDATA section ']]>' marks an end of section.
2749 *
2750 * The right angle bracket (>) may be represented using the string "&gt;",
2751 * and must, for compatibility, be escaped using "&gt;" or a character
2752 * reference when it appears in the string "]]>" in content, when that
2753 * string is not marking the end of a CDATA section.
2754 *
2755 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2756 */
2757
2758void
2759xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002760 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002762 int line = ctxt->input->line;
2763 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 SHRINK;
2766 GROW;
2767 /*
2768 * Accelerated common case where input don't need to be
2769 * modified before passing it to the handler.
2770 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002771 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 in = ctxt->input->cur;
2773 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002774get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2776 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002780 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002781 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002782 ctxt->input->line++;
2783 in++;
2784 }
2785 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 }
2787 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002788 if ((in[1] == ']') && (in[2] == '>')) {
2789 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "Sequence ']]>' not allowed in content\n");
2793 ctxt->input->cur = in;
2794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002796 return;
2797 }
2798 in++;
2799 goto get_more;
2800 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002801 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002802 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002803 if (IS_BLANK(*ctxt->input->cur)) {
2804 const xmlChar *tmp = ctxt->input->cur;
2805 ctxt->input->cur = in;
2806 if (areBlanks(ctxt, tmp, nbchar)) {
2807 if (ctxt->sax->ignorableWhitespace != NULL)
2808 ctxt->sax->ignorableWhitespace(ctxt->userData,
2809 tmp, nbchar);
2810 } else {
2811 if (ctxt->sax->characters != NULL)
2812 ctxt->sax->characters(ctxt->userData,
2813 tmp, nbchar);
2814 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002815 line = ctxt->input->line;
2816 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData,
2820 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002821 line = ctxt->input->line;
2822 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002823 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 }
2825 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 if (*in == 0xD) {
2827 in++;
2828 if (*in == 0xA) {
2829 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 ctxt->input->line++;
2832 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002834 in--;
2835 }
2836 if (*in == '<') {
2837 return;
2838 }
2839 if (*in == '&') {
2840 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002841 }
2842 SHRINK;
2843 GROW;
2844 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002845 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = 0;
2847 }
Daniel Veillard50582112001-03-26 22:52:16 +00002848 ctxt->input->line = line;
2849 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 xmlParseCharDataComplex(ctxt, cdata);
2851}
2852
Daniel Veillard01c13b52002-12-10 15:19:08 +00002853/**
2854 * xmlParseCharDataComplex:
2855 * @ctxt: an XML parser context
2856 * @cdata: int indicating whether we are within a CDATA section
2857 *
2858 * parse a CharData section.this is the fallback function
2859 * of xmlParseCharData() when the parsing requires handling
2860 * of non-ASCII characters.
2861 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862void
2863xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002864 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2865 int nbchar = 0;
2866 int cur, l;
2867 int count = 0;
2868
2869 SHRINK;
2870 GROW;
2871 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002872 while ((cur != '<') && /* checked */
2873 (cur != '&') &&
2874 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if ((cur == ']') && (NXT(1) == ']') &&
2876 (NXT(2) == '>')) {
2877 if (cdata) break;
2878 else {
2879 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Sequence ']]>' not allowed in content\n");
2883 /* Should this be relaxed ??? I see a "must here */
2884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
2888 COPY_BUF(l,buf,nbchar,cur);
2889 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2894 if (areBlanks(ctxt, buf, nbchar)) {
2895 if (ctxt->sax->ignorableWhitespace != NULL)
2896 ctxt->sax->ignorableWhitespace(ctxt->userData,
2897 buf, nbchar);
2898 } else {
2899 if (ctxt->sax->characters != NULL)
2900 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2901 }
2902 }
2903 nbchar = 0;
2904 }
2905 count++;
2906 if (count > 50) {
2907 GROW;
2908 count = 0;
2909 }
2910 NEXTL(l);
2911 cur = CUR_CHAR(l);
2912 }
2913 if (nbchar != 0) {
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
2917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2918 if (areBlanks(ctxt, buf, nbchar)) {
2919 if (ctxt->sax->ignorableWhitespace != NULL)
2920 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2921 } else {
2922 if (ctxt->sax->characters != NULL)
2923 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2924 }
2925 }
2926 }
2927}
2928
2929/**
2930 * xmlParseExternalID:
2931 * @ctxt: an XML parser context
2932 * @publicID: a xmlChar** receiving PubidLiteral
2933 * @strict: indicate whether we should restrict parsing to only
2934 * production [75], see NOTE below
2935 *
2936 * Parse an External ID or a Public ID
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002939 * 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2942 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2943 *
2944 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2945 *
2946 * Returns the function returns SystemLiteral and in the second
2947 * case publicID receives PubidLiteral, is strict is off
2948 * it is possible to return NULL and have publicID set.
2949 */
2950
2951xmlChar *
2952xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2953 xmlChar *URI = NULL;
2954
2955 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002956
2957 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2959 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2960 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2961 SKIP(6);
2962 if (!IS_BLANK(CUR)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "Space required after 'SYSTEM'\n");
2967 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002968 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 SKIP_BLANKS;
2971 URI = xmlParseSystemLiteral(ctxt);
2972 if (URI == NULL) {
2973 ctxt->errNo = XML_ERR_URI_REQUIRED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParseExternalID: SYSTEM, no URI\n");
2977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002979 }
2980 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2981 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2982 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2983 SKIP(6);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after 'PUBLIC'\n");
2989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 SKIP_BLANKS;
2993 *publicID = xmlParsePubidLiteral(ctxt);
2994 if (*publicID == NULL) {
2995 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 if (strict) {
3003 /*
3004 * We don't handle [83] so "S SystemLiteral" is required.
3005 */
3006 if (!IS_BLANK(CUR)) {
3007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3009 ctxt->sax->error(ctxt->userData,
3010 "Space required after the Public Identifier\n");
3011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003013 }
3014 } else {
3015 /*
3016 * We handle [83] so we return immediately, if
3017 * "S SystemLiteral" is not detected. From a purely parsing
3018 * point of view that's a nice mess.
3019 */
3020 const xmlChar *ptr;
3021 GROW;
3022
3023 ptr = CUR_PTR;
3024 if (!IS_BLANK(*ptr)) return(NULL);
3025
3026 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3027 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3028 }
3029 SKIP_BLANKS;
3030 URI = xmlParseSystemLiteral(ctxt);
3031 if (URI == NULL) {
3032 ctxt->errNo = XML_ERR_URI_REQUIRED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "xmlParseExternalID: PUBLIC, no URI\n");
3036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 }
3040 return(URI);
3041}
3042
3043/**
3044 * xmlParseComment:
3045 * @ctxt: an XML parser context
3046 *
3047 * Skip an XML (SGML) comment <!-- .... -->
3048 * The spec says that "For compatibility, the string "--" (double-hyphen)
3049 * must not occur within comments. "
3050 *
3051 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3052 */
3053void
3054xmlParseComment(xmlParserCtxtPtr ctxt) {
3055 xmlChar *buf = NULL;
3056 int len;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int q, ql;
3059 int r, rl;
3060 int cur, l;
3061 xmlParserInputState state;
3062 xmlParserInputPtr input = ctxt->input;
3063 int count = 0;
3064
3065 /*
3066 * Check that there is a comment right here.
3067 */
3068 if ((RAW != '<') || (NXT(1) != '!') ||
3069 (NXT(2) != '-') || (NXT(3) != '-')) return;
3070
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_COMMENT;
3073 SHRINK;
3074 SKIP(4);
3075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3076 if (buf == NULL) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "malloc of %d byte failed\n", size);
3079 ctxt->instate = state;
3080 return;
3081 }
3082 q = CUR_CHAR(ql);
3083 NEXTL(ql);
3084 r = CUR_CHAR(rl);
3085 NEXTL(rl);
3086 cur = CUR_CHAR(l);
3087 len = 0;
3088 while (IS_CHAR(cur) && /* checked */
3089 ((cur != '>') ||
3090 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003091 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003092 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095 "Comment must not contain '--' (double-hyphen)`\n");
3096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 if (len + 5 >= size) {
3100 size *= 2;
3101 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "realloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 }
3109 COPY_BUF(ql,buf,len,q);
3110 q = r;
3111 ql = rl;
3112 r = cur;
3113 rl = l;
3114
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (!IS_CHAR(cur)) {
3130 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment not terminated \n<!--%.50s\n", buf);
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 xmlFree(buf);
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142"Comment doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 NEXT;
3147 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3148 (!ctxt->disableSAX))
3149 ctxt->sax->comment(ctxt->userData, buf);
3150 xmlFree(buf);
3151 }
3152 ctxt->instate = state;
3153}
3154
3155/**
3156 * xmlParsePITarget:
3157 * @ctxt: an XML parser context
3158 *
3159 * parse the name of a PI
3160 *
3161 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3162 *
3163 * Returns the PITarget name or NULL
3164 */
3165
3166xmlChar *
3167xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3168 xmlChar *name;
3169
3170 name = xmlParseName(ctxt);
3171 if ((name != NULL) &&
3172 ((name[0] == 'x') || (name[0] == 'X')) &&
3173 ((name[1] == 'm') || (name[1] == 'M')) &&
3174 ((name[2] == 'l') || (name[2] == 'L'))) {
3175 int i;
3176 if ((name[0] == 'x') && (name[1] == 'm') &&
3177 (name[2] == 'l') && (name[3] == 0)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "XML declaration allowed only at the start of the document\n");
3182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 return(name);
3185 } else if (name[3] == 0) {
3186 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(name);
3192 }
3193 for (i = 0;;i++) {
3194 if (xmlW3CPIs[i] == NULL) break;
3195 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3196 return(name);
3197 }
3198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3199 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3200 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003201 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003202 }
3203 }
3204 return(name);
3205}
3206
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: catalog S? '=' S? quoted-URL S?
 * Any deviation raises the XML_WAR_CATALOG_PI warning, except a missing
 * '=' which is ignored without any report.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *href = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /*
     * the equal sign
     * NOTE(review): unlike the other syntax errors this one returns
     * without emitting the warning - confirm this is intended.
     */
    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    while (IS_BLANK(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    href = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (href != NULL)
        xmlFree(href);
}
#endif
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParsePI:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML Processing Instruction.
3275 *
3276 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3277 *
3278 * The processing is transfered to SAX once parsed.
3279 */
3280
3281void
3282xmlParsePI(xmlParserCtxtPtr ctxt) {
3283 xmlChar *buf = NULL;
3284 int len = 0;
3285 int size = XML_PARSER_BUFFER_SIZE;
3286 int cur, l;
3287 xmlChar *target;
3288 xmlParserInputState state;
3289 int count = 0;
3290
3291 if ((RAW == '<') && (NXT(1) == '?')) {
3292 xmlParserInputPtr input = ctxt->input;
3293 state = ctxt->instate;
3294 ctxt->instate = XML_PARSER_PI;
3295 /*
3296 * this is a Processing Instruction.
3297 */
3298 SKIP(2);
3299 SHRINK;
3300
3301 /*
3302 * Parse the target name and check for special support like
3303 * namespace.
3304 */
3305 target = xmlParsePITarget(ctxt);
3306 if (target != NULL) {
3307 if ((RAW == '?') && (NXT(1) == '>')) {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "PI declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 }
3316 SKIP(2);
3317
3318 /*
3319 * SAX: PI detected.
3320 */
3321 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3322 (ctxt->sax->processingInstruction != NULL))
3323 ctxt->sax->processingInstruction(ctxt->userData,
3324 target, NULL);
3325 ctxt->instate = state;
3326 xmlFree(target);
3327 return;
3328 }
3329 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3330 if (buf == NULL) {
3331 xmlGenericError(xmlGenericErrorContext,
3332 "malloc of %d byte failed\n", size);
3333 ctxt->instate = state;
3334 return;
3335 }
3336 cur = CUR;
3337 if (!IS_BLANK(cur)) {
3338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "xmlParsePI: PI %s space expected\n", target);
3342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 }
3345 SKIP_BLANKS;
3346 cur = CUR_CHAR(l);
3347 while (IS_CHAR(cur) && /* checked */
3348 ((cur != '?') || (NXT(1) != '>'))) {
3349 if (len + 5 >= size) {
3350 size *= 2;
3351 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3352 if (buf == NULL) {
3353 xmlGenericError(xmlGenericErrorContext,
3354 "realloc of %d byte failed\n", size);
3355 ctxt->instate = state;
3356 return;
3357 }
3358 }
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 COPY_BUF(l,buf,len,cur);
3365 NEXTL(l);
3366 cur = CUR_CHAR(l);
3367 if (cur == 0) {
3368 SHRINK;
3369 GROW;
3370 cur = CUR_CHAR(l);
3371 }
3372 }
3373 buf[len] = 0;
3374 if (cur != '?') {
3375 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData,
3378 "xmlParsePI: PI %s never end ...\n", target);
3379 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else {
3382 if (input != ctxt->input) {
3383 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "PI declaration doesn't start and stop in the same entity\n");
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP(2);
3391
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003392#ifdef LIBXML_CATALOG_ENABLED
3393 if (((state == XML_PARSER_MISC) ||
3394 (state == XML_PARSER_START)) &&
3395 (xmlStrEqual(target, XML_CATALOG_PI))) {
3396 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3397 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3398 (allow == XML_CATA_ALLOW_ALL))
3399 xmlParseCatalogPI(ctxt, buf);
3400 }
3401#endif
3402
3403
Owen Taylor3473f882001-02-23 17:55:21 +00003404 /*
3405 * SAX: PI detected.
3406 */
3407 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3408 (ctxt->sax->processingInstruction != NULL))
3409 ctxt->sax->processingInstruction(ctxt->userData,
3410 target, buf);
3411 }
3412 xmlFree(buf);
3413 xmlFree(target);
3414 } else {
3415 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "xmlParsePI : no target name\n");
3419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 ctxt->instate = state;
3423 }
3424}
3425
3426/**
3427 * xmlParseNotationDecl:
3428 * @ctxt: an XML parser context
3429 *
3430 * parse a notation declaration
3431 *
3432 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3433 *
3434 * Hence there is actually 3 choices:
3435 * 'PUBLIC' S PubidLiteral
3436 * 'PUBLIC' S PubidLiteral S SystemLiteral
3437 * and 'SYSTEM' S SystemLiteral
3438 *
3439 * See the NOTE on xmlParseExternalID().
3440 */
3441
3442void
3443xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3444 xmlChar *name;
3445 xmlChar *Pubid;
3446 xmlChar *Systemid;
3447
3448 if ((RAW == '<') && (NXT(1) == '!') &&
3449 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3450 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3451 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3452 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3453 xmlParserInputPtr input = ctxt->input;
3454 SHRINK;
3455 SKIP(10);
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after '<!NOTATION'\n");
3461 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003462 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return;
3464 }
3465 SKIP_BLANKS;
3466
Daniel Veillard76d66f42001-05-16 21:05:17 +00003467 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 if (name == NULL) {
3469 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "NOTATION: Name expected here\n");
3473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return;
3476 }
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after the NOTATION name'\n");
3482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return;
3485 }
3486 SKIP_BLANKS;
3487
3488 /*
3489 * Parse the IDs.
3490 */
3491 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3492 SKIP_BLANKS;
3493
3494 if (RAW == '>') {
3495 if (input != ctxt->input) {
3496 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499"Notation declaration doesn't start and stop in the same entity\n");
3500 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003501 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003502 }
3503 NEXT;
3504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3505 (ctxt->sax->notationDecl != NULL))
3506 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3507 } else {
3508 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "'>' required to close NOTATION declaration\n");
3512 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003513 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 }
3515 xmlFree(name);
3516 if (Systemid != NULL) xmlFree(Systemid);
3517 if (Pubid != NULL) xmlFree(Pubid);
3518 }
3519}
3520
3521/**
3522 * xmlParseEntityDecl:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse <!ENTITY declarations
3526 *
3527 * [70] EntityDecl ::= GEDecl | PEDecl
3528 *
3529 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3530 *
3531 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3532 *
3533 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3534 *
3535 * [74] PEDef ::= EntityValue | ExternalID
3536 *
3537 * [76] NDataDecl ::= S 'NDATA' S Name
3538 *
3539 * [ VC: Notation Declared ]
3540 * The Name must match the declared name of a notation.
3541 */
3542
3543void
3544xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3545 xmlChar *name = NULL;
3546 xmlChar *value = NULL;
3547 xmlChar *URI = NULL, *literal = NULL;
3548 xmlChar *ndata = NULL;
3549 int isParameter = 0;
3550 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003551 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003552
3553 GROW;
3554 if ((RAW == '<') && (NXT(1) == '!') &&
3555 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3556 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3557 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3558 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 SHRINK;
3560 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003561 skipped = SKIP_BLANKS;
3562 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3565 ctxt->sax->error(ctxt->userData,
3566 "Space required after '<!ENTITY'\n");
3567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
Owen Taylor3473f882001-02-23 17:55:21 +00003570
3571 if (RAW == '%') {
3572 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003573 skipped = SKIP_BLANKS;
3574 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003575 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData,
3578 "Space required after '%'\n");
3579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
Owen Taylor3473f882001-02-23 17:55:21 +00003582 isParameter = 1;
3583 }
3584
Daniel Veillard76d66f42001-05-16 21:05:17 +00003585 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003586 if (name == NULL) {
3587 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003592 return;
3593 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003594 skipped = SKIP_BLANKS;
3595 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3598 ctxt->sax->error(ctxt->userData,
3599 "Space required after the entity name\n");
3600 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003601 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
Owen Taylor3473f882001-02-23 17:55:21 +00003603
Daniel Veillardf5582f12002-06-11 10:08:16 +00003604 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 /*
3606 * handle the various case of definitions...
3607 */
3608 if (isParameter) {
3609 if ((RAW == '"') || (RAW == '\'')) {
3610 value = xmlParseEntityValue(ctxt, &orig);
3611 if (value) {
3612 if ((ctxt->sax != NULL) &&
3613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3614 ctxt->sax->entityDecl(ctxt->userData, name,
3615 XML_INTERNAL_PARAMETER_ENTITY,
3616 NULL, NULL, value);
3617 }
3618 } else {
3619 URI = xmlParseExternalID(ctxt, &literal, 1);
3620 if ((URI == NULL) && (literal == NULL)) {
3621 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Entity value required\n");
3625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
3628 if (URI) {
3629 xmlURIPtr uri;
3630
3631 uri = xmlParseURI((const char *) URI);
3632 if (uri == NULL) {
3633 ctxt->errNo = XML_ERR_INVALID_URI;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * This really ought to be a well formedness error
3641 * but the XML Core WG decided otherwise c.f. issue
3642 * E26 of the XML erratas.
3643 */
Owen Taylor3473f882001-02-23 17:55:21 +00003644 } else {
3645 if (uri->fragment != NULL) {
3646 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3647 if ((ctxt->sax != NULL) &&
3648 (!ctxt->disableSAX) &&
3649 (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003652 /*
3653 * Okay this is foolish to block those but not
3654 * invalid URIs.
3655 */
Owen Taylor3473f882001-02-23 17:55:21 +00003656 ctxt->wellFormed = 0;
3657 } else {
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) &&
3660 (ctxt->sax->entityDecl != NULL))
3661 ctxt->sax->entityDecl(ctxt->userData, name,
3662 XML_EXTERNAL_PARAMETER_ENTITY,
3663 literal, URI, NULL);
3664 }
3665 xmlFreeURI(uri);
3666 }
3667 }
3668 }
3669 } else {
3670 if ((RAW == '"') || (RAW == '\'')) {
3671 value = xmlParseEntityValue(ctxt, &orig);
3672 if ((ctxt->sax != NULL) &&
3673 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3674 ctxt->sax->entityDecl(ctxt->userData, name,
3675 XML_INTERNAL_GENERAL_ENTITY,
3676 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003677 /*
3678 * For expat compatibility in SAX mode.
3679 */
3680 if ((ctxt->myDoc == NULL) ||
3681 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3682 if (ctxt->myDoc == NULL) {
3683 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3684 }
3685 if (ctxt->myDoc->intSubset == NULL)
3686 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3687 BAD_CAST "fake", NULL, NULL);
3688
3689 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3690 NULL, NULL, value);
3691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 } else {
3693 URI = xmlParseExternalID(ctxt, &literal, 1);
3694 if ((URI == NULL) && (literal == NULL)) {
3695 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3697 ctxt->sax->error(ctxt->userData,
3698 "Entity value required\n");
3699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003701 }
3702 if (URI) {
3703 xmlURIPtr uri;
3704
3705 uri = xmlParseURI((const char *)URI);
3706 if (uri == NULL) {
3707 ctxt->errNo = XML_ERR_INVALID_URI;
3708 if ((ctxt->sax != NULL) &&
3709 (!ctxt->disableSAX) &&
3710 (ctxt->sax->error != NULL))
3711 ctxt->sax->error(ctxt->userData,
3712 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003713 /*
3714 * This really ought to be a well formedness error
3715 * but the XML Core WG decided otherwise c.f. issue
3716 * E26 of the XML erratas.
3717 */
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 if (uri->fragment != NULL) {
3720 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3721 if ((ctxt->sax != NULL) &&
3722 (!ctxt->disableSAX) &&
3723 (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003726 /*
3727 * Okay this is foolish to block those but not
3728 * invalid URIs.
3729 */
Owen Taylor3473f882001-02-23 17:55:21 +00003730 ctxt->wellFormed = 0;
3731 }
3732 xmlFreeURI(uri);
3733 }
3734 }
3735 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3736 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt->userData,
3739 "Space required before 'NDATA'\n");
3740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 if ((RAW == 'N') && (NXT(1) == 'D') &&
3745 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3746 (NXT(4) == 'A')) {
3747 SKIP(5);
3748 if (!IS_BLANK(CUR)) {
3749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3751 ctxt->sax->error(ctxt->userData,
3752 "Space required after 'NDATA'\n");
3753 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003754 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003757 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3759 (ctxt->sax->unparsedEntityDecl != NULL))
3760 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3761 literal, URI, ndata);
3762 } else {
3763 if ((ctxt->sax != NULL) &&
3764 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3765 ctxt->sax->entityDecl(ctxt->userData, name,
3766 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3767 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003768 /*
3769 * For expat compatibility in SAX mode.
3770 * assuming the entity repalcement was asked for
3771 */
3772 if ((ctxt->replaceEntities != 0) &&
3773 ((ctxt->myDoc == NULL) ||
3774 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3775 if (ctxt->myDoc == NULL) {
3776 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3777 }
3778
3779 if (ctxt->myDoc->intSubset == NULL)
3780 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3781 BAD_CAST "fake", NULL, NULL);
3782 entityDecl(ctxt, name,
3783 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3784 literal, URI, NULL);
3785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 }
3787 }
3788 }
3789 SKIP_BLANKS;
3790 if (RAW != '>') {
3791 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3793 ctxt->sax->error(ctxt->userData,
3794 "xmlParseEntityDecl: entity %s not terminated\n", name);
3795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 } else {
3798 if (input != ctxt->input) {
3799 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801 ctxt->sax->error(ctxt->userData,
3802"Entity declaration doesn't start and stop in the same entity\n");
3803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
3806 NEXT;
3807 }
3808 if (orig != NULL) {
3809 /*
3810 * Ugly mechanism to save the raw entity value.
3811 */
3812 xmlEntityPtr cur = NULL;
3813
3814 if (isParameter) {
3815 if ((ctxt->sax != NULL) &&
3816 (ctxt->sax->getParameterEntity != NULL))
3817 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3818 } else {
3819 if ((ctxt->sax != NULL) &&
3820 (ctxt->sax->getEntity != NULL))
3821 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003822 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3823 cur = getEntity(ctxt, name);
3824 }
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (cur != NULL) {
3827 if (cur->orig != NULL)
3828 xmlFree(orig);
3829 else
3830 cur->orig = orig;
3831 } else
3832 xmlFree(orig);
3833 }
3834 if (name != NULL) xmlFree(name);
3835 if (value != NULL) xmlFree(value);
3836 if (URI != NULL) xmlFree(URI);
3837 if (literal != NULL) xmlFree(literal);
3838 if (ndata != NULL) xmlFree(ndata);
3839 }
3840}
3841
3842/**
3843 * xmlParseDefaultDecl:
3844 * @ctxt: an XML parser context
3845 * @value: Receive a possible fixed default value for the attribute
3846 *
3847 * Parse an attribute default declaration
3848 *
3849 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3850 *
3851 * [ VC: Required Attribute ]
3852 * if the default declaration is the keyword #REQUIRED, then the
3853 * attribute must be specified for all elements of the type in the
3854 * attribute-list declaration.
3855 *
3856 * [ VC: Attribute Default Legal ]
3857 * The declared default value must meet the lexical constraints of
3858 * the declared attribute type c.f. xmlValidateAttributeDecl()
3859 *
3860 * [ VC: Fixed Attribute Default ]
3861 * if an attribute has a default value declared with the #FIXED
3862 * keyword, instances of that attribute must match the default value.
3863 *
3864 * [ WFC: No < in Attribute Values ]
3865 * handled in xmlParseAttValue()
3866 *
3867 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3868 * or XML_ATTRIBUTE_FIXED.
3869 */
3870
3871int
3872xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3873 int val;
3874 xmlChar *ret;
3875
3876 *value = NULL;
3877 if ((RAW == '#') && (NXT(1) == 'R') &&
3878 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3879 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3880 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3881 (NXT(8) == 'D')) {
3882 SKIP(9);
3883 return(XML_ATTRIBUTE_REQUIRED);
3884 }
3885 if ((RAW == '#') && (NXT(1) == 'I') &&
3886 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3887 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3888 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3889 SKIP(8);
3890 return(XML_ATTRIBUTE_IMPLIED);
3891 }
3892 val = XML_ATTRIBUTE_NONE;
3893 if ((RAW == '#') && (NXT(1) == 'F') &&
3894 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3895 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3896 SKIP(6);
3897 val = XML_ATTRIBUTE_FIXED;
3898 if (!IS_BLANK(CUR)) {
3899 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3901 ctxt->sax->error(ctxt->userData,
3902 "Space required after '#FIXED'\n");
3903 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003904 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 SKIP_BLANKS;
3907 }
3908 ret = xmlParseAttValue(ctxt);
3909 ctxt->instate = XML_PARSER_DTD;
3910 if (ret == NULL) {
3911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3912 ctxt->sax->error(ctxt->userData,
3913 "Attribute default value declaration error\n");
3914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else
3917 *value = ret;
3918 return(val);
3919}
3920
3921/**
3922 * xmlParseNotationType:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse an Notation attribute type.
3926 *
3927 * Note: the leading 'NOTATION' S part has already being parsed...
3928 *
3929 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3930 *
3931 * [ VC: Notation Attributes ]
3932 * Values of this type must match one of the notation names included
3933 * in the declaration; all notation names in the declaration must be declared.
3934 *
3935 * Returns: the notation attribute tree built while parsing
3936 */
3937
3938xmlEnumerationPtr
3939xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3940 xmlChar *name;
3941 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3942
3943 if (RAW != '(') {
3944 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "'(' required to start 'NOTATION'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(NULL);
3951 }
3952 SHRINK;
3953 do {
3954 NEXT;
3955 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003956 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (name == NULL) {
3958 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "Name expected in NOTATION declaration\n");
3962 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003963 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return(ret);
3965 }
3966 cur = xmlCreateEnumeration(name);
3967 xmlFree(name);
3968 if (cur == NULL) return(ret);
3969 if (last == NULL) ret = last = cur;
3970 else {
3971 last->next = cur;
3972 last = cur;
3973 }
3974 SKIP_BLANKS;
3975 } while (RAW == '|');
3976 if (RAW != ')') {
3977 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3979 ctxt->sax->error(ctxt->userData,
3980 "')' required to finish NOTATION declaration\n");
3981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if ((last != NULL) && (last != ret))
3984 xmlFreeEnumeration(last);
3985 return(ret);
3986 }
3987 NEXT;
3988 return(ret);
3989}
3990
3991/**
3992 * xmlParseEnumerationType:
3993 * @ctxt: an XML parser context
3994 *
3995 * parse an Enumeration attribute type.
3996 *
3997 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3998 *
3999 * [ VC: Enumeration ]
4000 * Values of this type must match one of the Nmtoken tokens in
4001 * the declaration
4002 *
4003 * Returns: the enumeration attribute tree built while parsing
4004 */
4005
4006xmlEnumerationPtr
4007xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4008 xmlChar *name;
4009 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4010
4011 if (RAW != '(') {
4012 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4014 ctxt->sax->error(ctxt->userData,
4015 "'(' required to start ATTLIST enumeration\n");
4016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004018 return(NULL);
4019 }
4020 SHRINK;
4021 do {
4022 NEXT;
4023 SKIP_BLANKS;
4024 name = xmlParseNmtoken(ctxt);
4025 if (name == NULL) {
4026 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "NmToken expected in ATTLIST enumeration\n");
4030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 return(ret);
4033 }
4034 cur = xmlCreateEnumeration(name);
4035 xmlFree(name);
4036 if (cur == NULL) return(ret);
4037 if (last == NULL) ret = last = cur;
4038 else {
4039 last->next = cur;
4040 last = cur;
4041 }
4042 SKIP_BLANKS;
4043 } while (RAW == '|');
4044 if (RAW != ')') {
4045 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "')' required to finish ATTLIST enumeration\n");
4049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004051 return(ret);
4052 }
4053 NEXT;
4054 return(ret);
4055}
4056
4057/**
4058 * xmlParseEnumeratedType:
4059 * @ctxt: an XML parser context
4060 * @tree: the enumeration tree built while parsing
4061 *
4062 * parse an Enumerated attribute type.
4063 *
4064 * [57] EnumeratedType ::= NotationType | Enumeration
4065 *
4066 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4067 *
4068 *
4069 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4070 */
4071
4072int
4073xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4074 if ((RAW == 'N') && (NXT(1) == 'O') &&
4075 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4076 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4077 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4078 SKIP(8);
4079 if (!IS_BLANK(CUR)) {
4080 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4082 ctxt->sax->error(ctxt->userData,
4083 "Space required after 'NOTATION'\n");
4084 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 return(0);
4087 }
4088 SKIP_BLANKS;
4089 *tree = xmlParseNotationType(ctxt);
4090 if (*tree == NULL) return(0);
4091 return(XML_ATTRIBUTE_NOTATION);
4092 }
4093 *tree = xmlParseEnumerationType(ctxt);
4094 if (*tree == NULL) return(0);
4095 return(XML_ATTRIBUTE_ENUMERATION);
4096}
4097
4098/**
4099 * xmlParseAttributeType:
4100 * @ctxt: an XML parser context
4101 * @tree: the enumeration tree built while parsing
4102 *
4103 * parse the Attribute list def for an element
4104 *
4105 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4106 *
4107 * [55] StringType ::= 'CDATA'
4108 *
4109 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4110 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4111 *
4112 * Validity constraints for attribute values syntax are checked in
4113 * xmlValidateAttributeValue()
4114 *
4115 * [ VC: ID ]
4116 * Values of type ID must match the Name production. A name must not
4117 * appear more than once in an XML document as a value of this type;
4118 * i.e., ID values must uniquely identify the elements which bear them.
4119 *
4120 * [ VC: One ID per Element Type ]
4121 * No element type may have more than one ID attribute specified.
4122 *
4123 * [ VC: ID Attribute Default ]
4124 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4125 *
4126 * [ VC: IDREF ]
4127 * Values of type IDREF must match the Name production, and values
4128 * of type IDREFS must match Names; each IDREF Name must match the value
4129 * of an ID attribute on some element in the XML document; i.e. IDREF
4130 * values must match the value of some ID attribute.
4131 *
4132 * [ VC: Entity Name ]
4133 * Values of type ENTITY must match the Name production, values
4134 * of type ENTITIES must match Names; each Entity Name must match the
4135 * name of an unparsed entity declared in the DTD.
4136 *
4137 * [ VC: Name Token ]
4138 * Values of type NMTOKEN must match the Nmtoken production; values
4139 * of type NMTOKENS must match Nmtokens.
4140 *
4141 * Returns the attribute type
4142 */
4143int
4144xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4145 SHRINK;
4146 if ((RAW == 'C') && (NXT(1) == 'D') &&
4147 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4148 (NXT(4) == 'A')) {
4149 SKIP(5);
4150 return(XML_ATTRIBUTE_CDATA);
4151 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4152 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4153 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4154 SKIP(6);
4155 return(XML_ATTRIBUTE_IDREFS);
4156 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4157 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4158 (NXT(4) == 'F')) {
4159 SKIP(5);
4160 return(XML_ATTRIBUTE_IDREF);
4161 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4162 SKIP(2);
4163 return(XML_ATTRIBUTE_ID);
4164 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4165 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4166 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4167 SKIP(6);
4168 return(XML_ATTRIBUTE_ENTITY);
4169 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4170 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4171 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4172 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4173 SKIP(8);
4174 return(XML_ATTRIBUTE_ENTITIES);
4175 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4176 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4177 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4178 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4179 SKIP(8);
4180 return(XML_ATTRIBUTE_NMTOKENS);
4181 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4182 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4183 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4184 (NXT(6) == 'N')) {
4185 SKIP(7);
4186 return(XML_ATTRIBUTE_NMTOKEN);
4187 }
4188 return(xmlParseEnumeratedType(ctxt, tree));
4189}
4190
4191/**
4192 * xmlParseAttributeListDecl:
4193 * @ctxt: an XML parser context
4194 *
4195 * : parse the Attribute list def for an element
4196 *
4197 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4198 *
4199 * [53] AttDef ::= S Name S AttType S DefaultDecl
4200 *
4201 */
4202void
4203xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4204 xmlChar *elemName;
4205 xmlChar *attrName;
4206 xmlEnumerationPtr tree;
4207
4208 if ((RAW == '<') && (NXT(1) == '!') &&
4209 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4210 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4211 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4212 (NXT(8) == 'T')) {
4213 xmlParserInputPtr input = ctxt->input;
4214
4215 SKIP(9);
4216 if (!IS_BLANK(CUR)) {
4217 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4219 ctxt->sax->error(ctxt->userData,
4220 "Space required after '<!ATTLIST'\n");
4221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (elemName == NULL) {
4227 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4229 ctxt->sax->error(ctxt->userData,
4230 "ATTLIST: no name for Element\n");
4231 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004232 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236 GROW;
4237 while (RAW != '>') {
4238 const xmlChar *check = CUR_PTR;
4239 int type;
4240 int def;
4241 xmlChar *defaultValue = NULL;
4242
4243 GROW;
4244 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (attrName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Attribute\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 break;
4254 }
4255 GROW;
4256 if (!IS_BLANK(CUR)) {
4257 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4259 ctxt->sax->error(ctxt->userData,
4260 "Space required after the attribute name\n");
4261 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004262 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004263 if (attrName != NULL)
4264 xmlFree(attrName);
4265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 break;
4268 }
4269 SKIP_BLANKS;
4270
4271 type = xmlParseAttributeType(ctxt, &tree);
4272 if (type <= 0) {
4273 if (attrName != NULL)
4274 xmlFree(attrName);
4275 if (defaultValue != NULL)
4276 xmlFree(defaultValue);
4277 break;
4278 }
4279
4280 GROW;
4281 if (!IS_BLANK(CUR)) {
4282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285 "Space required after the attribute type\n");
4286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (attrName != NULL)
4289 xmlFree(attrName);
4290 if (defaultValue != NULL)
4291 xmlFree(defaultValue);
4292 if (tree != NULL)
4293 xmlFreeEnumeration(tree);
4294 break;
4295 }
4296 SKIP_BLANKS;
4297
4298 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4299 if (def <= 0) {
4300 if (attrName != NULL)
4301 xmlFree(attrName);
4302 if (defaultValue != NULL)
4303 xmlFree(defaultValue);
4304 if (tree != NULL)
4305 xmlFreeEnumeration(tree);
4306 break;
4307 }
4308
4309 GROW;
4310 if (RAW != '>') {
4311 if (!IS_BLANK(CUR)) {
4312 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "Space required after the attribute default value\n");
4316 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004317 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 if (tree != NULL)
4323 xmlFreeEnumeration(tree);
4324 break;
4325 }
4326 SKIP_BLANKS;
4327 }
4328 if (check == CUR_PTR) {
4329 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "xmlParseAttributeListDecl: detected internal error\n");
4333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4342 (ctxt->sax->attributeDecl != NULL))
4343 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4344 type, def, defaultValue, tree);
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 GROW;
4350 }
4351 if (RAW == '>') {
4352 if (input != ctxt->input) {
4353 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4355 ctxt->sax->error(ctxt->userData,
4356"Attribute list declaration doesn't start and stop in the same entity\n");
4357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 }
4360 NEXT;
4361 }
4362
4363 xmlFree(elemName);
4364 }
4365}
4366
4367/**
4368 * xmlParseElementMixedContentDecl:
4369 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004370 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004371 *
4372 * parse the declaration for a Mixed Element content
4373 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4374 *
4375 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4376 * '(' S? '#PCDATA' S? ')'
4377 *
4378 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4379 *
4380 * [ VC: No Duplicate Types ]
4381 * The same name must not appear more than once in a single
4382 * mixed-content declaration.
4383 *
4384 * returns: the list of the xmlElementContentPtr describing the element choices
4385 */
4386xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004387xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 xmlElementContentPtr ret = NULL, cur = NULL, n;
4389 xmlChar *elem = NULL;
4390
4391 GROW;
4392 if ((RAW == '#') && (NXT(1) == 'P') &&
4393 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4394 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4395 (NXT(6) == 'A')) {
4396 SKIP(7);
4397 SKIP_BLANKS;
4398 SHRINK;
4399 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004400 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if (ctxt->vctxt.error != NULL)
4403 ctxt->vctxt.error(ctxt->vctxt.userData,
4404"Element content declaration doesn't start and stop in the same entity\n");
4405 ctxt->valid = 0;
4406 }
Owen Taylor3473f882001-02-23 17:55:21 +00004407 NEXT;
4408 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4409 if (RAW == '*') {
4410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4411 NEXT;
4412 }
4413 return(ret);
4414 }
4415 if ((RAW == '(') || (RAW == '|')) {
4416 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4417 if (ret == NULL) return(NULL);
4418 }
4419 while (RAW == '|') {
4420 NEXT;
4421 if (elem == NULL) {
4422 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4423 if (ret == NULL) return(NULL);
4424 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004425 if (cur != NULL)
4426 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 cur = ret;
4428 } else {
4429 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4430 if (n == NULL) return(NULL);
4431 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004432 if (n->c1 != NULL)
4433 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004435 if (n != NULL)
4436 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 cur = n;
4438 xmlFree(elem);
4439 }
4440 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004441 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 if (elem == NULL) {
4443 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4445 ctxt->sax->error(ctxt->userData,
4446 "xmlParseElementMixedContentDecl : Name expected\n");
4447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(cur);
4450 return(NULL);
4451 }
4452 SKIP_BLANKS;
4453 GROW;
4454 }
4455 if ((RAW == ')') && (NXT(1) == '*')) {
4456 if (elem != NULL) {
4457 cur->c2 = xmlNewElementContent(elem,
4458 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004459 if (cur->c2 != NULL)
4460 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 xmlFree(elem);
4462 }
4463 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004464 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4465 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4466 if (ctxt->vctxt.error != NULL)
4467 ctxt->vctxt.error(ctxt->vctxt.userData,
4468"Element content declaration doesn't start and stop in the same entity\n");
4469 ctxt->valid = 0;
4470 }
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(2);
4472 } else {
4473 if (elem != NULL) xmlFree(elem);
4474 xmlFreeElementContent(ret);
4475 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(NULL);
4482 }
4483
4484 } else {
4485 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
4488 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4489 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004490 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 return(ret);
4493}
4494
4495/**
4496 * xmlParseElementChildrenContentDecl:
4497 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004498 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004499 *
 * parse the declaration for an element with children (element-only) content
4501 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4502 *
4503 *
4504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4505 *
4506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4507 *
4508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4509 *
4510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4511 *
4512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4513 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004514 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004515 * opening or closing parentheses in a choice, seq, or Mixed
4516 * construct is contained in the replacement text for a parameter
4517 * entity, both must be contained in the same replacement text. For
4518 * interoperability, if a parameter-entity reference appears in a
4519 * choice, seq, or Mixed construct, its replacement text should not
4520 * be empty, and neither the first nor last non-blank character of
4521 * the replacement text should be a connector (| or ,).
4522 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004523 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004524 * hierarchy.
4525 */
4526xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004527xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004528(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004529 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4530 xmlChar *elem;
4531 xmlChar type = 0;
4532
4533 SKIP_BLANKS;
4534 GROW;
4535 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004536 xmlParserInputPtr input = ctxt->input;
4537
Owen Taylor3473f882001-02-23 17:55:21 +00004538 /* Recurse on first child */
4539 NEXT;
4540 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004541 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004542 SKIP_BLANKS;
4543 GROW;
4544 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004545 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004546 if (elem == NULL) {
4547 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(NULL);
4554 }
4555 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4556 GROW;
4557 if (RAW == '?') {
4558 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4559 NEXT;
4560 } else if (RAW == '*') {
4561 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4562 NEXT;
4563 } else if (RAW == '+') {
4564 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4565 NEXT;
4566 } else {
4567 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4568 }
4569 xmlFree(elem);
4570 GROW;
4571 }
4572 SKIP_BLANKS;
4573 SHRINK;
4574 while (RAW != ')') {
4575 /*
4576 * Each loop we parse one separator and one element.
4577 */
4578 if (RAW == ',') {
4579 if (type == 0) type = CUR;
4580
4581 /*
4582 * Detect "Name | Name , Name" error
4583 */
4584 else if (type != CUR) {
4585 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4589 type);
4590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004592 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004593 xmlFreeElementContent(last);
4594 if (ret != NULL)
4595 xmlFreeElementContent(ret);
4596 return(NULL);
4597 }
4598 NEXT;
4599
4600 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4601 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004602 if ((last != NULL) && (last != ret))
4603 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 xmlFreeElementContent(ret);
4605 return(NULL);
4606 }
4607 if (last == NULL) {
4608 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (ret != NULL)
4610 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 ret = cur = op;
4612 } else {
4613 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004614 if (op != NULL)
4615 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004617 if (last != NULL)
4618 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004619 cur =op;
4620 last = NULL;
4621 }
4622 } else if (RAW == '|') {
4623 if (type == 0) type = CUR;
4624
4625 /*
4626 * Detect "Name , Name | Name" error
4627 */
4628 else if (type != CUR) {
4629 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4631 ctxt->sax->error(ctxt->userData,
4632 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4633 type);
4634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004636 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004637 xmlFreeElementContent(last);
4638 if (ret != NULL)
4639 xmlFreeElementContent(ret);
4640 return(NULL);
4641 }
4642 NEXT;
4643
4644 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4645 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 if (last == NULL) {
4653 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004654 if (ret != NULL)
4655 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 ret = cur = op;
4657 } else {
4658 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004659 if (op != NULL)
4660 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004662 if (last != NULL)
4663 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 cur =op;
4665 last = NULL;
4666 }
4667 } else {
4668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 GROW;
4679 SKIP_BLANKS;
4680 GROW;
4681 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004682 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004683 /* Recurse on second child */
4684 NEXT;
4685 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004686 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 SKIP_BLANKS;
4688 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004689 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if (elem == NULL) {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 if (ret != NULL)
4698 xmlFreeElementContent(ret);
4699 return(NULL);
4700 }
4701 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4702 xmlFree(elem);
4703 if (RAW == '?') {
4704 last->ocur = XML_ELEMENT_CONTENT_OPT;
4705 NEXT;
4706 } else if (RAW == '*') {
4707 last->ocur = XML_ELEMENT_CONTENT_MULT;
4708 NEXT;
4709 } else if (RAW == '+') {
4710 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4711 NEXT;
4712 } else {
4713 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4714 }
4715 }
4716 SKIP_BLANKS;
4717 GROW;
4718 }
4719 if ((cur != NULL) && (last != NULL)) {
4720 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004721 if (last != NULL)
4722 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004724 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4725 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4726 if (ctxt->vctxt.error != NULL)
4727 ctxt->vctxt.error(ctxt->vctxt.userData,
4728"Element content declaration doesn't start and stop in the same entity\n");
4729 ctxt->valid = 0;
4730 }
Owen Taylor3473f882001-02-23 17:55:21 +00004731 NEXT;
4732 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004733 if (ret != NULL)
4734 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 NEXT;
4736 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004737 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004738 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 cur = ret;
4740 /*
4741 * Some normalization:
4742 * (a | b* | c?)* == (a | b | c)*
4743 */
4744 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4745 if ((cur->c1 != NULL) &&
4746 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4747 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4748 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4749 if ((cur->c2 != NULL) &&
4750 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4752 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 cur = cur->c2;
4754 }
4755 }
Owen Taylor3473f882001-02-23 17:55:21 +00004756 NEXT;
4757 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004758 if (ret != NULL) {
4759 int found = 0;
4760
Daniel Veillarde470df72001-04-18 21:41:07 +00004761 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004762 /*
4763 * Some normalization:
4764 * (a | b*)+ == (a | b)*
4765 * (a | b?)+ == (a | b)*
4766 */
4767 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4768 if ((cur->c1 != NULL) &&
4769 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4770 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4771 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4772 found = 1;
4773 }
4774 if ((cur->c2 != NULL) &&
4775 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4776 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4777 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4778 found = 1;
4779 }
4780 cur = cur->c2;
4781 }
4782 if (found)
4783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 }
4787 return(ret);
4788}
4789
4790/**
4791 * xmlParseElementContentDecl:
4792 * @ctxt: an XML parser context
4793 * @name: the name of the element being defined.
4794 * @result: the Element Content pointer will be stored here if any
4795 *
4796 * parse the declaration for an Element content either Mixed or Children,
4797 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4798 *
4799 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4800 *
4801 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4802 */
4803
4804int
4805xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4806 xmlElementContentPtr *result) {
4807
4808 xmlElementContentPtr tree = NULL;
4809 xmlParserInputPtr input = ctxt->input;
4810 int res;
4811
4812 *result = NULL;
4813
4814 if (RAW != '(') {
4815 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004818 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(-1);
4822 }
4823 NEXT;
4824 GROW;
4825 SKIP_BLANKS;
4826 if ((RAW == '#') && (NXT(1) == 'P') &&
4827 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4828 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4829 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004830 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_MIXED;
4832 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004833 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004834 res = XML_ELEMENT_TYPE_ELEMENT;
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP_BLANKS;
4837 *result = tree;
4838 return(res);
4839}
4840
4841/**
4842 * xmlParseElementDecl:
4843 * @ctxt: an XML parser context
4844 *
4845 * parse an Element declaration.
4846 *
4847 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4848 *
4849 * [ VC: Unique Element Type Declaration ]
4850 * No element type may be declared more than once
4851 *
4852 * Returns the type of the element, or -1 in case of error
4853 */
4854int
4855xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4856 xmlChar *name;
4857 int ret = -1;
4858 xmlElementContentPtr content = NULL;
4859
4860 GROW;
4861 if ((RAW == '<') && (NXT(1) == '!') &&
4862 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4863 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4864 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4865 (NXT(8) == 'T')) {
4866 xmlParserInputPtr input = ctxt->input;
4867
4868 SKIP(9);
4869 if (!IS_BLANK(CUR)) {
4870 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "Space required after 'ELEMENT'\n");
4874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004878 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (name == NULL) {
4880 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4882 ctxt->sax->error(ctxt->userData,
4883 "xmlParseElementDecl: no name for Element\n");
4884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(-1);
4887 }
4888 while ((RAW == 0) && (ctxt->inputNr > 1))
4889 xmlPopInput(ctxt);
4890 if (!IS_BLANK(CUR)) {
4891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4893 ctxt->sax->error(ctxt->userData,
4894 "Space required after the element name\n");
4895 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 }
4898 SKIP_BLANKS;
4899 if ((RAW == 'E') && (NXT(1) == 'M') &&
4900 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4901 (NXT(4) == 'Y')) {
4902 SKIP(5);
4903 /*
4904 * Element must always be empty.
4905 */
4906 ret = XML_ELEMENT_TYPE_EMPTY;
4907 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4908 (NXT(2) == 'Y')) {
4909 SKIP(3);
4910 /*
4911 * Element is a generic container.
4912 */
4913 ret = XML_ELEMENT_TYPE_ANY;
4914 } else if (RAW == '(') {
4915 ret = xmlParseElementContentDecl(ctxt, name, &content);
4916 } else {
4917 /*
4918 * [ WFC: PEs in Internal Subset ] error handling.
4919 */
4920 if ((RAW == '%') && (ctxt->external == 0) &&
4921 (ctxt->inputNr == 1)) {
4922 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4924 ctxt->sax->error(ctxt->userData,
4925 "PEReference: forbidden within markup decl in internal subset\n");
4926 } else {
4927 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4931 }
4932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name != NULL) xmlFree(name);
4935 return(-1);
4936 }
4937
4938 SKIP_BLANKS;
4939 /*
4940 * Pop-up of finished entities.
4941 */
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 SKIP_BLANKS;
4945
4946 if (RAW != '>') {
4947 ctxt->errNo = XML_ERR_GT_REQUIRED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: expected '>' at the end\n");
4951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004953 } else {
4954 if (input != ctxt->input) {
4955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4957 ctxt->sax->error(ctxt->userData,
4958"Element declaration doesn't start and stop in the same entity\n");
4959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962
4963 NEXT;
4964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4965 (ctxt->sax->elementDecl != NULL))
4966 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4967 content);
4968 }
4969 if (content != NULL) {
4970 xmlFreeElementContent(content);
4971 }
4972 if (name != NULL) {
4973 xmlFree(name);
4974 }
4975 }
4976 return(ret);
4977}
4978
4979/**
Owen Taylor3473f882001-02-23 17:55:21 +00004980 * xmlParseConditionalSections
4981 * @ctxt: an XML parser context
4982 *
4983 * [61] conditionalSect ::= includeSect | ignoreSect
4984 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4985 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4986 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4987 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4988 */
4989
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004990static void
Owen Taylor3473f882001-02-23 17:55:21 +00004991xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4992 SKIP(3);
4993 SKIP_BLANKS;
4994 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4995 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4996 (NXT(6) == 'E')) {
4997 SKIP(7);
4998 SKIP_BLANKS;
4999 if (RAW != '[') {
5000 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5002 ctxt->sax->error(ctxt->userData,
5003 "XML conditional section '[' expected\n");
5004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 } else {
5007 NEXT;
5008 }
5009 if (xmlParserDebugEntities) {
5010 if ((ctxt->input != NULL) && (ctxt->input->filename))
5011 xmlGenericError(xmlGenericErrorContext,
5012 "%s(%d): ", ctxt->input->filename,
5013 ctxt->input->line);
5014 xmlGenericError(xmlGenericErrorContext,
5015 "Entering INCLUDE Conditional Section\n");
5016 }
5017
5018 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5019 (NXT(2) != '>'))) {
5020 const xmlChar *check = CUR_PTR;
5021 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5024 xmlParseConditionalSections(ctxt);
5025 } else if (IS_BLANK(CUR)) {
5026 NEXT;
5027 } else if (RAW == '%') {
5028 xmlParsePEReference(ctxt);
5029 } else
5030 xmlParseMarkupDecl(ctxt);
5031
5032 /*
5033 * Pop-up of finished entities.
5034 */
5035 while ((RAW == 0) && (ctxt->inputNr > 1))
5036 xmlPopInput(ctxt);
5037
Daniel Veillardfdc91562002-07-01 21:52:03 +00005038 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "Content error in the external subset\n");
5043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 break;
5046 }
5047 }
5048 if (xmlParserDebugEntities) {
5049 if ((ctxt->input != NULL) && (ctxt->input->filename))
5050 xmlGenericError(xmlGenericErrorContext,
5051 "%s(%d): ", ctxt->input->filename,
5052 ctxt->input->line);
5053 xmlGenericError(xmlGenericErrorContext,
5054 "Leaving INCLUDE Conditional Section\n");
5055 }
5056
5057 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5058 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5059 int state;
5060 int instate;
5061 int depth = 0;
5062
5063 SKIP(6);
5064 SKIP_BLANKS;
5065 if (RAW != '[') {
5066 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5068 ctxt->sax->error(ctxt->userData,
5069 "XML conditional section '[' expected\n");
5070 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005071 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 } else {
5073 NEXT;
5074 }
5075 if (xmlParserDebugEntities) {
5076 if ((ctxt->input != NULL) && (ctxt->input->filename))
5077 xmlGenericError(xmlGenericErrorContext,
5078 "%s(%d): ", ctxt->input->filename,
5079 ctxt->input->line);
5080 xmlGenericError(xmlGenericErrorContext,
5081 "Entering IGNORE Conditional Section\n");
5082 }
5083
5084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005085 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * But disable SAX event generating DTD building in the meantime
5087 */
5088 state = ctxt->disableSAX;
5089 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005090 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 ctxt->instate = XML_PARSER_IGNORE;
5092
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005093 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5095 depth++;
5096 SKIP(3);
5097 continue;
5098 }
5099 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5100 if (--depth >= 0) SKIP(3);
5101 continue;
5102 }
5103 NEXT;
5104 continue;
5105 }
5106
5107 ctxt->disableSAX = state;
5108 ctxt->instate = instate;
5109
5110 if (xmlParserDebugEntities) {
5111 if ((ctxt->input != NULL) && (ctxt->input->filename))
5112 xmlGenericError(xmlGenericErrorContext,
5113 "%s(%d): ", ctxt->input->filename,
5114 ctxt->input->line);
5115 xmlGenericError(xmlGenericErrorContext,
5116 "Leaving IGNORE Conditional Section\n");
5117 }
5118
5119 } else {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 }
5127
5128 if (RAW == 0)
5129 SHRINK;
5130
5131 if (RAW == 0) {
5132 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5134 ctxt->sax->error(ctxt->userData,
5135 "XML conditional section not closed\n");
5136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 } else {
5139 SKIP(3);
5140 }
5141}
5142
5143/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005144 * xmlParseMarkupDecl:
5145 * @ctxt: an XML parser context
5146 *
5147 * parse Markup declarations
5148 *
5149 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5150 * NotationDecl | PI | Comment
5151 *
5152 * [ VC: Proper Declaration/PE Nesting ]
5153 * Parameter-entity replacement text must be properly nested with
5154 * markup declarations. That is to say, if either the first character
5155 * or the last character of a markup declaration (markupdecl above) is
5156 * contained in the replacement text for a parameter-entity reference,
5157 * both must be contained in the same replacement text.
5158 *
5159 * [ WFC: PEs in Internal Subset ]
5160 * In the internal DTD subset, parameter-entity references can occur
5161 * only where markup declarations can occur, not within markup declarations.
5162 * (This does not apply to references that occur in external parameter
5163 * entities or to the external subset.)
5164 */
5165void
5166xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5167 GROW;
5168 xmlParseElementDecl(ctxt);
5169 xmlParseAttributeListDecl(ctxt);
5170 xmlParseEntityDecl(ctxt);
5171 xmlParseNotationDecl(ctxt);
5172 xmlParsePI(ctxt);
5173 xmlParseComment(ctxt);
5174 /*
5175 * This is only for internal subset. On external entities,
5176 * the replacement is done before parsing stage
5177 */
5178 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5179 xmlParsePEReference(ctxt);
5180
5181 /*
5182 * Conditional sections are allowed from entities included
5183 * by PE References in the internal subset.
5184 */
5185 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5186 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5187 xmlParseConditionalSections(ctxt);
5188 }
5189 }
5190
5191 ctxt->instate = XML_PARSER_DTD;
5192}
5193
5194/**
5195 * xmlParseTextDecl:
5196 * @ctxt: an XML parser context
5197 *
5198 * parse an XML declaration header for external entities
5199 *
5200 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5201 *
5202 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5203 */
5204
5205void
5206xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5207 xmlChar *version;
5208
5209 /*
5210 * We know that '<?xml' is here.
5211 */
5212 if ((RAW == '<') && (NXT(1) == '?') &&
5213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5215 SKIP(5);
5216 } else {
5217 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5219 ctxt->sax->error(ctxt->userData,
5220 "Text declaration '<?xml' required\n");
5221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005223
5224 return;
5225 }
5226
5227 if (!IS_BLANK(CUR)) {
5228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Space needed after '<?xml'\n");
5232 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 }
5235 SKIP_BLANKS;
5236
5237 /*
5238 * We may have the VersionInfo here.
5239 */
5240 version = xmlParseVersionInfo(ctxt);
5241 if (version == NULL)
5242 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005243 else {
5244 if (!IS_BLANK(CUR)) {
5245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005250 }
5251 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 ctxt->input->version = version;
5253
5254 /*
5255 * We must have the encoding declaration
5256 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005257 xmlParseEncodingDecl(ctxt);
5258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5259 /*
5260 * The XML REC instructs us to stop parsing right here
5261 */
5262 return;
5263 }
5264
5265 SKIP_BLANKS;
5266 if ((RAW == '?') && (NXT(1) == '>')) {
5267 SKIP(2);
5268 } else if (RAW == '>') {
5269 /* Deprecated old WD ... */
5270 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "XML declaration must end-up with '?>'\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 NEXT;
5277 } else {
5278 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "parsing XML declaration: '?>' expected\n");
5282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005284 MOVETO_ENDTAG(CUR_PTR);
5285 NEXT;
5286 }
5287}
5288
5289/**
Owen Taylor3473f882001-02-23 17:55:21 +00005290 * xmlParseExternalSubset:
5291 * @ctxt: an XML parser context
5292 * @ExternalID: the external identifier
5293 * @SystemID: the system identifier (or URL)
5294 *
5295 * parse Markup declarations from an external subset
5296 *
5297 * [30] extSubset ::= textDecl? extSubsetDecl
5298 *
5299 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5300 */
5301void
5302xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5303 const xmlChar *SystemID) {
5304 GROW;
5305 if ((RAW == '<') && (NXT(1) == '?') &&
5306 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5307 (NXT(4) == 'l')) {
5308 xmlParseTextDecl(ctxt);
5309 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5310 /*
5311 * The XML REC instructs us to stop parsing right here
5312 */
5313 ctxt->instate = XML_PARSER_EOF;
5314 return;
5315 }
5316 }
5317 if (ctxt->myDoc == NULL) {
5318 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5319 }
5320 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5321 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5322
5323 ctxt->instate = XML_PARSER_DTD;
5324 ctxt->external = 1;
5325 while (((RAW == '<') && (NXT(1) == '?')) ||
5326 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005327 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005328 const xmlChar *check = CUR_PTR;
5329 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005330
5331 GROW;
5332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5333 xmlParseConditionalSections(ctxt);
5334 } else if (IS_BLANK(CUR)) {
5335 NEXT;
5336 } else if (RAW == '%') {
5337 xmlParsePEReference(ctxt);
5338 } else
5339 xmlParseMarkupDecl(ctxt);
5340
5341 /*
5342 * Pop-up of finished entities.
5343 */
5344 while ((RAW == 0) && (ctxt->inputNr > 1))
5345 xmlPopInput(ctxt);
5346
Daniel Veillardfdc91562002-07-01 21:52:03 +00005347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "Content error in the external subset\n");
5352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 break;
5355 }
5356 }
5357
5358 if (RAW != 0) {
5359 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "Extra content at the end of the document\n");
5363 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005364 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005365 }
5366
5367}
5368
5369/**
5370 * xmlParseReference:
5371 * @ctxt: an XML parser context
5372 *
5373 * parse and handle entity references in content, depending on the SAX
5374 * interface, this may end-up in a call to character() if this is a
5375 * CharRef, a predefined entity, if there is no reference() callback.
5376 * or if the parser was asked to switch to that mode.
5377 *
5378 * [67] Reference ::= EntityRef | CharRef
5379 */
5380void
5381xmlParseReference(xmlParserCtxtPtr ctxt) {
5382 xmlEntityPtr ent;
5383 xmlChar *val;
5384 if (RAW != '&') return;
5385
5386 if (NXT(1) == '#') {
5387 int i = 0;
5388 xmlChar out[10];
5389 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005390 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5393 /*
5394 * So we are using non-UTF-8 buffers
5395 * Check that the char fit on 8bits, if not
5396 * generate a CharRef.
5397 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005398 if (value <= 0xFF) {
5399 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 out[1] = 0;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5402 (!ctxt->disableSAX))
5403 ctxt->sax->characters(ctxt->userData, out, 1);
5404 } else {
5405 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005406 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005408 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5410 (!ctxt->disableSAX))
5411 ctxt->sax->reference(ctxt->userData, out);
5412 }
5413 } else {
5414 /*
5415 * Just encode the value in UTF-8
5416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005417 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 out[i] = 0;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, out, i);
5422 }
5423 } else {
5424 ent = xmlParseEntityRef(ctxt);
5425 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005426 if (!ctxt->wellFormed)
5427 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if ((ent->name != NULL) &&
5429 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5430 xmlNodePtr list = NULL;
5431 int ret;
5432
5433
5434 /*
5435 * The first reference to the entity trigger a parsing phase
5436 * where the ent->children is filled with the result from
5437 * the parsing.
5438 */
5439 if (ent->children == NULL) {
5440 xmlChar *value;
5441 value = ent->content;
5442
5443 /*
5444 * Check that this entity is well formed
5445 */
5446 if ((value != NULL) &&
5447 (value[1] == 0) && (value[0] == '<') &&
5448 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5449 /*
5450 * DONE: get definite answer on this !!!
5451 * Lots of entity decls are used to declare a single
5452 * char
5453 * <!ENTITY lt "<">
5454 * Which seems to be valid since
5455 * 2.4: The ampersand character (&) and the left angle
5456 * bracket (<) may appear in their literal form only
5457 * when used ... They are also legal within the literal
5458 * entity value of an internal entity declaration;i
5459 * see "4.3.2 Well-Formed Parsed Entities".
5460 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5461 * Looking at the OASIS test suite and James Clark
5462 * tests, this is broken. However the XML REC uses
5463 * it. Is the XML REC not well-formed ????
5464 * This is a hack to avoid this problem
5465 *
5466 * ANSWER: since lt gt amp .. are already defined,
5467 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005468 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005469 * is lousy but acceptable.
5470 */
5471 list = xmlNewDocText(ctxt->myDoc, value);
5472 if (list != NULL) {
5473 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5474 (ent->children == NULL)) {
5475 ent->children = list;
5476 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005477 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005478 list->parent = (xmlNodePtr) ent;
5479 } else {
5480 xmlFreeNodeList(list);
5481 }
5482 } else if (list != NULL) {
5483 xmlFreeNodeList(list);
5484 }
5485 } else {
5486 /*
5487 * 4.3.2: An internal general parsed entity is well-formed
5488 * if its replacement text matches the production labeled
5489 * content.
5490 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005491
5492 void *user_data;
5493 /*
5494 * This is a bit hackish but this seems the best
5495 * way to make sure both SAX and DOM entity support
5496 * behaves okay.
5497 */
5498 if (ctxt->userData == ctxt)
5499 user_data = NULL;
5500 else
5501 user_data = ctxt->userData;
5502
Owen Taylor3473f882001-02-23 17:55:21 +00005503 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5504 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005505 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5506 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005507 ctxt->depth--;
5508 } else if (ent->etype ==
5509 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5510 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005511 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005512 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005513 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005514 ctxt->depth--;
5515 } else {
5516 ret = -1;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "Internal: invalid entity type\n");
5520 }
5521 if (ret == XML_ERR_ENTITY_LOOP) {
5522 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5524 ctxt->sax->error(ctxt->userData,
5525 "Detected entity reference loop\n");
5526 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005527 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005528 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005529 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005530 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5531 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005532 (ent->children == NULL)) {
5533 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005534 if (ctxt->replaceEntities) {
5535 /*
5536 * Prune it directly in the generated document
5537 * except for single text nodes.
5538 */
5539 if ((list->type == XML_TEXT_NODE) &&
5540 (list->next == NULL)) {
5541 list->parent = (xmlNodePtr) ent;
5542 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005543 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005544 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005545 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 while (list != NULL) {
5547 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005548 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005549 if (list->next == NULL)
5550 ent->last = list;
5551 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005552 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005553 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005554 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5555 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005556 }
5557 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005558 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005559 while (list != NULL) {
5560 list->parent = (xmlNodePtr) ent;
5561 if (list->next == NULL)
5562 ent->last = list;
5563 list = list->next;
5564 }
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 } else {
5567 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005568 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005569 }
5570 } else if (ret > 0) {
5571 ctxt->errNo = ret;
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "Entity value required\n");
5575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005577 } else if (list != NULL) {
5578 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005580 }
5581 }
5582 }
5583 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5584 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5585 /*
5586 * Create a node.
5587 */
5588 ctxt->sax->reference(ctxt->userData, ent->name);
5589 return;
5590 } else if (ctxt->replaceEntities) {
5591 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5592 /*
5593 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005594 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005595 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005596 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005597 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005598 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 cur = ent->children;
5600 while (cur != NULL) {
5601 new = xmlCopyNode(cur, 1);
Daniel Veillard8f872442003-01-09 23:19:02 +00005602 if (new != NULL) {
5603 new->_private = cur->_private;
5604 if (firstChild == NULL){
5605 firstChild = new;
5606 }
5607 xmlAddChild(ctxt->node, new);
Daniel Veillard8107a222002-01-13 14:10:10 +00005608 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005609 if (cur == ent->last)
5610 break;
5611 cur = cur->next;
5612 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005613 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5614 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005615 } else {
5616 /*
5617 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005618 * node with a possible previous text one which
5619 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005620 */
5621 if (ent->children->type == XML_TEXT_NODE)
5622 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5623 if ((ent->last != ent->children) &&
5624 (ent->last->type == XML_TEXT_NODE))
5625 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5626 xmlAddChildList(ctxt->node, ent->children);
5627 }
5628
Owen Taylor3473f882001-02-23 17:55:21 +00005629 /*
5630 * This is to avoid a nasty side effect, see
5631 * characters() in SAX.c
5632 */
5633 ctxt->nodemem = 0;
5634 ctxt->nodelen = 0;
5635 return;
5636 } else {
5637 /*
5638 * Probably running in SAX mode
5639 */
5640 xmlParserInputPtr input;
5641
5642 input = xmlNewEntityInputStream(ctxt, ent);
5643 xmlPushInput(ctxt, input);
5644 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5645 (RAW == '<') && (NXT(1) == '?') &&
5646 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5647 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5648 xmlParseTextDecl(ctxt);
5649 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5650 /*
5651 * The XML REC instructs us to stop parsing right here
5652 */
5653 ctxt->instate = XML_PARSER_EOF;
5654 return;
5655 }
5656 if (input->standalone == 1) {
5657 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5659 ctxt->sax->error(ctxt->userData,
5660 "external parsed entities cannot be standalone\n");
5661 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005662 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005663 }
5664 }
5665 return;
5666 }
5667 }
5668 } else {
5669 val = ent->content;
5670 if (val == NULL) return;
5671 /*
5672 * inline the entity.
5673 */
5674 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5675 (!ctxt->disableSAX))
5676 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5677 }
5678 }
5679}
5680
5681/**
5682 * xmlParseEntityRef:
5683 * @ctxt: an XML parser context
5684 *
5685 * parse ENTITY references declarations
5686 *
5687 * [68] EntityRef ::= '&' Name ';'
5688 *
5689 * [ WFC: Entity Declared ]
5690 * In a document without any DTD, a document with only an internal DTD
5691 * subset which contains no parameter entity references, or a document
5692 * with "standalone='yes'", the Name given in the entity reference
5693 * must match that in an entity declaration, except that well-formed
5694 * documents need not declare any of the following entities: amp, lt,
5695 * gt, apos, quot. The declaration of a parameter entity must precede
5696 * any reference to it. Similarly, the declaration of a general entity
5697 * must precede any reference to it which appears in a default value in an
5698 * attribute-list declaration. Note that if entities are declared in the
5699 * external subset or in external parameter entities, a non-validating
5700 * processor is not obligated to read and process their declarations;
5701 * for such documents, the rule that an entity must be declared is a
5702 * well-formedness constraint only if standalone='yes'.
5703 *
5704 * [ WFC: Parsed Entity ]
5705 * An entity reference must not contain the name of an unparsed entity
5706 *
5707 * Returns the xmlEntityPtr if found, or NULL otherwise.
5708 */
5709xmlEntityPtr
5710xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5711 xmlChar *name;
5712 xmlEntityPtr ent = NULL;
5713
5714 GROW;
5715
5716 if (RAW == '&') {
5717 NEXT;
5718 name = xmlParseName(ctxt);
5719 if (name == NULL) {
5720 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5722 ctxt->sax->error(ctxt->userData,
5723 "xmlParseEntityRef: no name\n");
5724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005726 } else {
5727 if (RAW == ';') {
5728 NEXT;
5729 /*
5730 * Ask first SAX for entity resolution, otherwise try the
5731 * predefined set.
5732 */
5733 if (ctxt->sax != NULL) {
5734 if (ctxt->sax->getEntity != NULL)
5735 ent = ctxt->sax->getEntity(ctxt->userData, name);
5736 if (ent == NULL)
5737 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005738 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5739 ent = getEntity(ctxt, name);
5740 }
Owen Taylor3473f882001-02-23 17:55:21 +00005741 }
5742 /*
5743 * [ WFC: Entity Declared ]
5744 * In a document without any DTD, a document with only an
5745 * internal DTD subset which contains no parameter entity
5746 * references, or a document with "standalone='yes'", the
5747 * Name given in the entity reference must match that in an
5748 * entity declaration, except that well-formed documents
5749 * need not declare any of the following entities: amp, lt,
5750 * gt, apos, quot.
5751 * The declaration of a parameter entity must precede any
5752 * reference to it.
5753 * Similarly, the declaration of a general entity must
5754 * precede any reference to it which appears in a default
5755 * value in an attribute-list declaration. Note that if
5756 * entities are declared in the external subset or in
5757 * external parameter entities, a non-validating processor
5758 * is not obligated to read and process their declarations;
5759 * for such documents, the rule that an entity must be
5760 * declared is a well-formedness constraint only if
5761 * standalone='yes'.
5762 */
5763 if (ent == NULL) {
5764 if ((ctxt->standalone == 1) ||
5765 ((ctxt->hasExternalSubset == 0) &&
5766 (ctxt->hasPErefs == 0))) {
5767 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5769 ctxt->sax->error(ctxt->userData,
5770 "Entity '%s' not defined\n", name);
5771 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005772 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005773 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005774 } else {
5775 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005777 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005778 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005779 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005780 }
5781 }
5782
5783 /*
5784 * [ WFC: Parsed Entity ]
5785 * An entity reference must not contain the name of an
5786 * unparsed entity
5787 */
5788 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5789 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5791 ctxt->sax->error(ctxt->userData,
5792 "Entity reference to unparsed entity %s\n", name);
5793 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005794 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005795 }
5796
5797 /*
5798 * [ WFC: No External Entity References ]
5799 * Attribute values cannot contain direct or indirect
5800 * entity references to external entities.
5801 */
5802 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5803 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5804 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5806 ctxt->sax->error(ctxt->userData,
5807 "Attribute references external entity '%s'\n", name);
5808 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005809 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005810 }
5811 /*
5812 * [ WFC: No < in Attribute Values ]
5813 * The replacement text of any entity referred to directly or
5814 * indirectly in an attribute value (other than "&lt;") must
5815 * not contain a <.
5816 */
5817 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5818 (ent != NULL) &&
5819 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5820 (ent->content != NULL) &&
5821 (xmlStrchr(ent->content, '<'))) {
5822 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5824 ctxt->sax->error(ctxt->userData,
5825 "'<' in entity '%s' is not allowed in attributes values\n", name);
5826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005828 }
5829
5830 /*
5831 * Internal check, no parameter entities here ...
5832 */
5833 else {
5834 switch (ent->etype) {
5835 case XML_INTERNAL_PARAMETER_ENTITY:
5836 case XML_EXTERNAL_PARAMETER_ENTITY:
5837 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5839 ctxt->sax->error(ctxt->userData,
5840 "Attempt to reference the parameter entity '%s'\n", name);
5841 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005842 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005843 break;
5844 default:
5845 break;
5846 }
5847 }
5848
5849 /*
5850 * [ WFC: No Recursion ]
5851 * A parsed entity must not contain a recursive reference
5852 * to itself, either directly or indirectly.
5853 * Done somewhere else
5854 */
5855
5856 } else {
5857 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859 ctxt->sax->error(ctxt->userData,
5860 "xmlParseEntityRef: expecting ';'\n");
5861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005863 }
5864 xmlFree(name);
5865 }
5866 }
5867 return(ent);
5868}
5869
5870/**
5871 * xmlParseStringEntityRef:
5872 * @ctxt: an XML parser context
5873 * @str: a pointer to an index in the string
5874 *
5875 * parse ENTITY references declarations, but this version parses it from
5876 * a string value.
5877 *
5878 * [68] EntityRef ::= '&' Name ';'
5879 *
5880 * [ WFC: Entity Declared ]
5881 * In a document without any DTD, a document with only an internal DTD
5882 * subset which contains no parameter entity references, or a document
5883 * with "standalone='yes'", the Name given in the entity reference
5884 * must match that in an entity declaration, except that well-formed
5885 * documents need not declare any of the following entities: amp, lt,
5886 * gt, apos, quot. The declaration of a parameter entity must precede
5887 * any reference to it. Similarly, the declaration of a general entity
5888 * must precede any reference to it which appears in a default value in an
5889 * attribute-list declaration. Note that if entities are declared in the
5890 * external subset or in external parameter entities, a non-validating
5891 * processor is not obligated to read and process their declarations;
5892 * for such documents, the rule that an entity must be declared is a
5893 * well-formedness constraint only if standalone='yes'.
5894 *
5895 * [ WFC: Parsed Entity ]
5896 * An entity reference must not contain the name of an unparsed entity
5897 *
5898 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5899 * is updated to the current location in the string.
5900 */
5901xmlEntityPtr
5902xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5903 xmlChar *name;
5904 const xmlChar *ptr;
5905 xmlChar cur;
5906 xmlEntityPtr ent = NULL;
5907
5908 if ((str == NULL) || (*str == NULL))
5909 return(NULL);
5910 ptr = *str;
5911 cur = *ptr;
5912 if (cur == '&') {
5913 ptr++;
5914 cur = *ptr;
5915 name = xmlParseStringName(ctxt, &ptr);
5916 if (name == NULL) {
5917 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5919 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005920 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 } else {
5924 if (*ptr == ';') {
5925 ptr++;
5926 /*
5927 * Ask first SAX for entity resolution, otherwise try the
5928 * predefined set.
5929 */
5930 if (ctxt->sax != NULL) {
5931 if (ctxt->sax->getEntity != NULL)
5932 ent = ctxt->sax->getEntity(ctxt->userData, name);
5933 if (ent == NULL)
5934 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005935 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5936 ent = getEntity(ctxt, name);
5937 }
Owen Taylor3473f882001-02-23 17:55:21 +00005938 }
5939 /*
5940 * [ WFC: Entity Declared ]
5941 * In a document without any DTD, a document with only an
5942 * internal DTD subset which contains no parameter entity
5943 * references, or a document with "standalone='yes'", the
5944 * Name given in the entity reference must match that in an
5945 * entity declaration, except that well-formed documents
5946 * need not declare any of the following entities: amp, lt,
5947 * gt, apos, quot.
5948 * The declaration of a parameter entity must precede any
5949 * reference to it.
5950 * Similarly, the declaration of a general entity must
5951 * precede any reference to it which appears in a default
5952 * value in an attribute-list declaration. Note that if
5953 * entities are declared in the external subset or in
5954 * external parameter entities, a non-validating processor
5955 * is not obligated to read and process their declarations;
5956 * for such documents, the rule that an entity must be
5957 * declared is a well-formedness constraint only if
5958 * standalone='yes'.
5959 */
5960 if (ent == NULL) {
5961 if ((ctxt->standalone == 1) ||
5962 ((ctxt->hasExternalSubset == 0) &&
5963 (ctxt->hasPErefs == 0))) {
5964 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5966 ctxt->sax->error(ctxt->userData,
5967 "Entity '%s' not defined\n", name);
5968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005970 } else {
5971 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5972 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5973 ctxt->sax->warning(ctxt->userData,
5974 "Entity '%s' not defined\n", name);
5975 }
5976 }
5977
5978 /*
5979 * [ WFC: Parsed Entity ]
5980 * An entity reference must not contain the name of an
5981 * unparsed entity
5982 */
5983 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5984 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5986 ctxt->sax->error(ctxt->userData,
5987 "Entity reference to unparsed entity %s\n", name);
5988 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005989 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005990 }
5991
5992 /*
5993 * [ WFC: No External Entity References ]
5994 * Attribute values cannot contain direct or indirect
5995 * entity references to external entities.
5996 */
5997 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5998 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5999 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6001 ctxt->sax->error(ctxt->userData,
6002 "Attribute references external entity '%s'\n", name);
6003 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006004 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006005 }
6006 /*
6007 * [ WFC: No < in Attribute Values ]
6008 * The replacement text of any entity referred to directly or
6009 * indirectly in an attribute value (other than "&lt;") must
6010 * not contain a <.
6011 */
6012 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6013 (ent != NULL) &&
6014 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6015 (ent->content != NULL) &&
6016 (xmlStrchr(ent->content, '<'))) {
6017 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6019 ctxt->sax->error(ctxt->userData,
6020 "'<' in entity '%s' is not allowed in attributes values\n", name);
6021 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006022 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006023 }
6024
6025 /*
6026 * Internal check, no parameter entities here ...
6027 */
6028 else {
6029 switch (ent->etype) {
6030 case XML_INTERNAL_PARAMETER_ENTITY:
6031 case XML_EXTERNAL_PARAMETER_ENTITY:
6032 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6034 ctxt->sax->error(ctxt->userData,
6035 "Attempt to reference the parameter entity '%s'\n", name);
6036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006038 break;
6039 default:
6040 break;
6041 }
6042 }
6043
6044 /*
6045 * [ WFC: No Recursion ]
6046 * A parsed entity must not contain a recursive reference
6047 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006048 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006049 */
6050
6051 } else {
6052 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6054 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006055 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006056 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006057 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006058 }
6059 xmlFree(name);
6060 }
6061 }
6062 *str = ptr;
6063 return(ent);
6064}
6065
6066/**
6067 * xmlParsePEReference:
6068 * @ctxt: an XML parser context
6069 *
6070 * parse PEReference declarations
6071 * The entity content is handled directly by pushing it's content as
6072 * a new input stream.
6073 *
6074 * [69] PEReference ::= '%' Name ';'
6075 *
6076 * [ WFC: No Recursion ]
6077 * A parsed entity must not contain a recursive
6078 * reference to itself, either directly or indirectly.
6079 *
6080 * [ WFC: Entity Declared ]
6081 * In a document without any DTD, a document with only an internal DTD
6082 * subset which contains no parameter entity references, or a document
6083 * with "standalone='yes'", ... ... The declaration of a parameter
6084 * entity must precede any reference to it...
6085 *
6086 * [ VC: Entity Declared ]
6087 * In a document with an external subset or external parameter entities
6088 * with "standalone='no'", ... ... The declaration of a parameter entity
6089 * must precede any reference to it...
6090 *
6091 * [ WFC: In DTD ]
6092 * Parameter-entity references may only appear in the DTD.
6093 * NOTE: misleading but this is handled.
6094 */
6095void
6096xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6097 xmlChar *name;
6098 xmlEntityPtr entity = NULL;
6099 xmlParserInputPtr input;
6100
6101 if (RAW == '%') {
6102 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006103 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006104 if (name == NULL) {
6105 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6107 ctxt->sax->error(ctxt->userData,
6108 "xmlParsePEReference: no name\n");
6109 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006111 } else {
6112 if (RAW == ';') {
6113 NEXT;
6114 if ((ctxt->sax != NULL) &&
6115 (ctxt->sax->getParameterEntity != NULL))
6116 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6117 name);
6118 if (entity == NULL) {
6119 /*
6120 * [ WFC: Entity Declared ]
6121 * In a document without any DTD, a document with only an
6122 * internal DTD subset which contains no parameter entity
6123 * references, or a document with "standalone='yes'", ...
6124 * ... The declaration of a parameter entity must precede
6125 * any reference to it...
6126 */
6127 if ((ctxt->standalone == 1) ||
6128 ((ctxt->hasExternalSubset == 0) &&
6129 (ctxt->hasPErefs == 0))) {
6130 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6131 if ((!ctxt->disableSAX) &&
6132 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6133 ctxt->sax->error(ctxt->userData,
6134 "PEReference: %%%s; not found\n", name);
6135 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006136 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006137 } else {
6138 /*
6139 * [ VC: Entity Declared ]
6140 * In a document with an external subset or external
6141 * parameter entities with "standalone='no'", ...
6142 * ... The declaration of a parameter entity must precede
6143 * any reference to it...
6144 */
6145 if ((!ctxt->disableSAX) &&
6146 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6147 ctxt->sax->warning(ctxt->userData,
6148 "PEReference: %%%s; not found\n", name);
6149 ctxt->valid = 0;
6150 }
6151 } else {
6152 /*
6153 * Internal checking in case the entity quest barfed
6154 */
6155 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6156 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6157 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6158 ctxt->sax->warning(ctxt->userData,
6159 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006160 } else if (ctxt->input->free != deallocblankswrapper) {
6161 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6162 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006163 } else {
6164 /*
6165 * TODO !!!
6166 * handle the extra spaces added before and after
6167 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6168 */
6169 input = xmlNewEntityInputStream(ctxt, entity);
6170 xmlPushInput(ctxt, input);
6171 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6172 (RAW == '<') && (NXT(1) == '?') &&
6173 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6174 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6175 xmlParseTextDecl(ctxt);
6176 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6177 /*
6178 * The XML REC instructs us to stop parsing
6179 * right here
6180 */
6181 ctxt->instate = XML_PARSER_EOF;
6182 xmlFree(name);
6183 return;
6184 }
6185 }
Owen Taylor3473f882001-02-23 17:55:21 +00006186 }
6187 }
6188 ctxt->hasPErefs = 1;
6189 } else {
6190 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6192 ctxt->sax->error(ctxt->userData,
6193 "xmlParsePEReference: expecting ';'\n");
6194 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006195 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006196 }
6197 xmlFree(name);
6198 }
6199 }
6200}
6201
6202/**
6203 * xmlParseStringPEReference:
6204 * @ctxt: an XML parser context
6205 * @str: a pointer to an index in the string
6206 *
6207 * parse PEReference declarations
6208 *
6209 * [69] PEReference ::= '%' Name ';'
6210 *
6211 * [ WFC: No Recursion ]
6212 * A parsed entity must not contain a recursive
6213 * reference to itself, either directly or indirectly.
6214 *
6215 * [ WFC: Entity Declared ]
6216 * In a document without any DTD, a document with only an internal DTD
6217 * subset which contains no parameter entity references, or a document
6218 * with "standalone='yes'", ... ... The declaration of a parameter
6219 * entity must precede any reference to it...
6220 *
6221 * [ VC: Entity Declared ]
6222 * In a document with an external subset or external parameter entities
6223 * with "standalone='no'", ... ... The declaration of a parameter entity
6224 * must precede any reference to it...
6225 *
6226 * [ WFC: In DTD ]
6227 * Parameter-entity references may only appear in the DTD.
6228 * NOTE: misleading but this is handled.
6229 *
6230 * Returns the string of the entity content.
6231 * str is updated to the current value of the index
6232 */
6233xmlEntityPtr
6234xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6235 const xmlChar *ptr;
6236 xmlChar cur;
6237 xmlChar *name;
6238 xmlEntityPtr entity = NULL;
6239
6240 if ((str == NULL) || (*str == NULL)) return(NULL);
6241 ptr = *str;
6242 cur = *ptr;
6243 if (cur == '%') {
6244 ptr++;
6245 cur = *ptr;
6246 name = xmlParseStringName(ctxt, &ptr);
6247 if (name == NULL) {
6248 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6250 ctxt->sax->error(ctxt->userData,
6251 "xmlParseStringPEReference: no name\n");
6252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006254 } else {
6255 cur = *ptr;
6256 if (cur == ';') {
6257 ptr++;
6258 cur = *ptr;
6259 if ((ctxt->sax != NULL) &&
6260 (ctxt->sax->getParameterEntity != NULL))
6261 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6262 name);
6263 if (entity == NULL) {
6264 /*
6265 * [ WFC: Entity Declared ]
6266 * In a document without any DTD, a document with only an
6267 * internal DTD subset which contains no parameter entity
6268 * references, or a document with "standalone='yes'", ...
6269 * ... The declaration of a parameter entity must precede
6270 * any reference to it...
6271 */
6272 if ((ctxt->standalone == 1) ||
6273 ((ctxt->hasExternalSubset == 0) &&
6274 (ctxt->hasPErefs == 0))) {
6275 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6277 ctxt->sax->error(ctxt->userData,
6278 "PEReference: %%%s; not found\n", name);
6279 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006280 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006281 } else {
6282 /*
6283 * [ VC: Entity Declared ]
6284 * In a document with an external subset or external
6285 * parameter entities with "standalone='no'", ...
6286 * ... The declaration of a parameter entity must
6287 * precede any reference to it...
6288 */
6289 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6290 ctxt->sax->warning(ctxt->userData,
6291 "PEReference: %%%s; not found\n", name);
6292 ctxt->valid = 0;
6293 }
6294 } else {
6295 /*
6296 * Internal checking in case the entity quest barfed
6297 */
6298 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6299 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6300 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6301 ctxt->sax->warning(ctxt->userData,
6302 "Internal: %%%s; is not a parameter entity\n", name);
6303 }
6304 }
6305 ctxt->hasPErefs = 1;
6306 } else {
6307 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6309 ctxt->sax->error(ctxt->userData,
6310 "xmlParseStringPEReference: expecting ';'\n");
6311 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006312 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006313 }
6314 xmlFree(name);
6315 }
6316 }
6317 *str = ptr;
6318 return(entity);
6319}
6320
6321/**
6322 * xmlParseDocTypeDecl:
6323 * @ctxt: an XML parser context
6324 *
6325 * parse a DOCTYPE declaration
6326 *
6327 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6328 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6329 *
6330 * [ VC: Root Element Type ]
6331 * The Name in the document type declaration must match the element
6332 * type of the root element.
6333 */
6334
6335void
6336xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6337 xmlChar *name = NULL;
6338 xmlChar *ExternalID = NULL;
6339 xmlChar *URI = NULL;
6340
6341 /*
6342 * We know that '<!DOCTYPE' has been detected.
6343 */
6344 SKIP(9);
6345
6346 SKIP_BLANKS;
6347
6348 /*
6349 * Parse the DOCTYPE name.
6350 */
6351 name = xmlParseName(ctxt);
6352 if (name == NULL) {
6353 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6355 ctxt->sax->error(ctxt->userData,
6356 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006359 }
6360 ctxt->intSubName = name;
6361
6362 SKIP_BLANKS;
6363
6364 /*
6365 * Check for SystemID and ExternalID
6366 */
6367 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6368
6369 if ((URI != NULL) || (ExternalID != NULL)) {
6370 ctxt->hasExternalSubset = 1;
6371 }
6372 ctxt->extSubURI = URI;
6373 ctxt->extSubSystem = ExternalID;
6374
6375 SKIP_BLANKS;
6376
6377 /*
6378 * Create and update the internal subset.
6379 */
6380 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6381 (!ctxt->disableSAX))
6382 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6383
6384 /*
6385 * Is there any internal subset declarations ?
6386 * they are handled separately in xmlParseInternalSubset()
6387 */
6388 if (RAW == '[')
6389 return;
6390
6391 /*
6392 * We should be at the end of the DOCTYPE declaration.
6393 */
6394 if (RAW != '>') {
6395 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006397 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006398 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006399 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006400 }
6401 NEXT;
6402}
6403
6404/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006405 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006406 * @ctxt: an XML parser context
6407 *
6408 * parse the internal subset declaration
6409 *
6410 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6411 */
6412
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006413static void
Owen Taylor3473f882001-02-23 17:55:21 +00006414xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6415 /*
6416 * Is there any DTD definition ?
6417 */
6418 if (RAW == '[') {
6419 ctxt->instate = XML_PARSER_DTD;
6420 NEXT;
6421 /*
6422 * Parse the succession of Markup declarations and
6423 * PEReferences.
6424 * Subsequence (markupdecl | PEReference | S)*
6425 */
6426 while (RAW != ']') {
6427 const xmlChar *check = CUR_PTR;
6428 int cons = ctxt->input->consumed;
6429
6430 SKIP_BLANKS;
6431 xmlParseMarkupDecl(ctxt);
6432 xmlParsePEReference(ctxt);
6433
6434 /*
6435 * Pop-up of finished entities.
6436 */
6437 while ((RAW == 0) && (ctxt->inputNr > 1))
6438 xmlPopInput(ctxt);
6439
6440 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6441 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6443 ctxt->sax->error(ctxt->userData,
6444 "xmlParseInternalSubset: error detected in Markup declaration\n");
6445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006447 break;
6448 }
6449 }
6450 if (RAW == ']') {
6451 NEXT;
6452 SKIP_BLANKS;
6453 }
6454 }
6455
6456 /*
6457 * We should be at the end of the DOCTYPE declaration.
6458 */
6459 if (RAW != '>') {
6460 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006462 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006463 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006464 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006465 }
6466 NEXT;
6467}
6468
6469/**
6470 * xmlParseAttribute:
6471 * @ctxt: an XML parser context
6472 * @value: a xmlChar ** used to store the value of the attribute
6473 *
6474 * parse an attribute
6475 *
6476 * [41] Attribute ::= Name Eq AttValue
6477 *
6478 * [ WFC: No External Entity References ]
6479 * Attribute values cannot contain direct or indirect entity references
6480 * to external entities.
6481 *
6482 * [ WFC: No < in Attribute Values ]
6483 * The replacement text of any entity referred to directly or indirectly in
6484 * an attribute value (other than "&lt;") must not contain a <.
6485 *
6486 * [ VC: Attribute Value Type ]
6487 * The attribute must have been declared; the value must be of the type
6488 * declared for it.
6489 *
6490 * [25] Eq ::= S? '=' S?
6491 *
6492 * With namespace:
6493 *
6494 * [NS 11] Attribute ::= QName Eq AttValue
6495 *
6496 * Also the case QName == xmlns:??? is handled independently as a namespace
6497 * definition.
6498 *
6499 * Returns the attribute name, and the value in *value.
6500 */
6501
6502xmlChar *
6503xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6504 xmlChar *name, *val;
6505
6506 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006507 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006508 name = xmlParseName(ctxt);
6509 if (name == NULL) {
6510 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6512 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6513 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006514 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006515 return(NULL);
6516 }
6517
6518 /*
6519 * read the value
6520 */
6521 SKIP_BLANKS;
6522 if (RAW == '=') {
6523 NEXT;
6524 SKIP_BLANKS;
6525 val = xmlParseAttValue(ctxt);
6526 ctxt->instate = XML_PARSER_CONTENT;
6527 } else {
6528 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6530 ctxt->sax->error(ctxt->userData,
6531 "Specification mandate value for attribute %s\n", name);
6532 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006534 xmlFree(name);
6535 return(NULL);
6536 }
6537
6538 /*
6539 * Check that xml:lang conforms to the specification
6540 * No more registered as an error, just generate a warning now
6541 * since this was deprecated in XML second edition
6542 */
6543 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6544 if (!xmlCheckLanguageID(val)) {
6545 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6546 ctxt->sax->warning(ctxt->userData,
6547 "Malformed value for xml:lang : %s\n", val);
6548 }
6549 }
6550
6551 /*
6552 * Check that xml:space conforms to the specification
6553 */
6554 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6555 if (xmlStrEqual(val, BAD_CAST "default"))
6556 *(ctxt->space) = 0;
6557 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6558 *(ctxt->space) = 1;
6559 else {
6560 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6562 ctxt->sax->error(ctxt->userData,
6563"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6564 val);
6565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568 }
6569
6570 *value = val;
6571 return(name);
6572}
6573
6574/**
6575 * xmlParseStartTag:
6576 * @ctxt: an XML parser context
6577 *
6578 * parse a start of tag either for rule element or
6579 * EmptyElement. In both case we don't parse the tag closing chars.
6580 *
6581 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6582 *
6583 * [ WFC: Unique Att Spec ]
6584 * No attribute name may appear more than once in the same start-tag or
6585 * empty-element tag.
6586 *
6587 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6588 *
6589 * [ WFC: Unique Att Spec ]
6590 * No attribute name may appear more than once in the same start-tag or
6591 * empty-element tag.
6592 *
6593 * With namespace:
6594 *
6595 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6596 *
6597 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6598 *
6599 * Returns the element name parsed
6600 */
6601
6602xmlChar *
6603xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6604 xmlChar *name;
6605 xmlChar *attname;
6606 xmlChar *attvalue;
6607 const xmlChar **atts = NULL;
6608 int nbatts = 0;
6609 int maxatts = 0;
6610 int i;
6611
6612 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006613 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006614
6615 name = xmlParseName(ctxt);
6616 if (name == NULL) {
6617 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "xmlParseStartTag: invalid element name\n");
6621 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006622 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006623 return(NULL);
6624 }
6625
6626 /*
6627 * Now parse the attributes, it ends up with the ending
6628 *
6629 * (S Attribute)* S?
6630 */
6631 SKIP_BLANKS;
6632 GROW;
6633
Daniel Veillard21a0f912001-02-25 19:54:14 +00006634 while ((RAW != '>') &&
6635 ((RAW != '/') || (NXT(1) != '>')) &&
6636 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006637 const xmlChar *q = CUR_PTR;
6638 int cons = ctxt->input->consumed;
6639
6640 attname = xmlParseAttribute(ctxt, &attvalue);
6641 if ((attname != NULL) && (attvalue != NULL)) {
6642 /*
6643 * [ WFC: Unique Att Spec ]
6644 * No attribute name may appear more than once in the same
6645 * start-tag or empty-element tag.
6646 */
6647 for (i = 0; i < nbatts;i += 2) {
6648 if (xmlStrEqual(atts[i], attname)) {
6649 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6651 ctxt->sax->error(ctxt->userData,
6652 "Attribute %s redefined\n",
6653 attname);
6654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006656 xmlFree(attname);
6657 xmlFree(attvalue);
6658 goto failed;
6659 }
6660 }
6661
6662 /*
6663 * Add the pair to atts
6664 */
6665 if (atts == NULL) {
6666 maxatts = 10;
6667 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6668 if (atts == NULL) {
6669 xmlGenericError(xmlGenericErrorContext,
6670 "malloc of %ld byte failed\n",
6671 maxatts * (long)sizeof(xmlChar *));
6672 return(NULL);
6673 }
6674 } else if (nbatts + 4 > maxatts) {
6675 maxatts *= 2;
6676 atts = (const xmlChar **) xmlRealloc((void *) atts,
6677 maxatts * sizeof(xmlChar *));
6678 if (atts == NULL) {
6679 xmlGenericError(xmlGenericErrorContext,
6680 "realloc of %ld byte failed\n",
6681 maxatts * (long)sizeof(xmlChar *));
6682 return(NULL);
6683 }
6684 }
6685 atts[nbatts++] = attname;
6686 atts[nbatts++] = attvalue;
6687 atts[nbatts] = NULL;
6688 atts[nbatts + 1] = NULL;
6689 } else {
6690 if (attname != NULL)
6691 xmlFree(attname);
6692 if (attvalue != NULL)
6693 xmlFree(attvalue);
6694 }
6695
6696failed:
6697
Daniel Veillard3772de32002-12-17 10:31:45 +00006698 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006699 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6700 break;
6701 if (!IS_BLANK(RAW)) {
6702 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6704 ctxt->sax->error(ctxt->userData,
6705 "attributes construct error\n");
6706 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006707 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006708 }
6709 SKIP_BLANKS;
6710 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6711 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6713 ctxt->sax->error(ctxt->userData,
6714 "xmlParseStartTag: problem parsing attributes\n");
6715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006717 break;
6718 }
6719 GROW;
6720 }
6721
6722 /*
6723 * SAX: Start of Element !
6724 */
6725 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6726 (!ctxt->disableSAX))
6727 ctxt->sax->startElement(ctxt->userData, name, atts);
6728
6729 if (atts != NULL) {
6730 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6731 xmlFree((void *) atts);
6732 }
6733 return(name);
6734}
6735
6736/**
6737 * xmlParseEndTag:
6738 * @ctxt: an XML parser context
6739 *
6740 * parse an end of tag
6741 *
6742 * [42] ETag ::= '</' Name S? '>'
6743 *
6744 * With namespace
6745 *
6746 * [NS 9] ETag ::= '</' QName S? '>'
6747 */
6748
6749void
6750xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6751 xmlChar *name;
6752 xmlChar *oldname;
6753
6754 GROW;
6755 if ((RAW != '<') || (NXT(1) != '/')) {
6756 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6758 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6759 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006760 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006761 return;
6762 }
6763 SKIP(2);
6764
Daniel Veillard46de64e2002-05-29 08:21:33 +00006765 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006766
6767 /*
6768 * We should definitely be at the ending "S? '>'" part
6769 */
6770 GROW;
6771 SKIP_BLANKS;
6772 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6773 ctxt->errNo = XML_ERR_GT_REQUIRED;
6774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6775 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6776 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006777 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006778 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006779 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006780
6781 /*
6782 * [ WFC: Element Type Match ]
6783 * The Name in an element's end-tag must match the element type in the
6784 * start-tag.
6785 *
6786 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006787 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006788 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006790 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006791 ctxt->sax->error(ctxt->userData,
6792 "Opening and ending tag mismatch: %s and %s\n",
6793 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006794 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006795 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006796 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006797 }
6798
6799 }
6800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6802#if 0
6803 else {
6804 /*
6805 * Recover in case of one missing close
6806 */
6807 if ((ctxt->nameNr > 2) &&
6808 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6809 namePop(ctxt);
6810 spacePop(ctxt);
6811 }
6812 }
6813#endif
6814 if (name != NULL)
6815 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006816 }
6817
6818 /*
6819 * SAX: End of Tag
6820 */
6821 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6822 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006823 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006824
Owen Taylor3473f882001-02-23 17:55:21 +00006825 oldname = namePop(ctxt);
6826 spacePop(ctxt);
6827 if (oldname != NULL) {
6828#ifdef DEBUG_STACK
6829 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6830#endif
6831 xmlFree(oldname);
6832 }
6833 return;
6834}
6835
6836/**
6837 * xmlParseCDSect:
6838 * @ctxt: an XML parser context
6839 *
6840 * Parse escaped pure raw content.
6841 *
6842 * [18] CDSect ::= CDStart CData CDEnd
6843 *
6844 * [19] CDStart ::= '<![CDATA['
6845 *
6846 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6847 *
6848 * [21] CDEnd ::= ']]>'
6849 */
6850void
6851xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6852 xmlChar *buf = NULL;
6853 int len = 0;
6854 int size = XML_PARSER_BUFFER_SIZE;
6855 int r, rl;
6856 int s, sl;
6857 int cur, l;
6858 int count = 0;
6859
6860 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6861 (NXT(2) == '[') && (NXT(3) == 'C') &&
6862 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6863 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6864 (NXT(8) == '[')) {
6865 SKIP(9);
6866 } else
6867 return;
6868
6869 ctxt->instate = XML_PARSER_CDATA_SECTION;
6870 r = CUR_CHAR(rl);
6871 if (!IS_CHAR(r)) {
6872 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6874 ctxt->sax->error(ctxt->userData,
6875 "CData section not finished\n");
6876 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006877 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006878 ctxt->instate = XML_PARSER_CONTENT;
6879 return;
6880 }
6881 NEXTL(rl);
6882 s = CUR_CHAR(sl);
6883 if (!IS_CHAR(s)) {
6884 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6886 ctxt->sax->error(ctxt->userData,
6887 "CData section not finished\n");
6888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006890 ctxt->instate = XML_PARSER_CONTENT;
6891 return;
6892 }
6893 NEXTL(sl);
6894 cur = CUR_CHAR(l);
6895 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6896 if (buf == NULL) {
6897 xmlGenericError(xmlGenericErrorContext,
6898 "malloc of %d byte failed\n", size);
6899 return;
6900 }
6901 while (IS_CHAR(cur) &&
6902 ((r != ']') || (s != ']') || (cur != '>'))) {
6903 if (len + 5 >= size) {
6904 size *= 2;
6905 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6906 if (buf == NULL) {
6907 xmlGenericError(xmlGenericErrorContext,
6908 "realloc of %d byte failed\n", size);
6909 return;
6910 }
6911 }
6912 COPY_BUF(rl,buf,len,r);
6913 r = s;
6914 rl = sl;
6915 s = cur;
6916 sl = l;
6917 count++;
6918 if (count > 50) {
6919 GROW;
6920 count = 0;
6921 }
6922 NEXTL(l);
6923 cur = CUR_CHAR(l);
6924 }
6925 buf[len] = 0;
6926 ctxt->instate = XML_PARSER_CONTENT;
6927 if (cur != '>') {
6928 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6930 ctxt->sax->error(ctxt->userData,
6931 "CData section not finished\n%.50s\n", buf);
6932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006934 xmlFree(buf);
6935 return;
6936 }
6937 NEXTL(l);
6938
6939 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006940 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006941 */
6942 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6943 if (ctxt->sax->cdataBlock != NULL)
6944 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006945 else if (ctxt->sax->characters != NULL)
6946 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006947 }
6948 xmlFree(buf);
6949}
6950
6951/**
6952 * xmlParseContent:
6953 * @ctxt: an XML parser context
6954 *
6955 * Parse a content:
6956 *
6957 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6958 */
6959
6960void
6961xmlParseContent(xmlParserCtxtPtr ctxt) {
6962 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006963 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006964 ((RAW != '<') || (NXT(1) != '/'))) {
6965 const xmlChar *test = CUR_PTR;
6966 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006967 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006968
6969 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006970 * First case : a Processing Instruction.
6971 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006972 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006973 xmlParsePI(ctxt);
6974 }
6975
6976 /*
6977 * Second case : a CDSection
6978 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006979 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006980 (NXT(2) == '[') && (NXT(3) == 'C') &&
6981 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6982 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6983 (NXT(8) == '[')) {
6984 xmlParseCDSect(ctxt);
6985 }
6986
6987 /*
6988 * Third case : a comment
6989 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006990 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006991 (NXT(2) == '-') && (NXT(3) == '-')) {
6992 xmlParseComment(ctxt);
6993 ctxt->instate = XML_PARSER_CONTENT;
6994 }
6995
6996 /*
6997 * Fourth case : a sub-element.
6998 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006999 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007000 xmlParseElement(ctxt);
7001 }
7002
7003 /*
7004 * Fifth case : a reference. If if has not been resolved,
7005 * parsing returns it's Name, create the node
7006 */
7007
Daniel Veillard21a0f912001-02-25 19:54:14 +00007008 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007009 xmlParseReference(ctxt);
7010 }
7011
7012 /*
7013 * Last case, text. Note that References are handled directly.
7014 */
7015 else {
7016 xmlParseCharData(ctxt, 0);
7017 }
7018
7019 GROW;
7020 /*
7021 * Pop-up of finished entities.
7022 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007023 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007024 xmlPopInput(ctxt);
7025 SHRINK;
7026
Daniel Veillardfdc91562002-07-01 21:52:03 +00007027 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007028 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7030 ctxt->sax->error(ctxt->userData,
7031 "detected an error in element content\n");
7032 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007033 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007034 ctxt->instate = XML_PARSER_EOF;
7035 break;
7036 }
7037 }
7038}
7039
7040/**
7041 * xmlParseElement:
7042 * @ctxt: an XML parser context
7043 *
7044 * parse an XML element, this is highly recursive
7045 *
7046 * [39] element ::= EmptyElemTag | STag content ETag
7047 *
7048 * [ WFC: Element Type Match ]
7049 * The Name in an element's end-tag must match the element type in the
7050 * start-tag.
7051 *
7052 * [ VC: Element Valid ]
7053 * An element is valid if there is a declaration matching elementdecl
7054 * where the Name matches the element type and one of the following holds:
7055 * - The declaration matches EMPTY and the element has no content.
7056 * - The declaration matches children and the sequence of child elements
7057 * belongs to the language generated by the regular expression in the
7058 * content model, with optional white space (characters matching the
7059 * nonterminal S) between each pair of child elements.
7060 * - The declaration matches Mixed and the content consists of character
7061 * data and child elements whose types match names in the content model.
7062 * - The declaration matches ANY, and the types of any child elements have
7063 * been declared.
7064 */
7065
7066void
7067xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007068 xmlChar *name;
7069 xmlChar *oldname;
7070 xmlParserNodeInfo node_info;
7071 xmlNodePtr ret;
7072
7073 /* Capture start position */
7074 if (ctxt->record_info) {
7075 node_info.begin_pos = ctxt->input->consumed +
7076 (CUR_PTR - ctxt->input->base);
7077 node_info.begin_line = ctxt->input->line;
7078 }
7079
7080 if (ctxt->spaceNr == 0)
7081 spacePush(ctxt, -1);
7082 else
7083 spacePush(ctxt, *ctxt->space);
7084
7085 name = xmlParseStartTag(ctxt);
7086 if (name == NULL) {
7087 spacePop(ctxt);
7088 return;
7089 }
7090 namePush(ctxt, name);
7091 ret = ctxt->node;
7092
7093 /*
7094 * [ VC: Root Element Type ]
7095 * The Name in the document type declaration must match the element
7096 * type of the root element.
7097 */
7098 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7099 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7100 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7101
7102 /*
7103 * Check for an Empty Element.
7104 */
7105 if ((RAW == '/') && (NXT(1) == '>')) {
7106 SKIP(2);
7107 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7108 (!ctxt->disableSAX))
7109 ctxt->sax->endElement(ctxt->userData, name);
7110 oldname = namePop(ctxt);
7111 spacePop(ctxt);
7112 if (oldname != NULL) {
7113#ifdef DEBUG_STACK
7114 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7115#endif
7116 xmlFree(oldname);
7117 }
7118 if ( ret != NULL && ctxt->record_info ) {
7119 node_info.end_pos = ctxt->input->consumed +
7120 (CUR_PTR - ctxt->input->base);
7121 node_info.end_line = ctxt->input->line;
7122 node_info.node = ret;
7123 xmlParserAddNodeInfo(ctxt, &node_info);
7124 }
7125 return;
7126 }
7127 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007128 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007129 } else {
7130 ctxt->errNo = XML_ERR_GT_REQUIRED;
7131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7132 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007133 "Couldn't find end of Start Tag %s\n",
7134 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007135 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007136 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007137
7138 /*
7139 * end of parsing of this node.
7140 */
7141 nodePop(ctxt);
7142 oldname = namePop(ctxt);
7143 spacePop(ctxt);
7144 if (oldname != NULL) {
7145#ifdef DEBUG_STACK
7146 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7147#endif
7148 xmlFree(oldname);
7149 }
7150
7151 /*
7152 * Capture end position and add node
7153 */
7154 if ( ret != NULL && ctxt->record_info ) {
7155 node_info.end_pos = ctxt->input->consumed +
7156 (CUR_PTR - ctxt->input->base);
7157 node_info.end_line = ctxt->input->line;
7158 node_info.node = ret;
7159 xmlParserAddNodeInfo(ctxt, &node_info);
7160 }
7161 return;
7162 }
7163
7164 /*
7165 * Parse the content of the element:
7166 */
7167 xmlParseContent(ctxt);
7168 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007169 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7171 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007172 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007173 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007174 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007175
7176 /*
7177 * end of parsing of this node.
7178 */
7179 nodePop(ctxt);
7180 oldname = namePop(ctxt);
7181 spacePop(ctxt);
7182 if (oldname != NULL) {
7183#ifdef DEBUG_STACK
7184 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7185#endif
7186 xmlFree(oldname);
7187 }
7188 return;
7189 }
7190
7191 /*
7192 * parse the end of tag: '</' should be here.
7193 */
7194 xmlParseEndTag(ctxt);
7195
7196 /*
7197 * Capture end position and add node
7198 */
7199 if ( ret != NULL && ctxt->record_info ) {
7200 node_info.end_pos = ctxt->input->consumed +
7201 (CUR_PTR - ctxt->input->base);
7202 node_info.end_line = ctxt->input->line;
7203 node_info.node = ret;
7204 xmlParserAddNodeInfo(ctxt, &node_info);
7205 }
7206}
7207
7208/**
7209 * xmlParseVersionNum:
7210 * @ctxt: an XML parser context
7211 *
7212 * parse the XML version value.
7213 *
7214 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7215 *
7216 * Returns the string giving the XML version number, or NULL
7217 */
7218xmlChar *
7219xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7220 xmlChar *buf = NULL;
7221 int len = 0;
7222 int size = 10;
7223 xmlChar cur;
7224
7225 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7226 if (buf == NULL) {
7227 xmlGenericError(xmlGenericErrorContext,
7228 "malloc of %d byte failed\n", size);
7229 return(NULL);
7230 }
7231 cur = CUR;
7232 while (((cur >= 'a') && (cur <= 'z')) ||
7233 ((cur >= 'A') && (cur <= 'Z')) ||
7234 ((cur >= '0') && (cur <= '9')) ||
7235 (cur == '_') || (cur == '.') ||
7236 (cur == ':') || (cur == '-')) {
7237 if (len + 1 >= size) {
7238 size *= 2;
7239 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7240 if (buf == NULL) {
7241 xmlGenericError(xmlGenericErrorContext,
7242 "realloc of %d byte failed\n", size);
7243 return(NULL);
7244 }
7245 }
7246 buf[len++] = cur;
7247 NEXT;
7248 cur=CUR;
7249 }
7250 buf[len] = 0;
7251 return(buf);
7252}
7253
7254/**
7255 * xmlParseVersionInfo:
7256 * @ctxt: an XML parser context
7257 *
7258 * parse the XML version.
7259 *
7260 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7261 *
7262 * [25] Eq ::= S? '=' S?
7263 *
7264 * Returns the version string, e.g. "1.0"
7265 */
7266
7267xmlChar *
7268xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7269 xmlChar *version = NULL;
7270 const xmlChar *q;
7271
7272 if ((RAW == 'v') && (NXT(1) == 'e') &&
7273 (NXT(2) == 'r') && (NXT(3) == 's') &&
7274 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7275 (NXT(6) == 'n')) {
7276 SKIP(7);
7277 SKIP_BLANKS;
7278 if (RAW != '=') {
7279 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7281 ctxt->sax->error(ctxt->userData,
7282 "xmlParseVersionInfo : expected '='\n");
7283 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007284 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007285 return(NULL);
7286 }
7287 NEXT;
7288 SKIP_BLANKS;
7289 if (RAW == '"') {
7290 NEXT;
7291 q = CUR_PTR;
7292 version = xmlParseVersionNum(ctxt);
7293 if (RAW != '"') {
7294 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7296 ctxt->sax->error(ctxt->userData,
7297 "String not closed\n%.50s\n", q);
7298 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007299 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007300 } else
7301 NEXT;
7302 } else if (RAW == '\''){
7303 NEXT;
7304 q = CUR_PTR;
7305 version = xmlParseVersionNum(ctxt);
7306 if (RAW != '\'') {
7307 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7309 ctxt->sax->error(ctxt->userData,
7310 "String not closed\n%.50s\n", q);
7311 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007312 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007313 } else
7314 NEXT;
7315 } else {
7316 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7318 ctxt->sax->error(ctxt->userData,
7319 "xmlParseVersionInfo : expected ' or \"\n");
7320 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007321 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007322 }
7323 }
7324 return(version);
7325}
7326
7327/**
7328 * xmlParseEncName:
7329 * @ctxt: an XML parser context
7330 *
7331 * parse the XML encoding name
7332 *
7333 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7334 *
7335 * Returns the encoding name value or NULL
7336 */
7337xmlChar *
7338xmlParseEncName(xmlParserCtxtPtr ctxt) {
7339 xmlChar *buf = NULL;
7340 int len = 0;
7341 int size = 10;
7342 xmlChar cur;
7343
7344 cur = CUR;
7345 if (((cur >= 'a') && (cur <= 'z')) ||
7346 ((cur >= 'A') && (cur <= 'Z'))) {
7347 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7348 if (buf == NULL) {
7349 xmlGenericError(xmlGenericErrorContext,
7350 "malloc of %d byte failed\n", size);
7351 return(NULL);
7352 }
7353
7354 buf[len++] = cur;
7355 NEXT;
7356 cur = CUR;
7357 while (((cur >= 'a') && (cur <= 'z')) ||
7358 ((cur >= 'A') && (cur <= 'Z')) ||
7359 ((cur >= '0') && (cur <= '9')) ||
7360 (cur == '.') || (cur == '_') ||
7361 (cur == '-')) {
7362 if (len + 1 >= size) {
7363 size *= 2;
7364 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7365 if (buf == NULL) {
7366 xmlGenericError(xmlGenericErrorContext,
7367 "realloc of %d byte failed\n", size);
7368 return(NULL);
7369 }
7370 }
7371 buf[len++] = cur;
7372 NEXT;
7373 cur = CUR;
7374 if (cur == 0) {
7375 SHRINK;
7376 GROW;
7377 cur = CUR;
7378 }
7379 }
7380 buf[len] = 0;
7381 } else {
7382 ctxt->errNo = XML_ERR_ENCODING_NAME;
7383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7384 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7385 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007386 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007387 }
7388 return(buf);
7389}
7390
7391/**
7392 * xmlParseEncodingDecl:
7393 * @ctxt: an XML parser context
7394 *
7395 * parse the XML encoding declaration
7396 *
7397 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7398 *
7399 * this setups the conversion filters.
7400 *
7401 * Returns the encoding value or NULL
7402 */
7403
7404xmlChar *
7405xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7406 xmlChar *encoding = NULL;
7407 const xmlChar *q;
7408
7409 SKIP_BLANKS;
7410 if ((RAW == 'e') && (NXT(1) == 'n') &&
7411 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7412 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7413 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7414 SKIP(8);
7415 SKIP_BLANKS;
7416 if (RAW != '=') {
7417 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7419 ctxt->sax->error(ctxt->userData,
7420 "xmlParseEncodingDecl : expected '='\n");
7421 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007422 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007423 return(NULL);
7424 }
7425 NEXT;
7426 SKIP_BLANKS;
7427 if (RAW == '"') {
7428 NEXT;
7429 q = CUR_PTR;
7430 encoding = xmlParseEncName(ctxt);
7431 if (RAW != '"') {
7432 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7434 ctxt->sax->error(ctxt->userData,
7435 "String not closed\n%.50s\n", q);
7436 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007437 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007438 } else
7439 NEXT;
7440 } else if (RAW == '\''){
7441 NEXT;
7442 q = CUR_PTR;
7443 encoding = xmlParseEncName(ctxt);
7444 if (RAW != '\'') {
7445 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7447 ctxt->sax->error(ctxt->userData,
7448 "String not closed\n%.50s\n", q);
7449 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007450 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007451 } else
7452 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007453 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007454 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7456 ctxt->sax->error(ctxt->userData,
7457 "xmlParseEncodingDecl : expected ' or \"\n");
7458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007460 }
7461 if (encoding != NULL) {
7462 xmlCharEncoding enc;
7463 xmlCharEncodingHandlerPtr handler;
7464
7465 if (ctxt->input->encoding != NULL)
7466 xmlFree((xmlChar *) ctxt->input->encoding);
7467 ctxt->input->encoding = encoding;
7468
7469 enc = xmlParseCharEncoding((const char *) encoding);
7470 /*
7471 * registered set of known encodings
7472 */
7473 if (enc != XML_CHAR_ENCODING_ERROR) {
7474 xmlSwitchEncoding(ctxt, enc);
7475 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007476 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007477 xmlFree(encoding);
7478 return(NULL);
7479 }
7480 } else {
7481 /*
7482 * fallback for unknown encodings
7483 */
7484 handler = xmlFindCharEncodingHandler((const char *) encoding);
7485 if (handler != NULL) {
7486 xmlSwitchToEncoding(ctxt, handler);
7487 } else {
7488 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7490 ctxt->sax->error(ctxt->userData,
7491 "Unsupported encoding %s\n", encoding);
7492 return(NULL);
7493 }
7494 }
7495 }
7496 }
7497 return(encoding);
7498}
7499
7500/**
7501 * xmlParseSDDecl:
7502 * @ctxt: an XML parser context
7503 *
7504 * parse the XML standalone declaration
7505 *
7506 * [32] SDDecl ::= S 'standalone' Eq
7507 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7508 *
7509 * [ VC: Standalone Document Declaration ]
7510 * TODO The standalone document declaration must have the value "no"
7511 * if any external markup declarations contain declarations of:
7512 * - attributes with default values, if elements to which these
7513 * attributes apply appear in the document without specifications
7514 * of values for these attributes, or
7515 * - entities (other than amp, lt, gt, apos, quot), if references
7516 * to those entities appear in the document, or
7517 * - attributes with values subject to normalization, where the
7518 * attribute appears in the document with a value which will change
7519 * as a result of normalization, or
7520 * - element types with element content, if white space occurs directly
7521 * within any instance of those types.
7522 *
7523 * Returns 1 if standalone, 0 otherwise
7524 */
7525
7526int
7527xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7528 int standalone = -1;
7529
7530 SKIP_BLANKS;
7531 if ((RAW == 's') && (NXT(1) == 't') &&
7532 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7533 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7534 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7535 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7536 SKIP(10);
7537 SKIP_BLANKS;
7538 if (RAW != '=') {
7539 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7541 ctxt->sax->error(ctxt->userData,
7542 "XML standalone declaration : expected '='\n");
7543 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007544 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007545 return(standalone);
7546 }
7547 NEXT;
7548 SKIP_BLANKS;
7549 if (RAW == '\''){
7550 NEXT;
7551 if ((RAW == 'n') && (NXT(1) == 'o')) {
7552 standalone = 0;
7553 SKIP(2);
7554 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7555 (NXT(2) == 's')) {
7556 standalone = 1;
7557 SKIP(3);
7558 } else {
7559 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7561 ctxt->sax->error(ctxt->userData,
7562 "standalone accepts only 'yes' or 'no'\n");
7563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007565 }
7566 if (RAW != '\'') {
7567 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7569 ctxt->sax->error(ctxt->userData, "String not closed\n");
7570 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007571 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007572 } else
7573 NEXT;
7574 } else if (RAW == '"'){
7575 NEXT;
7576 if ((RAW == 'n') && (NXT(1) == 'o')) {
7577 standalone = 0;
7578 SKIP(2);
7579 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7580 (NXT(2) == 's')) {
7581 standalone = 1;
7582 SKIP(3);
7583 } else {
7584 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7586 ctxt->sax->error(ctxt->userData,
7587 "standalone accepts only 'yes' or 'no'\n");
7588 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007589 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007590 }
7591 if (RAW != '"') {
7592 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7594 ctxt->sax->error(ctxt->userData, "String not closed\n");
7595 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007596 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007597 } else
7598 NEXT;
7599 } else {
7600 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7602 ctxt->sax->error(ctxt->userData,
7603 "Standalone value not found\n");
7604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007606 }
7607 }
7608 return(standalone);
7609}
7610
7611/**
7612 * xmlParseXMLDecl:
7613 * @ctxt: an XML parser context
7614 *
7615 * parse an XML declaration header
7616 *
7617 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7618 */
7619
7620void
7621xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7622 xmlChar *version;
7623
7624 /*
7625 * We know that '<?xml' is here.
7626 */
7627 SKIP(5);
7628
7629 if (!IS_BLANK(RAW)) {
7630 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7632 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7633 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007634 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007635 }
7636 SKIP_BLANKS;
7637
7638 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007639 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007640 */
7641 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007642 if (version == NULL) {
7643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7644 ctxt->sax->error(ctxt->userData,
7645 "Malformed declaration expecting version\n");
7646 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007647 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007648 } else {
7649 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7650 /*
7651 * TODO: Blueberry should be detected here
7652 */
7653 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7654 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7655 version);
7656 }
7657 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007658 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007659 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007660 }
Owen Taylor3473f882001-02-23 17:55:21 +00007661
7662 /*
7663 * We may have the encoding declaration
7664 */
7665 if (!IS_BLANK(RAW)) {
7666 if ((RAW == '?') && (NXT(1) == '>')) {
7667 SKIP(2);
7668 return;
7669 }
7670 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7672 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7673 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007674 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007675 }
7676 xmlParseEncodingDecl(ctxt);
7677 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7678 /*
7679 * The XML REC instructs us to stop parsing right here
7680 */
7681 return;
7682 }
7683
7684 /*
7685 * We may have the standalone status.
7686 */
7687 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7688 if ((RAW == '?') && (NXT(1) == '>')) {
7689 SKIP(2);
7690 return;
7691 }
7692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7694 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007697 }
7698 SKIP_BLANKS;
7699 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7700
7701 SKIP_BLANKS;
7702 if ((RAW == '?') && (NXT(1) == '>')) {
7703 SKIP(2);
7704 } else if (RAW == '>') {
7705 /* Deprecated old WD ... */
7706 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7708 ctxt->sax->error(ctxt->userData,
7709 "XML declaration must end-up with '?>'\n");
7710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007712 NEXT;
7713 } else {
7714 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7716 ctxt->sax->error(ctxt->userData,
7717 "parsing XML declaration: '?>' expected\n");
7718 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007719 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007720 MOVETO_ENDTAG(CUR_PTR);
7721 NEXT;
7722 }
7723}
7724
7725/**
7726 * xmlParseMisc:
7727 * @ctxt: an XML parser context
7728 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007729 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007730 *
7731 * [27] Misc ::= Comment | PI | S
7732 */
7733
7734void
7735xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007736 while (((RAW == '<') && (NXT(1) == '?')) ||
7737 ((RAW == '<') && (NXT(1) == '!') &&
7738 (NXT(2) == '-') && (NXT(3) == '-')) ||
7739 IS_BLANK(CUR)) {
7740 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007741 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007742 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007743 NEXT;
7744 } else
7745 xmlParseComment(ctxt);
7746 }
7747}
7748
7749/**
7750 * xmlParseDocument:
7751 * @ctxt: an XML parser context
7752 *
7753 * parse an XML document (and build a tree if using the standard SAX
7754 * interface).
7755 *
7756 * [1] document ::= prolog element Misc*
7757 *
7758 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7759 *
7760 * Returns 0, -1 in case of error. the parser context is augmented
7761 * as a result of the parsing.
7762 */
7763
7764int
7765xmlParseDocument(xmlParserCtxtPtr ctxt) {
7766 xmlChar start[4];
7767 xmlCharEncoding enc;
7768
7769 xmlInitParser();
7770
7771 GROW;
7772
7773 /*
7774 * SAX: beginning of the document processing.
7775 */
7776 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7777 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7778
Daniel Veillard50f34372001-08-03 12:06:36 +00007779 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007780 /*
7781 * Get the 4 first bytes and decode the charset
7782 * if enc != XML_CHAR_ENCODING_NONE
7783 * plug some encoding conversion routines.
7784 */
7785 start[0] = RAW;
7786 start[1] = NXT(1);
7787 start[2] = NXT(2);
7788 start[3] = NXT(3);
7789 enc = xmlDetectCharEncoding(start, 4);
7790 if (enc != XML_CHAR_ENCODING_NONE) {
7791 xmlSwitchEncoding(ctxt, enc);
7792 }
Owen Taylor3473f882001-02-23 17:55:21 +00007793 }
7794
7795
7796 if (CUR == 0) {
7797 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7799 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007802 }
7803
7804 /*
7805 * Check for the XMLDecl in the Prolog.
7806 */
7807 GROW;
7808 if ((RAW == '<') && (NXT(1) == '?') &&
7809 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7810 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7811
7812 /*
7813 * Note that we will switch encoding on the fly.
7814 */
7815 xmlParseXMLDecl(ctxt);
7816 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7817 /*
7818 * The XML REC instructs us to stop parsing right here
7819 */
7820 return(-1);
7821 }
7822 ctxt->standalone = ctxt->input->standalone;
7823 SKIP_BLANKS;
7824 } else {
7825 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7826 }
7827 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7828 ctxt->sax->startDocument(ctxt->userData);
7829
7830 /*
7831 * The Misc part of the Prolog
7832 */
7833 GROW;
7834 xmlParseMisc(ctxt);
7835
7836 /*
7837 * Then possibly doc type declaration(s) and more Misc
7838 * (doctypedecl Misc*)?
7839 */
7840 GROW;
7841 if ((RAW == '<') && (NXT(1) == '!') &&
7842 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7843 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7844 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7845 (NXT(8) == 'E')) {
7846
7847 ctxt->inSubset = 1;
7848 xmlParseDocTypeDecl(ctxt);
7849 if (RAW == '[') {
7850 ctxt->instate = XML_PARSER_DTD;
7851 xmlParseInternalSubset(ctxt);
7852 }
7853
7854 /*
7855 * Create and update the external subset.
7856 */
7857 ctxt->inSubset = 2;
7858 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7859 (!ctxt->disableSAX))
7860 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7861 ctxt->extSubSystem, ctxt->extSubURI);
7862 ctxt->inSubset = 0;
7863
7864
7865 ctxt->instate = XML_PARSER_PROLOG;
7866 xmlParseMisc(ctxt);
7867 }
7868
7869 /*
7870 * Time to start parsing the tree itself
7871 */
7872 GROW;
7873 if (RAW != '<') {
7874 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7876 ctxt->sax->error(ctxt->userData,
7877 "Start tag expected, '<' not found\n");
7878 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007879 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007880 ctxt->instate = XML_PARSER_EOF;
7881 } else {
7882 ctxt->instate = XML_PARSER_CONTENT;
7883 xmlParseElement(ctxt);
7884 ctxt->instate = XML_PARSER_EPILOG;
7885
7886
7887 /*
7888 * The Misc part at the end
7889 */
7890 xmlParseMisc(ctxt);
7891
Daniel Veillard561b7f82002-03-20 21:55:57 +00007892 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007893 ctxt->errNo = XML_ERR_DOCUMENT_END;
7894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7895 ctxt->sax->error(ctxt->userData,
7896 "Extra content at the end of the document\n");
7897 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007898 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007899 }
7900 ctxt->instate = XML_PARSER_EOF;
7901 }
7902
7903 /*
7904 * SAX: end of the document processing.
7905 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007906 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007907 ctxt->sax->endDocument(ctxt->userData);
7908
Daniel Veillard5997aca2002-03-18 18:36:20 +00007909 /*
7910 * Remove locally kept entity definitions if the tree was not built
7911 */
7912 if ((ctxt->myDoc != NULL) &&
7913 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7914 xmlFreeDoc(ctxt->myDoc);
7915 ctxt->myDoc = NULL;
7916 }
7917
Daniel Veillardc7612992002-02-17 22:47:37 +00007918 if (! ctxt->wellFormed) {
7919 ctxt->valid = 0;
7920 return(-1);
7921 }
Owen Taylor3473f882001-02-23 17:55:21 +00007922 return(0);
7923}
7924
7925/**
7926 * xmlParseExtParsedEnt:
7927 * @ctxt: an XML parser context
7928 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007929 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007930 * An external general parsed entity is well-formed if it matches the
7931 * production labeled extParsedEnt.
7932 *
7933 * [78] extParsedEnt ::= TextDecl? content
7934 *
7935 * Returns 0, -1 in case of error. the parser context is augmented
7936 * as a result of the parsing.
7937 */
7938
7939int
7940xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7941 xmlChar start[4];
7942 xmlCharEncoding enc;
7943
7944 xmlDefaultSAXHandlerInit();
7945
7946 GROW;
7947
7948 /*
7949 * SAX: beginning of the document processing.
7950 */
7951 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7952 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7953
7954 /*
7955 * Get the 4 first bytes and decode the charset
7956 * if enc != XML_CHAR_ENCODING_NONE
7957 * plug some encoding conversion routines.
7958 */
7959 start[0] = RAW;
7960 start[1] = NXT(1);
7961 start[2] = NXT(2);
7962 start[3] = NXT(3);
7963 enc = xmlDetectCharEncoding(start, 4);
7964 if (enc != XML_CHAR_ENCODING_NONE) {
7965 xmlSwitchEncoding(ctxt, enc);
7966 }
7967
7968
7969 if (CUR == 0) {
7970 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7972 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7973 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007974 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007975 }
7976
7977 /*
7978 * Check for the XMLDecl in the Prolog.
7979 */
7980 GROW;
7981 if ((RAW == '<') && (NXT(1) == '?') &&
7982 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7983 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7984
7985 /*
7986 * Note that we will switch encoding on the fly.
7987 */
7988 xmlParseXMLDecl(ctxt);
7989 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7990 /*
7991 * The XML REC instructs us to stop parsing right here
7992 */
7993 return(-1);
7994 }
7995 SKIP_BLANKS;
7996 } else {
7997 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7998 }
7999 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8000 ctxt->sax->startDocument(ctxt->userData);
8001
8002 /*
8003 * Doing validity checking on chunk doesn't make sense
8004 */
8005 ctxt->instate = XML_PARSER_CONTENT;
8006 ctxt->validate = 0;
8007 ctxt->loadsubset = 0;
8008 ctxt->depth = 0;
8009
8010 xmlParseContent(ctxt);
8011
8012 if ((RAW == '<') && (NXT(1) == '/')) {
8013 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8015 ctxt->sax->error(ctxt->userData,
8016 "chunk is not well balanced\n");
8017 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008018 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008019 } else if (RAW != 0) {
8020 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8022 ctxt->sax->error(ctxt->userData,
8023 "extra content at the end of well balanced chunk\n");
8024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008026 }
8027
8028 /*
8029 * SAX: end of the document processing.
8030 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008031 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008032 ctxt->sax->endDocument(ctxt->userData);
8033
8034 if (! ctxt->wellFormed) return(-1);
8035 return(0);
8036}
8037
8038/************************************************************************
8039 * *
8040 * Progressive parsing interfaces *
8041 * *
8042 ************************************************************************/
8043
8044/**
8045 * xmlParseLookupSequence:
8046 * @ctxt: an XML parser context
8047 * @first: the first char to lookup
8048 * @next: the next char to lookup or zero
8049 * @third: the next char to lookup or zero
8050 *
8051 * Try to find if a sequence (first, next, third) or just (first next) or
8052 * (first) is available in the input stream.
8053 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8054 * to avoid rescanning sequences of bytes, it DOES change the state of the
8055 * parser, do not use liberally.
8056 *
8057 * Returns the index to the current parsing point if the full sequence
8058 * is available, -1 otherwise.
8059 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008060static int
Owen Taylor3473f882001-02-23 17:55:21 +00008061xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8062 xmlChar next, xmlChar third) {
8063 int base, len;
8064 xmlParserInputPtr in;
8065 const xmlChar *buf;
8066
8067 in = ctxt->input;
8068 if (in == NULL) return(-1);
8069 base = in->cur - in->base;
8070 if (base < 0) return(-1);
8071 if (ctxt->checkIndex > base)
8072 base = ctxt->checkIndex;
8073 if (in->buf == NULL) {
8074 buf = in->base;
8075 len = in->length;
8076 } else {
8077 buf = in->buf->buffer->content;
8078 len = in->buf->buffer->use;
8079 }
8080 /* take into account the sequence length */
8081 if (third) len -= 2;
8082 else if (next) len --;
8083 for (;base < len;base++) {
8084 if (buf[base] == first) {
8085 if (third != 0) {
8086 if ((buf[base + 1] != next) ||
8087 (buf[base + 2] != third)) continue;
8088 } else if (next != 0) {
8089 if (buf[base + 1] != next) continue;
8090 }
8091 ctxt->checkIndex = 0;
8092#ifdef DEBUG_PUSH
8093 if (next == 0)
8094 xmlGenericError(xmlGenericErrorContext,
8095 "PP: lookup '%c' found at %d\n",
8096 first, base);
8097 else if (third == 0)
8098 xmlGenericError(xmlGenericErrorContext,
8099 "PP: lookup '%c%c' found at %d\n",
8100 first, next, base);
8101 else
8102 xmlGenericError(xmlGenericErrorContext,
8103 "PP: lookup '%c%c%c' found at %d\n",
8104 first, next, third, base);
8105#endif
8106 return(base - (in->cur - in->base));
8107 }
8108 }
8109 ctxt->checkIndex = base;
8110#ifdef DEBUG_PUSH
8111 if (next == 0)
8112 xmlGenericError(xmlGenericErrorContext,
8113 "PP: lookup '%c' failed\n", first);
8114 else if (third == 0)
8115 xmlGenericError(xmlGenericErrorContext,
8116 "PP: lookup '%c%c' failed\n", first, next);
8117 else
8118 xmlGenericError(xmlGenericErrorContext,
8119 "PP: lookup '%c%c%c' failed\n", first, next, third);
8120#endif
8121 return(-1);
8122}
8123
8124/**
8125 * xmlParseTryOrFinish:
8126 * @ctxt: an XML parser context
8127 * @terminate: last chunk indicator
8128 *
8129 * Try to progress on parsing
8130 *
8131 * Returns zero if no parsing was possible
8132 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008133static int
Owen Taylor3473f882001-02-23 17:55:21 +00008134xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8135 int ret = 0;
8136 int avail;
8137 xmlChar cur, next;
8138
8139#ifdef DEBUG_PUSH
8140 switch (ctxt->instate) {
8141 case XML_PARSER_EOF:
8142 xmlGenericError(xmlGenericErrorContext,
8143 "PP: try EOF\n"); break;
8144 case XML_PARSER_START:
8145 xmlGenericError(xmlGenericErrorContext,
8146 "PP: try START\n"); break;
8147 case XML_PARSER_MISC:
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: try MISC\n");break;
8150 case XML_PARSER_COMMENT:
8151 xmlGenericError(xmlGenericErrorContext,
8152 "PP: try COMMENT\n");break;
8153 case XML_PARSER_PROLOG:
8154 xmlGenericError(xmlGenericErrorContext,
8155 "PP: try PROLOG\n");break;
8156 case XML_PARSER_START_TAG:
8157 xmlGenericError(xmlGenericErrorContext,
8158 "PP: try START_TAG\n");break;
8159 case XML_PARSER_CONTENT:
8160 xmlGenericError(xmlGenericErrorContext,
8161 "PP: try CONTENT\n");break;
8162 case XML_PARSER_CDATA_SECTION:
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: try CDATA_SECTION\n");break;
8165 case XML_PARSER_END_TAG:
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: try END_TAG\n");break;
8168 case XML_PARSER_ENTITY_DECL:
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: try ENTITY_DECL\n");break;
8171 case XML_PARSER_ENTITY_VALUE:
8172 xmlGenericError(xmlGenericErrorContext,
8173 "PP: try ENTITY_VALUE\n");break;
8174 case XML_PARSER_ATTRIBUTE_VALUE:
8175 xmlGenericError(xmlGenericErrorContext,
8176 "PP: try ATTRIBUTE_VALUE\n");break;
8177 case XML_PARSER_DTD:
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: try DTD\n");break;
8180 case XML_PARSER_EPILOG:
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: try EPILOG\n");break;
8183 case XML_PARSER_PI:
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: try PI\n");break;
8186 case XML_PARSER_IGNORE:
8187 xmlGenericError(xmlGenericErrorContext,
8188 "PP: try IGNORE\n");break;
8189 }
8190#endif
8191
8192 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008193 SHRINK;
8194
Owen Taylor3473f882001-02-23 17:55:21 +00008195 /*
8196 * Pop-up of finished entities.
8197 */
8198 while ((RAW == 0) && (ctxt->inputNr > 1))
8199 xmlPopInput(ctxt);
8200
8201 if (ctxt->input ==NULL) break;
8202 if (ctxt->input->buf == NULL)
8203 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008204 else {
8205 /*
8206 * If we are operating on converted input, try to flush
8207 * remainng chars to avoid them stalling in the non-converted
8208 * buffer.
8209 */
8210 if ((ctxt->input->buf->raw != NULL) &&
8211 (ctxt->input->buf->raw->use > 0)) {
8212 int base = ctxt->input->base -
8213 ctxt->input->buf->buffer->content;
8214 int current = ctxt->input->cur - ctxt->input->base;
8215
8216 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8217 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8218 ctxt->input->cur = ctxt->input->base + current;
8219 ctxt->input->end =
8220 &ctxt->input->buf->buffer->content[
8221 ctxt->input->buf->buffer->use];
8222 }
8223 avail = ctxt->input->buf->buffer->use -
8224 (ctxt->input->cur - ctxt->input->base);
8225 }
Owen Taylor3473f882001-02-23 17:55:21 +00008226 if (avail < 1)
8227 goto done;
8228 switch (ctxt->instate) {
8229 case XML_PARSER_EOF:
8230 /*
8231 * Document parsing is done !
8232 */
8233 goto done;
8234 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008235 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8236 xmlChar start[4];
8237 xmlCharEncoding enc;
8238
8239 /*
8240 * Very first chars read from the document flow.
8241 */
8242 if (avail < 4)
8243 goto done;
8244
8245 /*
8246 * Get the 4 first bytes and decode the charset
8247 * if enc != XML_CHAR_ENCODING_NONE
8248 * plug some encoding conversion routines.
8249 */
8250 start[0] = RAW;
8251 start[1] = NXT(1);
8252 start[2] = NXT(2);
8253 start[3] = NXT(3);
8254 enc = xmlDetectCharEncoding(start, 4);
8255 if (enc != XML_CHAR_ENCODING_NONE) {
8256 xmlSwitchEncoding(ctxt, enc);
8257 }
8258 break;
8259 }
Owen Taylor3473f882001-02-23 17:55:21 +00008260
8261 cur = ctxt->input->cur[0];
8262 next = ctxt->input->cur[1];
8263 if (cur == 0) {
8264 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8265 ctxt->sax->setDocumentLocator(ctxt->userData,
8266 &xmlDefaultSAXLocator);
8267 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8269 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8270 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008271 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008272 ctxt->instate = XML_PARSER_EOF;
8273#ifdef DEBUG_PUSH
8274 xmlGenericError(xmlGenericErrorContext,
8275 "PP: entering EOF\n");
8276#endif
8277 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8278 ctxt->sax->endDocument(ctxt->userData);
8279 goto done;
8280 }
8281 if ((cur == '<') && (next == '?')) {
8282 /* PI or XML decl */
8283 if (avail < 5) return(ret);
8284 if ((!terminate) &&
8285 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8286 return(ret);
8287 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8288 ctxt->sax->setDocumentLocator(ctxt->userData,
8289 &xmlDefaultSAXLocator);
8290 if ((ctxt->input->cur[2] == 'x') &&
8291 (ctxt->input->cur[3] == 'm') &&
8292 (ctxt->input->cur[4] == 'l') &&
8293 (IS_BLANK(ctxt->input->cur[5]))) {
8294 ret += 5;
8295#ifdef DEBUG_PUSH
8296 xmlGenericError(xmlGenericErrorContext,
8297 "PP: Parsing XML Decl\n");
8298#endif
8299 xmlParseXMLDecl(ctxt);
8300 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8301 /*
8302 * The XML REC instructs us to stop parsing right
8303 * here
8304 */
8305 ctxt->instate = XML_PARSER_EOF;
8306 return(0);
8307 }
8308 ctxt->standalone = ctxt->input->standalone;
8309 if ((ctxt->encoding == NULL) &&
8310 (ctxt->input->encoding != NULL))
8311 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8312 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8313 (!ctxt->disableSAX))
8314 ctxt->sax->startDocument(ctxt->userData);
8315 ctxt->instate = XML_PARSER_MISC;
8316#ifdef DEBUG_PUSH
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: entering MISC\n");
8319#endif
8320 } else {
8321 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8322 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8323 (!ctxt->disableSAX))
8324 ctxt->sax->startDocument(ctxt->userData);
8325 ctxt->instate = XML_PARSER_MISC;
8326#ifdef DEBUG_PUSH
8327 xmlGenericError(xmlGenericErrorContext,
8328 "PP: entering MISC\n");
8329#endif
8330 }
8331 } else {
8332 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8333 ctxt->sax->setDocumentLocator(ctxt->userData,
8334 &xmlDefaultSAXLocator);
8335 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8336 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8337 (!ctxt->disableSAX))
8338 ctxt->sax->startDocument(ctxt->userData);
8339 ctxt->instate = XML_PARSER_MISC;
8340#ifdef DEBUG_PUSH
8341 xmlGenericError(xmlGenericErrorContext,
8342 "PP: entering MISC\n");
8343#endif
8344 }
8345 break;
8346 case XML_PARSER_MISC:
8347 SKIP_BLANKS;
8348 if (ctxt->input->buf == NULL)
8349 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8350 else
8351 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8352 if (avail < 2)
8353 goto done;
8354 cur = ctxt->input->cur[0];
8355 next = ctxt->input->cur[1];
8356 if ((cur == '<') && (next == '?')) {
8357 if ((!terminate) &&
8358 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8359 goto done;
8360#ifdef DEBUG_PUSH
8361 xmlGenericError(xmlGenericErrorContext,
8362 "PP: Parsing PI\n");
8363#endif
8364 xmlParsePI(ctxt);
8365 } else if ((cur == '<') && (next == '!') &&
8366 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8367 if ((!terminate) &&
8368 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8369 goto done;
8370#ifdef DEBUG_PUSH
8371 xmlGenericError(xmlGenericErrorContext,
8372 "PP: Parsing Comment\n");
8373#endif
8374 xmlParseComment(ctxt);
8375 ctxt->instate = XML_PARSER_MISC;
8376 } else if ((cur == '<') && (next == '!') &&
8377 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8378 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8379 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8380 (ctxt->input->cur[8] == 'E')) {
8381 if ((!terminate) &&
8382 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8383 goto done;
8384#ifdef DEBUG_PUSH
8385 xmlGenericError(xmlGenericErrorContext,
8386 "PP: Parsing internal subset\n");
8387#endif
8388 ctxt->inSubset = 1;
8389 xmlParseDocTypeDecl(ctxt);
8390 if (RAW == '[') {
8391 ctxt->instate = XML_PARSER_DTD;
8392#ifdef DEBUG_PUSH
8393 xmlGenericError(xmlGenericErrorContext,
8394 "PP: entering DTD\n");
8395#endif
8396 } else {
8397 /*
8398 * Create and update the external subset.
8399 */
8400 ctxt->inSubset = 2;
8401 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8402 (ctxt->sax->externalSubset != NULL))
8403 ctxt->sax->externalSubset(ctxt->userData,
8404 ctxt->intSubName, ctxt->extSubSystem,
8405 ctxt->extSubURI);
8406 ctxt->inSubset = 0;
8407 ctxt->instate = XML_PARSER_PROLOG;
8408#ifdef DEBUG_PUSH
8409 xmlGenericError(xmlGenericErrorContext,
8410 "PP: entering PROLOG\n");
8411#endif
8412 }
8413 } else if ((cur == '<') && (next == '!') &&
8414 (avail < 9)) {
8415 goto done;
8416 } else {
8417 ctxt->instate = XML_PARSER_START_TAG;
8418#ifdef DEBUG_PUSH
8419 xmlGenericError(xmlGenericErrorContext,
8420 "PP: entering START_TAG\n");
8421#endif
8422 }
8423 break;
8424 case XML_PARSER_IGNORE:
8425 xmlGenericError(xmlGenericErrorContext,
8426 "PP: internal error, state == IGNORE");
8427 ctxt->instate = XML_PARSER_DTD;
8428#ifdef DEBUG_PUSH
8429 xmlGenericError(xmlGenericErrorContext,
8430 "PP: entering DTD\n");
8431#endif
8432 break;
8433 case XML_PARSER_PROLOG:
8434 SKIP_BLANKS;
8435 if (ctxt->input->buf == NULL)
8436 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8437 else
8438 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8439 if (avail < 2)
8440 goto done;
8441 cur = ctxt->input->cur[0];
8442 next = ctxt->input->cur[1];
8443 if ((cur == '<') && (next == '?')) {
8444 if ((!terminate) &&
8445 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8446 goto done;
8447#ifdef DEBUG_PUSH
8448 xmlGenericError(xmlGenericErrorContext,
8449 "PP: Parsing PI\n");
8450#endif
8451 xmlParsePI(ctxt);
8452 } else if ((cur == '<') && (next == '!') &&
8453 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8454 if ((!terminate) &&
8455 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8456 goto done;
8457#ifdef DEBUG_PUSH
8458 xmlGenericError(xmlGenericErrorContext,
8459 "PP: Parsing Comment\n");
8460#endif
8461 xmlParseComment(ctxt);
8462 ctxt->instate = XML_PARSER_PROLOG;
8463 } else if ((cur == '<') && (next == '!') &&
8464 (avail < 4)) {
8465 goto done;
8466 } else {
8467 ctxt->instate = XML_PARSER_START_TAG;
8468#ifdef DEBUG_PUSH
8469 xmlGenericError(xmlGenericErrorContext,
8470 "PP: entering START_TAG\n");
8471#endif
8472 }
8473 break;
8474 case XML_PARSER_EPILOG:
8475 SKIP_BLANKS;
8476 if (ctxt->input->buf == NULL)
8477 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8478 else
8479 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8480 if (avail < 2)
8481 goto done;
8482 cur = ctxt->input->cur[0];
8483 next = ctxt->input->cur[1];
8484 if ((cur == '<') && (next == '?')) {
8485 if ((!terminate) &&
8486 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8487 goto done;
8488#ifdef DEBUG_PUSH
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: Parsing PI\n");
8491#endif
8492 xmlParsePI(ctxt);
8493 ctxt->instate = XML_PARSER_EPILOG;
8494 } else if ((cur == '<') && (next == '!') &&
8495 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8496 if ((!terminate) &&
8497 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8498 goto done;
8499#ifdef DEBUG_PUSH
8500 xmlGenericError(xmlGenericErrorContext,
8501 "PP: Parsing Comment\n");
8502#endif
8503 xmlParseComment(ctxt);
8504 ctxt->instate = XML_PARSER_EPILOG;
8505 } else if ((cur == '<') && (next == '!') &&
8506 (avail < 4)) {
8507 goto done;
8508 } else {
8509 ctxt->errNo = XML_ERR_DOCUMENT_END;
8510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8511 ctxt->sax->error(ctxt->userData,
8512 "Extra content at the end of the document\n");
8513 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008514 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008515 ctxt->instate = XML_PARSER_EOF;
8516#ifdef DEBUG_PUSH
8517 xmlGenericError(xmlGenericErrorContext,
8518 "PP: entering EOF\n");
8519#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008520 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008521 ctxt->sax->endDocument(ctxt->userData);
8522 goto done;
8523 }
8524 break;
8525 case XML_PARSER_START_TAG: {
8526 xmlChar *name, *oldname;
8527
8528 if ((avail < 2) && (ctxt->inputNr == 1))
8529 goto done;
8530 cur = ctxt->input->cur[0];
8531 if (cur != '<') {
8532 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8534 ctxt->sax->error(ctxt->userData,
8535 "Start tag expect, '<' not found\n");
8536 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008537 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008538 ctxt->instate = XML_PARSER_EOF;
8539#ifdef DEBUG_PUSH
8540 xmlGenericError(xmlGenericErrorContext,
8541 "PP: entering EOF\n");
8542#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008543 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008544 ctxt->sax->endDocument(ctxt->userData);
8545 goto done;
8546 }
8547 if ((!terminate) &&
8548 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8549 goto done;
8550 if (ctxt->spaceNr == 0)
8551 spacePush(ctxt, -1);
8552 else
8553 spacePush(ctxt, *ctxt->space);
8554 name = xmlParseStartTag(ctxt);
8555 if (name == NULL) {
8556 spacePop(ctxt);
8557 ctxt->instate = XML_PARSER_EOF;
8558#ifdef DEBUG_PUSH
8559 xmlGenericError(xmlGenericErrorContext,
8560 "PP: entering EOF\n");
8561#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008562 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008563 ctxt->sax->endDocument(ctxt->userData);
8564 goto done;
8565 }
8566 namePush(ctxt, xmlStrdup(name));
8567
8568 /*
8569 * [ VC: Root Element Type ]
8570 * The Name in the document type declaration must match
8571 * the element type of the root element.
8572 */
8573 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8574 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8575 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8576
8577 /*
8578 * Check for an Empty Element.
8579 */
8580 if ((RAW == '/') && (NXT(1) == '>')) {
8581 SKIP(2);
8582 if ((ctxt->sax != NULL) &&
8583 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8584 ctxt->sax->endElement(ctxt->userData, name);
8585 xmlFree(name);
8586 oldname = namePop(ctxt);
8587 spacePop(ctxt);
8588 if (oldname != NULL) {
8589#ifdef DEBUG_STACK
8590 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8591#endif
8592 xmlFree(oldname);
8593 }
8594 if (ctxt->name == NULL) {
8595 ctxt->instate = XML_PARSER_EPILOG;
8596#ifdef DEBUG_PUSH
8597 xmlGenericError(xmlGenericErrorContext,
8598 "PP: entering EPILOG\n");
8599#endif
8600 } else {
8601 ctxt->instate = XML_PARSER_CONTENT;
8602#ifdef DEBUG_PUSH
8603 xmlGenericError(xmlGenericErrorContext,
8604 "PP: entering CONTENT\n");
8605#endif
8606 }
8607 break;
8608 }
8609 if (RAW == '>') {
8610 NEXT;
8611 } else {
8612 ctxt->errNo = XML_ERR_GT_REQUIRED;
8613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8614 ctxt->sax->error(ctxt->userData,
8615 "Couldn't find end of Start Tag %s\n",
8616 name);
8617 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008619
8620 /*
8621 * end of parsing of this node.
8622 */
8623 nodePop(ctxt);
8624 oldname = namePop(ctxt);
8625 spacePop(ctxt);
8626 if (oldname != NULL) {
8627#ifdef DEBUG_STACK
8628 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8629#endif
8630 xmlFree(oldname);
8631 }
8632 }
8633 xmlFree(name);
8634 ctxt->instate = XML_PARSER_CONTENT;
8635#ifdef DEBUG_PUSH
8636 xmlGenericError(xmlGenericErrorContext,
8637 "PP: entering CONTENT\n");
8638#endif
8639 break;
8640 }
8641 case XML_PARSER_CONTENT: {
8642 const xmlChar *test;
8643 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008644 if ((avail < 2) && (ctxt->inputNr == 1))
8645 goto done;
8646 cur = ctxt->input->cur[0];
8647 next = ctxt->input->cur[1];
8648
8649 test = CUR_PTR;
8650 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008651 if ((cur == '<') && (next == '?')) {
8652 if ((!terminate) &&
8653 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8654 goto done;
8655#ifdef DEBUG_PUSH
8656 xmlGenericError(xmlGenericErrorContext,
8657 "PP: Parsing PI\n");
8658#endif
8659 xmlParsePI(ctxt);
8660 } else if ((cur == '<') && (next == '!') &&
8661 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8662 if ((!terminate) &&
8663 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8664 goto done;
8665#ifdef DEBUG_PUSH
8666 xmlGenericError(xmlGenericErrorContext,
8667 "PP: Parsing Comment\n");
8668#endif
8669 xmlParseComment(ctxt);
8670 ctxt->instate = XML_PARSER_CONTENT;
8671 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8672 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8673 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8674 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8675 (ctxt->input->cur[8] == '[')) {
8676 SKIP(9);
8677 ctxt->instate = XML_PARSER_CDATA_SECTION;
8678#ifdef DEBUG_PUSH
8679 xmlGenericError(xmlGenericErrorContext,
8680 "PP: entering CDATA_SECTION\n");
8681#endif
8682 break;
8683 } else if ((cur == '<') && (next == '!') &&
8684 (avail < 9)) {
8685 goto done;
8686 } else if ((cur == '<') && (next == '/')) {
8687 ctxt->instate = XML_PARSER_END_TAG;
8688#ifdef DEBUG_PUSH
8689 xmlGenericError(xmlGenericErrorContext,
8690 "PP: entering END_TAG\n");
8691#endif
8692 break;
8693 } else if (cur == '<') {
8694 ctxt->instate = XML_PARSER_START_TAG;
8695#ifdef DEBUG_PUSH
8696 xmlGenericError(xmlGenericErrorContext,
8697 "PP: entering START_TAG\n");
8698#endif
8699 break;
8700 } else if (cur == '&') {
8701 if ((!terminate) &&
8702 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8703 goto done;
8704#ifdef DEBUG_PUSH
8705 xmlGenericError(xmlGenericErrorContext,
8706 "PP: Parsing Reference\n");
8707#endif
8708 xmlParseReference(ctxt);
8709 } else {
8710 /* TODO Avoid the extra copy, handle directly !!! */
8711 /*
8712 * Goal of the following test is:
8713 * - minimize calls to the SAX 'character' callback
8714 * when they are mergeable
8715 * - handle an problem for isBlank when we only parse
8716 * a sequence of blank chars and the next one is
8717 * not available to check against '<' presence.
8718 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008719 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008720 * of the parser.
8721 */
8722 if ((ctxt->inputNr == 1) &&
8723 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8724 if ((!terminate) &&
8725 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8726 goto done;
8727 }
8728 ctxt->checkIndex = 0;
8729#ifdef DEBUG_PUSH
8730 xmlGenericError(xmlGenericErrorContext,
8731 "PP: Parsing char data\n");
8732#endif
8733 xmlParseCharData(ctxt, 0);
8734 }
8735 /*
8736 * Pop-up of finished entities.
8737 */
8738 while ((RAW == 0) && (ctxt->inputNr > 1))
8739 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008740 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8743 ctxt->sax->error(ctxt->userData,
8744 "detected an error in element content\n");
8745 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008746 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008747 ctxt->instate = XML_PARSER_EOF;
8748 break;
8749 }
8750 break;
8751 }
8752 case XML_PARSER_CDATA_SECTION: {
8753 /*
8754 * The Push mode need to have the SAX callback for
8755 * cdataBlock merge back contiguous callbacks.
8756 */
8757 int base;
8758
8759 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8760 if (base < 0) {
8761 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8762 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8763 if (ctxt->sax->cdataBlock != NULL)
8764 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8765 XML_PARSER_BIG_BUFFER_SIZE);
8766 }
8767 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8768 ctxt->checkIndex = 0;
8769 }
8770 goto done;
8771 } else {
8772 if ((ctxt->sax != NULL) && (base > 0) &&
8773 (!ctxt->disableSAX)) {
8774 if (ctxt->sax->cdataBlock != NULL)
8775 ctxt->sax->cdataBlock(ctxt->userData,
8776 ctxt->input->cur, base);
8777 }
8778 SKIP(base + 3);
8779 ctxt->checkIndex = 0;
8780 ctxt->instate = XML_PARSER_CONTENT;
8781#ifdef DEBUG_PUSH
8782 xmlGenericError(xmlGenericErrorContext,
8783 "PP: entering CONTENT\n");
8784#endif
8785 }
8786 break;
8787 }
8788 case XML_PARSER_END_TAG:
8789 if (avail < 2)
8790 goto done;
8791 if ((!terminate) &&
8792 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8793 goto done;
8794 xmlParseEndTag(ctxt);
8795 if (ctxt->name == NULL) {
8796 ctxt->instate = XML_PARSER_EPILOG;
8797#ifdef DEBUG_PUSH
8798 xmlGenericError(xmlGenericErrorContext,
8799 "PP: entering EPILOG\n");
8800#endif
8801 } else {
8802 ctxt->instate = XML_PARSER_CONTENT;
8803#ifdef DEBUG_PUSH
8804 xmlGenericError(xmlGenericErrorContext,
8805 "PP: entering CONTENT\n");
8806#endif
8807 }
8808 break;
8809 case XML_PARSER_DTD: {
8810 /*
8811 * Sorry but progressive parsing of the internal subset
8812 * is not expected to be supported. We first check that
8813 * the full content of the internal subset is available and
8814 * the parsing is launched only at that point.
8815 * Internal subset ends up with "']' S? '>'" in an unescaped
8816 * section and not in a ']]>' sequence which are conditional
8817 * sections (whoever argued to keep that crap in XML deserve
8818 * a place in hell !).
8819 */
8820 int base, i;
8821 xmlChar *buf;
8822 xmlChar quote = 0;
8823
8824 base = ctxt->input->cur - ctxt->input->base;
8825 if (base < 0) return(0);
8826 if (ctxt->checkIndex > base)
8827 base = ctxt->checkIndex;
8828 buf = ctxt->input->buf->buffer->content;
8829 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8830 base++) {
8831 if (quote != 0) {
8832 if (buf[base] == quote)
8833 quote = 0;
8834 continue;
8835 }
8836 if (buf[base] == '"') {
8837 quote = '"';
8838 continue;
8839 }
8840 if (buf[base] == '\'') {
8841 quote = '\'';
8842 continue;
8843 }
8844 if (buf[base] == ']') {
8845 if ((unsigned int) base +1 >=
8846 ctxt->input->buf->buffer->use)
8847 break;
8848 if (buf[base + 1] == ']') {
8849 /* conditional crap, skip both ']' ! */
8850 base++;
8851 continue;
8852 }
8853 for (i = 0;
8854 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8855 i++) {
8856 if (buf[base + i] == '>')
8857 goto found_end_int_subset;
8858 }
8859 break;
8860 }
8861 }
8862 /*
8863 * We didn't found the end of the Internal subset
8864 */
8865 if (quote == 0)
8866 ctxt->checkIndex = base;
8867#ifdef DEBUG_PUSH
8868 if (next == 0)
8869 xmlGenericError(xmlGenericErrorContext,
8870 "PP: lookup of int subset end filed\n");
8871#endif
8872 goto done;
8873
8874found_end_int_subset:
8875 xmlParseInternalSubset(ctxt);
8876 ctxt->inSubset = 2;
8877 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8878 (ctxt->sax->externalSubset != NULL))
8879 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8880 ctxt->extSubSystem, ctxt->extSubURI);
8881 ctxt->inSubset = 0;
8882 ctxt->instate = XML_PARSER_PROLOG;
8883 ctxt->checkIndex = 0;
8884#ifdef DEBUG_PUSH
8885 xmlGenericError(xmlGenericErrorContext,
8886 "PP: entering PROLOG\n");
8887#endif
8888 break;
8889 }
8890 case XML_PARSER_COMMENT:
8891 xmlGenericError(xmlGenericErrorContext,
8892 "PP: internal error, state == COMMENT\n");
8893 ctxt->instate = XML_PARSER_CONTENT;
8894#ifdef DEBUG_PUSH
8895 xmlGenericError(xmlGenericErrorContext,
8896 "PP: entering CONTENT\n");
8897#endif
8898 break;
8899 case XML_PARSER_PI:
8900 xmlGenericError(xmlGenericErrorContext,
8901 "PP: internal error, state == PI\n");
8902 ctxt->instate = XML_PARSER_CONTENT;
8903#ifdef DEBUG_PUSH
8904 xmlGenericError(xmlGenericErrorContext,
8905 "PP: entering CONTENT\n");
8906#endif
8907 break;
8908 case XML_PARSER_ENTITY_DECL:
8909 xmlGenericError(xmlGenericErrorContext,
8910 "PP: internal error, state == ENTITY_DECL\n");
8911 ctxt->instate = XML_PARSER_DTD;
8912#ifdef DEBUG_PUSH
8913 xmlGenericError(xmlGenericErrorContext,
8914 "PP: entering DTD\n");
8915#endif
8916 break;
8917 case XML_PARSER_ENTITY_VALUE:
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: internal error, state == ENTITY_VALUE\n");
8920 ctxt->instate = XML_PARSER_CONTENT;
8921#ifdef DEBUG_PUSH
8922 xmlGenericError(xmlGenericErrorContext,
8923 "PP: entering DTD\n");
8924#endif
8925 break;
8926 case XML_PARSER_ATTRIBUTE_VALUE:
8927 xmlGenericError(xmlGenericErrorContext,
8928 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8929 ctxt->instate = XML_PARSER_START_TAG;
8930#ifdef DEBUG_PUSH
8931 xmlGenericError(xmlGenericErrorContext,
8932 "PP: entering START_TAG\n");
8933#endif
8934 break;
8935 case XML_PARSER_SYSTEM_LITERAL:
8936 xmlGenericError(xmlGenericErrorContext,
8937 "PP: internal error, state == SYSTEM_LITERAL\n");
8938 ctxt->instate = XML_PARSER_START_TAG;
8939#ifdef DEBUG_PUSH
8940 xmlGenericError(xmlGenericErrorContext,
8941 "PP: entering START_TAG\n");
8942#endif
8943 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008944 case XML_PARSER_PUBLIC_LITERAL:
8945 xmlGenericError(xmlGenericErrorContext,
8946 "PP: internal error, state == PUBLIC_LITERAL\n");
8947 ctxt->instate = XML_PARSER_START_TAG;
8948#ifdef DEBUG_PUSH
8949 xmlGenericError(xmlGenericErrorContext,
8950 "PP: entering START_TAG\n");
8951#endif
8952 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008953 }
8954 }
8955done:
8956#ifdef DEBUG_PUSH
8957 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8958#endif
8959 return(ret);
8960}
8961
8962/**
Owen Taylor3473f882001-02-23 17:55:21 +00008963 * xmlParseChunk:
8964 * @ctxt: an XML parser context
8965 * @chunk: an char array
8966 * @size: the size in byte of the chunk
8967 * @terminate: last chunk indicator
8968 *
8969 * Parse a Chunk of memory
8970 *
8971 * Returns zero if no error, the xmlParserErrors otherwise.
8972 */
8973int
8974xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8975 int terminate) {
8976 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8977 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8978 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8979 int cur = ctxt->input->cur - ctxt->input->base;
8980
8981 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8982 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8983 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008984 ctxt->input->end =
8985 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008986#ifdef DEBUG_PUSH
8987 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8988#endif
8989
8990 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8991 xmlParseTryOrFinish(ctxt, terminate);
8992 } else if (ctxt->instate != XML_PARSER_EOF) {
8993 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8994 xmlParserInputBufferPtr in = ctxt->input->buf;
8995 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8996 (in->raw != NULL)) {
8997 int nbchars;
8998
8999 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9000 if (nbchars < 0) {
9001 xmlGenericError(xmlGenericErrorContext,
9002 "xmlParseChunk: encoder error\n");
9003 return(XML_ERR_INVALID_ENCODING);
9004 }
9005 }
9006 }
9007 }
9008 xmlParseTryOrFinish(ctxt, terminate);
9009 if (terminate) {
9010 /*
9011 * Check for termination
9012 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009013 int avail = 0;
9014 if (ctxt->input->buf == NULL)
9015 avail = ctxt->input->length -
9016 (ctxt->input->cur - ctxt->input->base);
9017 else
9018 avail = ctxt->input->buf->buffer->use -
9019 (ctxt->input->cur - ctxt->input->base);
9020
Owen Taylor3473f882001-02-23 17:55:21 +00009021 if ((ctxt->instate != XML_PARSER_EOF) &&
9022 (ctxt->instate != XML_PARSER_EPILOG)) {
9023 ctxt->errNo = XML_ERR_DOCUMENT_END;
9024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9025 ctxt->sax->error(ctxt->userData,
9026 "Extra content at the end of the document\n");
9027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009029 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009030 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9031 ctxt->errNo = XML_ERR_DOCUMENT_END;
9032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9033 ctxt->sax->error(ctxt->userData,
9034 "Extra content at the end of the document\n");
9035 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009036 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009037
9038 }
Owen Taylor3473f882001-02-23 17:55:21 +00009039 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009040 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009041 ctxt->sax->endDocument(ctxt->userData);
9042 }
9043 ctxt->instate = XML_PARSER_EOF;
9044 }
9045 return((xmlParserErrors) ctxt->errNo);
9046}
9047
9048/************************************************************************
9049 * *
9050 * I/O front end functions to the parser *
9051 * *
9052 ************************************************************************/
9053
9054/**
9055 * xmlStopParser:
9056 * @ctxt: an XML parser context
9057 *
9058 * Blocks further parser processing
9059 */
9060void
9061xmlStopParser(xmlParserCtxtPtr ctxt) {
9062 ctxt->instate = XML_PARSER_EOF;
9063 if (ctxt->input != NULL)
9064 ctxt->input->cur = BAD_CAST"";
9065}
9066
9067/**
9068 * xmlCreatePushParserCtxt:
9069 * @sax: a SAX handler
9070 * @user_data: The user data returned on SAX callbacks
9071 * @chunk: a pointer to an array of chars
9072 * @size: number of chars in the array
9073 * @filename: an optional file name or URI
9074 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009075 * Create a parser context for using the XML parser in push mode.
9076 * If @buffer and @size are non-NULL, the data is used to detect
9077 * the encoding. The remaining characters will be parsed so they
9078 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009079 * To allow content encoding detection, @size should be >= 4
9080 * The value of @filename is used for fetching external entities
9081 * and error/warning reports.
9082 *
9083 * Returns the new parser context or NULL
9084 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009085
Owen Taylor3473f882001-02-23 17:55:21 +00009086xmlParserCtxtPtr
9087xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9088 const char *chunk, int size, const char *filename) {
9089 xmlParserCtxtPtr ctxt;
9090 xmlParserInputPtr inputStream;
9091 xmlParserInputBufferPtr buf;
9092 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9093
9094 /*
9095 * plug some encoding conversion routines
9096 */
9097 if ((chunk != NULL) && (size >= 4))
9098 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9099
9100 buf = xmlAllocParserInputBuffer(enc);
9101 if (buf == NULL) return(NULL);
9102
9103 ctxt = xmlNewParserCtxt();
9104 if (ctxt == NULL) {
9105 xmlFree(buf);
9106 return(NULL);
9107 }
9108 if (sax != NULL) {
9109 if (ctxt->sax != &xmlDefaultSAXHandler)
9110 xmlFree(ctxt->sax);
9111 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9112 if (ctxt->sax == NULL) {
9113 xmlFree(buf);
9114 xmlFree(ctxt);
9115 return(NULL);
9116 }
9117 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9118 if (user_data != NULL)
9119 ctxt->userData = user_data;
9120 }
9121 if (filename == NULL) {
9122 ctxt->directory = NULL;
9123 } else {
9124 ctxt->directory = xmlParserGetDirectory(filename);
9125 }
9126
9127 inputStream = xmlNewInputStream(ctxt);
9128 if (inputStream == NULL) {
9129 xmlFreeParserCtxt(ctxt);
9130 return(NULL);
9131 }
9132
9133 if (filename == NULL)
9134 inputStream->filename = NULL;
9135 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009136 inputStream->filename = (char *)
9137 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009138 inputStream->buf = buf;
9139 inputStream->base = inputStream->buf->buffer->content;
9140 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009141 inputStream->end =
9142 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009143
9144 inputPush(ctxt, inputStream);
9145
9146 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9147 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009148 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9149 int cur = ctxt->input->cur - ctxt->input->base;
9150
Owen Taylor3473f882001-02-23 17:55:21 +00009151 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009152
9153 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9154 ctxt->input->cur = ctxt->input->base + cur;
9155 ctxt->input->end =
9156 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009157#ifdef DEBUG_PUSH
9158 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9159#endif
9160 }
9161
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009162 if (enc != XML_CHAR_ENCODING_NONE) {
9163 xmlSwitchEncoding(ctxt, enc);
9164 }
9165
Owen Taylor3473f882001-02-23 17:55:21 +00009166 return(ctxt);
9167}
9168
9169/**
9170 * xmlCreateIOParserCtxt:
9171 * @sax: a SAX handler
9172 * @user_data: The user data returned on SAX callbacks
9173 * @ioread: an I/O read function
9174 * @ioclose: an I/O close function
9175 * @ioctx: an I/O handler
9176 * @enc: the charset encoding if known
9177 *
9178 * Create a parser context for using the XML parser with an existing
9179 * I/O stream
9180 *
9181 * Returns the new parser context or NULL
9182 */
9183xmlParserCtxtPtr
9184xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9185 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9186 void *ioctx, xmlCharEncoding enc) {
9187 xmlParserCtxtPtr ctxt;
9188 xmlParserInputPtr inputStream;
9189 xmlParserInputBufferPtr buf;
9190
9191 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9192 if (buf == NULL) return(NULL);
9193
9194 ctxt = xmlNewParserCtxt();
9195 if (ctxt == NULL) {
9196 xmlFree(buf);
9197 return(NULL);
9198 }
9199 if (sax != NULL) {
9200 if (ctxt->sax != &xmlDefaultSAXHandler)
9201 xmlFree(ctxt->sax);
9202 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9203 if (ctxt->sax == NULL) {
9204 xmlFree(buf);
9205 xmlFree(ctxt);
9206 return(NULL);
9207 }
9208 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9209 if (user_data != NULL)
9210 ctxt->userData = user_data;
9211 }
9212
9213 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9214 if (inputStream == NULL) {
9215 xmlFreeParserCtxt(ctxt);
9216 return(NULL);
9217 }
9218 inputPush(ctxt, inputStream);
9219
9220 return(ctxt);
9221}
9222
9223/************************************************************************
9224 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009225 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009226 * *
9227 ************************************************************************/
9228
9229/**
9230 * xmlIOParseDTD:
9231 * @sax: the SAX handler block or NULL
9232 * @input: an Input Buffer
9233 * @enc: the charset encoding if known
9234 *
9235 * Load and parse a DTD
9236 *
9237 * Returns the resulting xmlDtdPtr or NULL in case of error.
9238 * @input will be freed at parsing end.
9239 */
9240
9241xmlDtdPtr
9242xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9243 xmlCharEncoding enc) {
9244 xmlDtdPtr ret = NULL;
9245 xmlParserCtxtPtr ctxt;
9246 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009247 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009248
9249 if (input == NULL)
9250 return(NULL);
9251
9252 ctxt = xmlNewParserCtxt();
9253 if (ctxt == NULL) {
9254 return(NULL);
9255 }
9256
9257 /*
9258 * Set-up the SAX context
9259 */
9260 if (sax != NULL) {
9261 if (ctxt->sax != NULL)
9262 xmlFree(ctxt->sax);
9263 ctxt->sax = sax;
9264 ctxt->userData = NULL;
9265 }
9266
9267 /*
9268 * generate a parser input from the I/O handler
9269 */
9270
9271 pinput = xmlNewIOInputStream(ctxt, input, enc);
9272 if (pinput == NULL) {
9273 if (sax != NULL) ctxt->sax = NULL;
9274 xmlFreeParserCtxt(ctxt);
9275 return(NULL);
9276 }
9277
9278 /*
9279 * plug some encoding conversion routines here.
9280 */
9281 xmlPushInput(ctxt, pinput);
9282
9283 pinput->filename = NULL;
9284 pinput->line = 1;
9285 pinput->col = 1;
9286 pinput->base = ctxt->input->cur;
9287 pinput->cur = ctxt->input->cur;
9288 pinput->free = NULL;
9289
9290 /*
9291 * let's parse that entity knowing it's an external subset.
9292 */
9293 ctxt->inSubset = 2;
9294 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9295 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9296 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009297
9298 if (enc == XML_CHAR_ENCODING_NONE) {
9299 /*
9300 * Get the 4 first bytes and decode the charset
9301 * if enc != XML_CHAR_ENCODING_NONE
9302 * plug some encoding conversion routines.
9303 */
9304 start[0] = RAW;
9305 start[1] = NXT(1);
9306 start[2] = NXT(2);
9307 start[3] = NXT(3);
9308 enc = xmlDetectCharEncoding(start, 4);
9309 if (enc != XML_CHAR_ENCODING_NONE) {
9310 xmlSwitchEncoding(ctxt, enc);
9311 }
9312 }
9313
Owen Taylor3473f882001-02-23 17:55:21 +00009314 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9315
9316 if (ctxt->myDoc != NULL) {
9317 if (ctxt->wellFormed) {
9318 ret = ctxt->myDoc->extSubset;
9319 ctxt->myDoc->extSubset = NULL;
9320 } else {
9321 ret = NULL;
9322 }
9323 xmlFreeDoc(ctxt->myDoc);
9324 ctxt->myDoc = NULL;
9325 }
9326 if (sax != NULL) ctxt->sax = NULL;
9327 xmlFreeParserCtxt(ctxt);
9328
9329 return(ret);
9330}
9331
9332/**
9333 * xmlSAXParseDTD:
9334 * @sax: the SAX handler block
9335 * @ExternalID: a NAME* containing the External ID of the DTD
9336 * @SystemID: a NAME* containing the URL to the DTD
9337 *
9338 * Load and parse an external subset.
9339 *
9340 * Returns the resulting xmlDtdPtr or NULL in case of error.
9341 */
9342
9343xmlDtdPtr
9344xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9345 const xmlChar *SystemID) {
9346 xmlDtdPtr ret = NULL;
9347 xmlParserCtxtPtr ctxt;
9348 xmlParserInputPtr input = NULL;
9349 xmlCharEncoding enc;
9350
9351 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9352
9353 ctxt = xmlNewParserCtxt();
9354 if (ctxt == NULL) {
9355 return(NULL);
9356 }
9357
9358 /*
9359 * Set-up the SAX context
9360 */
9361 if (sax != NULL) {
9362 if (ctxt->sax != NULL)
9363 xmlFree(ctxt->sax);
9364 ctxt->sax = sax;
9365 ctxt->userData = NULL;
9366 }
9367
9368 /*
9369 * Ask the Entity resolver to load the damn thing
9370 */
9371
9372 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9373 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9374 if (input == NULL) {
9375 if (sax != NULL) ctxt->sax = NULL;
9376 xmlFreeParserCtxt(ctxt);
9377 return(NULL);
9378 }
9379
9380 /*
9381 * plug some encoding conversion routines here.
9382 */
9383 xmlPushInput(ctxt, input);
9384 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9385 xmlSwitchEncoding(ctxt, enc);
9386
9387 if (input->filename == NULL)
9388 input->filename = (char *) xmlStrdup(SystemID);
9389 input->line = 1;
9390 input->col = 1;
9391 input->base = ctxt->input->cur;
9392 input->cur = ctxt->input->cur;
9393 input->free = NULL;
9394
9395 /*
9396 * let's parse that entity knowing it's an external subset.
9397 */
9398 ctxt->inSubset = 2;
9399 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9400 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9401 ExternalID, SystemID);
9402 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9403
9404 if (ctxt->myDoc != NULL) {
9405 if (ctxt->wellFormed) {
9406 ret = ctxt->myDoc->extSubset;
9407 ctxt->myDoc->extSubset = NULL;
9408 } else {
9409 ret = NULL;
9410 }
9411 xmlFreeDoc(ctxt->myDoc);
9412 ctxt->myDoc = NULL;
9413 }
9414 if (sax != NULL) ctxt->sax = NULL;
9415 xmlFreeParserCtxt(ctxt);
9416
9417 return(ret);
9418}
9419
9420/**
9421 * xmlParseDTD:
9422 * @ExternalID: a NAME* containing the External ID of the DTD
9423 * @SystemID: a NAME* containing the URL to the DTD
9424 *
9425 * Load and parse an external subset.
9426 *
9427 * Returns the resulting xmlDtdPtr or NULL in case of error.
9428 */
9429
9430xmlDtdPtr
9431xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9432 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9433}
9434
9435/************************************************************************
9436 * *
9437 * Front ends when parsing an Entity *
9438 * *
9439 ************************************************************************/
9440
9441/**
Owen Taylor3473f882001-02-23 17:55:21 +00009442 * xmlParseCtxtExternalEntity:
9443 * @ctx: the existing parsing context
9444 * @URL: the URL for the entity to load
9445 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009446 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009447 *
9448 * Parse an external general entity within an existing parsing context
9449 * An external general parsed entity is well-formed if it matches the
9450 * production labeled extParsedEnt.
9451 *
9452 * [78] extParsedEnt ::= TextDecl? content
9453 *
9454 * Returns 0 if the entity is well formed, -1 in case of args problem and
9455 * the parser error code otherwise
9456 */
9457
9458int
9459xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009460 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009461 xmlParserCtxtPtr ctxt;
9462 xmlDocPtr newDoc;
9463 xmlSAXHandlerPtr oldsax = NULL;
9464 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009465 xmlChar start[4];
9466 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009467
9468 if (ctx->depth > 40) {
9469 return(XML_ERR_ENTITY_LOOP);
9470 }
9471
Daniel Veillardcda96922001-08-21 10:56:31 +00009472 if (lst != NULL)
9473 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009474 if ((URL == NULL) && (ID == NULL))
9475 return(-1);
9476 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9477 return(-1);
9478
9479
9480 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9481 if (ctxt == NULL) return(-1);
9482 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009483 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009484 oldsax = ctxt->sax;
9485 ctxt->sax = ctx->sax;
9486 newDoc = xmlNewDoc(BAD_CAST "1.0");
9487 if (newDoc == NULL) {
9488 xmlFreeParserCtxt(ctxt);
9489 return(-1);
9490 }
9491 if (ctx->myDoc != NULL) {
9492 newDoc->intSubset = ctx->myDoc->intSubset;
9493 newDoc->extSubset = ctx->myDoc->extSubset;
9494 }
9495 if (ctx->myDoc->URL != NULL) {
9496 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9497 }
9498 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9499 if (newDoc->children == NULL) {
9500 ctxt->sax = oldsax;
9501 xmlFreeParserCtxt(ctxt);
9502 newDoc->intSubset = NULL;
9503 newDoc->extSubset = NULL;
9504 xmlFreeDoc(newDoc);
9505 return(-1);
9506 }
9507 nodePush(ctxt, newDoc->children);
9508 if (ctx->myDoc == NULL) {
9509 ctxt->myDoc = newDoc;
9510 } else {
9511 ctxt->myDoc = ctx->myDoc;
9512 newDoc->children->doc = ctx->myDoc;
9513 }
9514
Daniel Veillard87a764e2001-06-20 17:41:10 +00009515 /*
9516 * Get the 4 first bytes and decode the charset
9517 * if enc != XML_CHAR_ENCODING_NONE
9518 * plug some encoding conversion routines.
9519 */
9520 GROW
9521 start[0] = RAW;
9522 start[1] = NXT(1);
9523 start[2] = NXT(2);
9524 start[3] = NXT(3);
9525 enc = xmlDetectCharEncoding(start, 4);
9526 if (enc != XML_CHAR_ENCODING_NONE) {
9527 xmlSwitchEncoding(ctxt, enc);
9528 }
9529
Owen Taylor3473f882001-02-23 17:55:21 +00009530 /*
9531 * Parse a possible text declaration first
9532 */
Owen Taylor3473f882001-02-23 17:55:21 +00009533 if ((RAW == '<') && (NXT(1) == '?') &&
9534 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9535 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9536 xmlParseTextDecl(ctxt);
9537 }
9538
9539 /*
9540 * Doing validity checking on chunk doesn't make sense
9541 */
9542 ctxt->instate = XML_PARSER_CONTENT;
9543 ctxt->validate = ctx->validate;
9544 ctxt->loadsubset = ctx->loadsubset;
9545 ctxt->depth = ctx->depth + 1;
9546 ctxt->replaceEntities = ctx->replaceEntities;
9547 if (ctxt->validate) {
9548 ctxt->vctxt.error = ctx->vctxt.error;
9549 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009550 } else {
9551 ctxt->vctxt.error = NULL;
9552 ctxt->vctxt.warning = NULL;
9553 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009554 ctxt->vctxt.nodeTab = NULL;
9555 ctxt->vctxt.nodeNr = 0;
9556 ctxt->vctxt.nodeMax = 0;
9557 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009558
9559 xmlParseContent(ctxt);
9560
9561 if ((RAW == '<') && (NXT(1) == '/')) {
9562 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9564 ctxt->sax->error(ctxt->userData,
9565 "chunk is not well balanced\n");
9566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009568 } else if (RAW != 0) {
9569 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9571 ctxt->sax->error(ctxt->userData,
9572 "extra content at the end of well balanced chunk\n");
9573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009575 }
9576 if (ctxt->node != newDoc->children) {
9577 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9578 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9579 ctxt->sax->error(ctxt->userData,
9580 "chunk is not well balanced\n");
9581 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009582 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009583 }
9584
9585 if (!ctxt->wellFormed) {
9586 if (ctxt->errNo == 0)
9587 ret = 1;
9588 else
9589 ret = ctxt->errNo;
9590 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009591 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009592 xmlNodePtr cur;
9593
9594 /*
9595 * Return the newly created nodeset after unlinking it from
9596 * they pseudo parent.
9597 */
9598 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009599 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009600 while (cur != NULL) {
9601 cur->parent = NULL;
9602 cur = cur->next;
9603 }
9604 newDoc->children->children = NULL;
9605 }
9606 ret = 0;
9607 }
9608 ctxt->sax = oldsax;
9609 xmlFreeParserCtxt(ctxt);
9610 newDoc->intSubset = NULL;
9611 newDoc->extSubset = NULL;
9612 xmlFreeDoc(newDoc);
9613
9614 return(ret);
9615}
9616
9617/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009618 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009619 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009620 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009621 * @sax: the SAX handler bloc (possibly NULL)
9622 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9623 * @depth: Used for loop detection, use 0
9624 * @URL: the URL for the entity to load
9625 * @ID: the System ID for the entity to load
9626 * @list: the return value for the set of parsed nodes
9627 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009628 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009629 *
9630 * Returns 0 if the entity is well formed, -1 in case of args problem and
9631 * the parser error code otherwise
9632 */
9633
Daniel Veillard257d9102001-05-08 10:41:44 +00009634static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009635xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9636 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009637 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009638 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009639 xmlParserCtxtPtr ctxt;
9640 xmlDocPtr newDoc;
9641 xmlSAXHandlerPtr oldsax = NULL;
9642 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009643 xmlChar start[4];
9644 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009645
9646 if (depth > 40) {
9647 return(XML_ERR_ENTITY_LOOP);
9648 }
9649
9650
9651
9652 if (list != NULL)
9653 *list = NULL;
9654 if ((URL == NULL) && (ID == NULL))
9655 return(-1);
9656 if (doc == NULL) /* @@ relax but check for dereferences */
9657 return(-1);
9658
9659
9660 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9661 if (ctxt == NULL) return(-1);
9662 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009663 if (oldctxt != NULL) {
9664 ctxt->_private = oldctxt->_private;
9665 ctxt->loadsubset = oldctxt->loadsubset;
9666 ctxt->validate = oldctxt->validate;
9667 ctxt->external = oldctxt->external;
9668 } else {
9669 /*
9670 * Doing validity checking on chunk without context
9671 * doesn't make sense
9672 */
9673 ctxt->_private = NULL;
9674 ctxt->validate = 0;
9675 ctxt->external = 2;
9676 ctxt->loadsubset = 0;
9677 }
Owen Taylor3473f882001-02-23 17:55:21 +00009678 if (sax != NULL) {
9679 oldsax = ctxt->sax;
9680 ctxt->sax = sax;
9681 if (user_data != NULL)
9682 ctxt->userData = user_data;
9683 }
9684 newDoc = xmlNewDoc(BAD_CAST "1.0");
9685 if (newDoc == NULL) {
9686 xmlFreeParserCtxt(ctxt);
9687 return(-1);
9688 }
9689 if (doc != NULL) {
9690 newDoc->intSubset = doc->intSubset;
9691 newDoc->extSubset = doc->extSubset;
9692 }
9693 if (doc->URL != NULL) {
9694 newDoc->URL = xmlStrdup(doc->URL);
9695 }
9696 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9697 if (newDoc->children == NULL) {
9698 if (sax != NULL)
9699 ctxt->sax = oldsax;
9700 xmlFreeParserCtxt(ctxt);
9701 newDoc->intSubset = NULL;
9702 newDoc->extSubset = NULL;
9703 xmlFreeDoc(newDoc);
9704 return(-1);
9705 }
9706 nodePush(ctxt, newDoc->children);
9707 if (doc == NULL) {
9708 ctxt->myDoc = newDoc;
9709 } else {
9710 ctxt->myDoc = doc;
9711 newDoc->children->doc = doc;
9712 }
9713
Daniel Veillard87a764e2001-06-20 17:41:10 +00009714 /*
9715 * Get the 4 first bytes and decode the charset
9716 * if enc != XML_CHAR_ENCODING_NONE
9717 * plug some encoding conversion routines.
9718 */
9719 GROW;
9720 start[0] = RAW;
9721 start[1] = NXT(1);
9722 start[2] = NXT(2);
9723 start[3] = NXT(3);
9724 enc = xmlDetectCharEncoding(start, 4);
9725 if (enc != XML_CHAR_ENCODING_NONE) {
9726 xmlSwitchEncoding(ctxt, enc);
9727 }
9728
Owen Taylor3473f882001-02-23 17:55:21 +00009729 /*
9730 * Parse a possible text declaration first
9731 */
Owen Taylor3473f882001-02-23 17:55:21 +00009732 if ((RAW == '<') && (NXT(1) == '?') &&
9733 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9734 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9735 xmlParseTextDecl(ctxt);
9736 }
9737
Owen Taylor3473f882001-02-23 17:55:21 +00009738 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009739 ctxt->depth = depth;
9740
9741 xmlParseContent(ctxt);
9742
Daniel Veillard561b7f82002-03-20 21:55:57 +00009743 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009744 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9746 ctxt->sax->error(ctxt->userData,
9747 "chunk is not well balanced\n");
9748 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009750 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009751 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9753 ctxt->sax->error(ctxt->userData,
9754 "extra content at the end of well balanced chunk\n");
9755 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009756 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009757 }
9758 if (ctxt->node != newDoc->children) {
9759 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9761 ctxt->sax->error(ctxt->userData,
9762 "chunk is not well balanced\n");
9763 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009764 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009765 }
9766
9767 if (!ctxt->wellFormed) {
9768 if (ctxt->errNo == 0)
9769 ret = 1;
9770 else
9771 ret = ctxt->errNo;
9772 } else {
9773 if (list != NULL) {
9774 xmlNodePtr cur;
9775
9776 /*
9777 * Return the newly created nodeset after unlinking it from
9778 * they pseudo parent.
9779 */
9780 cur = newDoc->children->children;
9781 *list = cur;
9782 while (cur != NULL) {
9783 cur->parent = NULL;
9784 cur = cur->next;
9785 }
9786 newDoc->children->children = NULL;
9787 }
9788 ret = 0;
9789 }
9790 if (sax != NULL)
9791 ctxt->sax = oldsax;
9792 xmlFreeParserCtxt(ctxt);
9793 newDoc->intSubset = NULL;
9794 newDoc->extSubset = NULL;
9795 xmlFreeDoc(newDoc);
9796
9797 return(ret);
9798}
9799
9800/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009801 * xmlParseExternalEntity:
9802 * @doc: the document the chunk pertains to
9803 * @sax: the SAX handler bloc (possibly NULL)
9804 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9805 * @depth: Used for loop detection, use 0
9806 * @URL: the URL for the entity to load
9807 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009808 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009809 *
9810 * Parse an external general entity
9811 * An external general parsed entity is well-formed if it matches the
9812 * production labeled extParsedEnt.
9813 *
9814 * [78] extParsedEnt ::= TextDecl? content
9815 *
9816 * Returns 0 if the entity is well formed, -1 in case of args problem and
9817 * the parser error code otherwise
9818 */
9819
9820int
9821xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009822 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009823 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009824 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009825}
9826
9827/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009828 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009829 * @doc: the document the chunk pertains to
9830 * @sax: the SAX handler bloc (possibly NULL)
9831 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9832 * @depth: Used for loop detection, use 0
9833 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009834 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009835 *
9836 * Parse a well-balanced chunk of an XML document
9837 * called by the parser
9838 * The allowed sequence for the Well Balanced Chunk is the one defined by
9839 * the content production in the XML grammar:
9840 *
9841 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9842 *
9843 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9844 * the parser error code otherwise
9845 */
9846
9847int
9848xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009849 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009850 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9851 depth, string, lst, 0 );
9852}
9853
9854/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009855 * xmlParseBalancedChunkMemoryInternal:
9856 * @oldctxt: the existing parsing context
9857 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9858 * @user_data: the user data field for the parser context
9859 * @lst: the return value for the set of parsed nodes
9860 *
9861 *
9862 * Parse a well-balanced chunk of an XML document
9863 * called by the parser
9864 * The allowed sequence for the Well Balanced Chunk is the one defined by
9865 * the content production in the XML grammar:
9866 *
9867 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9868 *
9869 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9870 * the parser error code otherwise
9871 *
9872 * In case recover is set to 1, the nodelist will not be empty even if
9873 * the parsed chunk is not well balanced.
9874 */
9875static int
9876xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9877 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9878 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009879 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009880 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009881 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009882 int size;
9883 int ret = 0;
9884
9885 if (oldctxt->depth > 40) {
9886 return(XML_ERR_ENTITY_LOOP);
9887 }
9888
9889
9890 if (lst != NULL)
9891 *lst = NULL;
9892 if (string == NULL)
9893 return(-1);
9894
9895 size = xmlStrlen(string);
9896
9897 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9898 if (ctxt == NULL) return(-1);
9899 if (user_data != NULL)
9900 ctxt->userData = user_data;
9901 else
9902 ctxt->userData = ctxt;
9903
9904 oldsax = ctxt->sax;
9905 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009906 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009907 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009908 newDoc = xmlNewDoc(BAD_CAST "1.0");
9909 if (newDoc == NULL) {
9910 ctxt->sax = oldsax;
9911 xmlFreeParserCtxt(ctxt);
9912 return(-1);
9913 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009914 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009915 } else {
9916 ctxt->myDoc = oldctxt->myDoc;
9917 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009918 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009919 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009920 BAD_CAST "pseudoroot", NULL);
9921 if (ctxt->myDoc->children == NULL) {
9922 ctxt->sax = oldsax;
9923 xmlFreeParserCtxt(ctxt);
9924 if (newDoc != NULL)
9925 xmlFreeDoc(newDoc);
9926 return(-1);
9927 }
9928 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009929 ctxt->instate = XML_PARSER_CONTENT;
9930 ctxt->depth = oldctxt->depth + 1;
9931
9932 /*
9933 * Doing validity checking on chunk doesn't make sense
9934 */
9935 ctxt->validate = 0;
9936 ctxt->loadsubset = oldctxt->loadsubset;
9937
Daniel Veillard68e9e742002-11-16 15:35:11 +00009938 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009939 if ((RAW == '<') && (NXT(1) == '/')) {
9940 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9942 ctxt->sax->error(ctxt->userData,
9943 "chunk is not well balanced\n");
9944 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009945 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009946 } else if (RAW != 0) {
9947 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9949 ctxt->sax->error(ctxt->userData,
9950 "extra content at the end of well balanced chunk\n");
9951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009953 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009954 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009955 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9957 ctxt->sax->error(ctxt->userData,
9958 "chunk is not well balanced\n");
9959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009961 }
9962
9963 if (!ctxt->wellFormed) {
9964 if (ctxt->errNo == 0)
9965 ret = 1;
9966 else
9967 ret = ctxt->errNo;
9968 } else {
9969 ret = 0;
9970 }
9971
9972 if ((lst != NULL) && (ret == 0)) {
9973 xmlNodePtr cur;
9974
9975 /*
9976 * Return the newly created nodeset after unlinking it from
9977 * they pseudo parent.
9978 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009979 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009980 *lst = cur;
9981 while (cur != NULL) {
9982 cur->parent = NULL;
9983 cur = cur->next;
9984 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009985 ctxt->myDoc->children->children = NULL;
9986 }
9987 if (ctxt->myDoc != NULL) {
9988 xmlFreeNode(ctxt->myDoc->children);
9989 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009990 }
9991
9992 ctxt->sax = oldsax;
9993 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009994 if (newDoc != NULL)
9995 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009996
9997 return(ret);
9998}
9999
10000/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010001 * xmlParseBalancedChunkMemoryRecover:
10002 * @doc: the document the chunk pertains to
10003 * @sax: the SAX handler bloc (possibly NULL)
10004 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10005 * @depth: Used for loop detection, use 0
10006 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10007 * @lst: the return value for the set of parsed nodes
10008 * @recover: return nodes even if the data is broken (use 0)
10009 *
10010 *
10011 * Parse a well-balanced chunk of an XML document
10012 * called by the parser
10013 * The allowed sequence for the Well Balanced Chunk is the one defined by
10014 * the content production in the XML grammar:
10015 *
10016 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10017 *
10018 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10019 * the parser error code otherwise
10020 *
10021 * In case recover is set to 1, the nodelist will not be empty even if
10022 * the parsed chunk is not well balanced.
10023 */
10024int
10025xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10026 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10027 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010028 xmlParserCtxtPtr ctxt;
10029 xmlDocPtr newDoc;
10030 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010031 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010032 int size;
10033 int ret = 0;
10034
10035 if (depth > 40) {
10036 return(XML_ERR_ENTITY_LOOP);
10037 }
10038
10039
Daniel Veillardcda96922001-08-21 10:56:31 +000010040 if (lst != NULL)
10041 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010042 if (string == NULL)
10043 return(-1);
10044
10045 size = xmlStrlen(string);
10046
10047 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10048 if (ctxt == NULL) return(-1);
10049 ctxt->userData = ctxt;
10050 if (sax != NULL) {
10051 oldsax = ctxt->sax;
10052 ctxt->sax = sax;
10053 if (user_data != NULL)
10054 ctxt->userData = user_data;
10055 }
10056 newDoc = xmlNewDoc(BAD_CAST "1.0");
10057 if (newDoc == NULL) {
10058 xmlFreeParserCtxt(ctxt);
10059 return(-1);
10060 }
10061 if (doc != NULL) {
10062 newDoc->intSubset = doc->intSubset;
10063 newDoc->extSubset = doc->extSubset;
10064 }
10065 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10066 if (newDoc->children == NULL) {
10067 if (sax != NULL)
10068 ctxt->sax = oldsax;
10069 xmlFreeParserCtxt(ctxt);
10070 newDoc->intSubset = NULL;
10071 newDoc->extSubset = NULL;
10072 xmlFreeDoc(newDoc);
10073 return(-1);
10074 }
10075 nodePush(ctxt, newDoc->children);
10076 if (doc == NULL) {
10077 ctxt->myDoc = newDoc;
10078 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010079 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010080 newDoc->children->doc = doc;
10081 }
10082 ctxt->instate = XML_PARSER_CONTENT;
10083 ctxt->depth = depth;
10084
10085 /*
10086 * Doing validity checking on chunk doesn't make sense
10087 */
10088 ctxt->validate = 0;
10089 ctxt->loadsubset = 0;
10090
Daniel Veillardb39bc392002-10-26 19:29:51 +000010091 if ( doc != NULL ){
10092 content = doc->children;
10093 doc->children = NULL;
10094 xmlParseContent(ctxt);
10095 doc->children = content;
10096 }
10097 else {
10098 xmlParseContent(ctxt);
10099 }
Owen Taylor3473f882001-02-23 17:55:21 +000010100 if ((RAW == '<') && (NXT(1) == '/')) {
10101 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10103 ctxt->sax->error(ctxt->userData,
10104 "chunk is not well balanced\n");
10105 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010106 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010107 } else if (RAW != 0) {
10108 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10110 ctxt->sax->error(ctxt->userData,
10111 "extra content at the end of well balanced chunk\n");
10112 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010113 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010114 }
10115 if (ctxt->node != newDoc->children) {
10116 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10118 ctxt->sax->error(ctxt->userData,
10119 "chunk is not well balanced\n");
10120 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010121 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010122 }
10123
10124 if (!ctxt->wellFormed) {
10125 if (ctxt->errNo == 0)
10126 ret = 1;
10127 else
10128 ret = ctxt->errNo;
10129 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010130 ret = 0;
10131 }
10132
10133 if (lst != NULL && (ret == 0 || recover == 1)) {
10134 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010135
10136 /*
10137 * Return the newly created nodeset after unlinking it from
10138 * they pseudo parent.
10139 */
10140 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010141 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010142 while (cur != NULL) {
10143 cur->parent = NULL;
10144 cur = cur->next;
10145 }
10146 newDoc->children->children = NULL;
10147 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010148
Owen Taylor3473f882001-02-23 17:55:21 +000010149 if (sax != NULL)
10150 ctxt->sax = oldsax;
10151 xmlFreeParserCtxt(ctxt);
10152 newDoc->intSubset = NULL;
10153 newDoc->extSubset = NULL;
10154 xmlFreeDoc(newDoc);
10155
10156 return(ret);
10157}
10158
10159/**
10160 * xmlSAXParseEntity:
10161 * @sax: the SAX handler block
10162 * @filename: the filename
10163 *
10164 * parse an XML external entity out of context and build a tree.
10165 * It use the given SAX function block to handle the parsing callback.
10166 * If sax is NULL, fallback to the default DOM tree building routines.
10167 *
10168 * [78] extParsedEnt ::= TextDecl? content
10169 *
10170 * This correspond to a "Well Balanced" chunk
10171 *
10172 * Returns the resulting document tree
10173 */
10174
10175xmlDocPtr
10176xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10177 xmlDocPtr ret;
10178 xmlParserCtxtPtr ctxt;
10179 char *directory = NULL;
10180
10181 ctxt = xmlCreateFileParserCtxt(filename);
10182 if (ctxt == NULL) {
10183 return(NULL);
10184 }
10185 if (sax != NULL) {
10186 if (ctxt->sax != NULL)
10187 xmlFree(ctxt->sax);
10188 ctxt->sax = sax;
10189 ctxt->userData = NULL;
10190 }
10191
10192 if ((ctxt->directory == NULL) && (directory == NULL))
10193 directory = xmlParserGetDirectory(filename);
10194
10195 xmlParseExtParsedEnt(ctxt);
10196
10197 if (ctxt->wellFormed)
10198 ret = ctxt->myDoc;
10199 else {
10200 ret = NULL;
10201 xmlFreeDoc(ctxt->myDoc);
10202 ctxt->myDoc = NULL;
10203 }
10204 if (sax != NULL)
10205 ctxt->sax = NULL;
10206 xmlFreeParserCtxt(ctxt);
10207
10208 return(ret);
10209}
10210
10211/**
10212 * xmlParseEntity:
10213 * @filename: the filename
10214 *
10215 * parse an XML external entity out of context and build a tree.
10216 *
10217 * [78] extParsedEnt ::= TextDecl? content
10218 *
10219 * This correspond to a "Well Balanced" chunk
10220 *
10221 * Returns the resulting document tree
10222 */
10223
10224xmlDocPtr
10225xmlParseEntity(const char *filename) {
10226 return(xmlSAXParseEntity(NULL, filename));
10227}
10228
10229/**
10230 * xmlCreateEntityParserCtxt:
10231 * @URL: the entity URL
10232 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010233 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010234 *
10235 * Create a parser context for an external entity
10236 * Automatic support for ZLIB/Compress compressed document is provided
10237 * by default if found at compile-time.
10238 *
10239 * Returns the new parser context or NULL
10240 */
10241xmlParserCtxtPtr
10242xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10243 const xmlChar *base) {
10244 xmlParserCtxtPtr ctxt;
10245 xmlParserInputPtr inputStream;
10246 char *directory = NULL;
10247 xmlChar *uri;
10248
10249 ctxt = xmlNewParserCtxt();
10250 if (ctxt == NULL) {
10251 return(NULL);
10252 }
10253
10254 uri = xmlBuildURI(URL, base);
10255
10256 if (uri == NULL) {
10257 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10258 if (inputStream == NULL) {
10259 xmlFreeParserCtxt(ctxt);
10260 return(NULL);
10261 }
10262
10263 inputPush(ctxt, inputStream);
10264
10265 if ((ctxt->directory == NULL) && (directory == NULL))
10266 directory = xmlParserGetDirectory((char *)URL);
10267 if ((ctxt->directory == NULL) && (directory != NULL))
10268 ctxt->directory = directory;
10269 } else {
10270 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10271 if (inputStream == NULL) {
10272 xmlFree(uri);
10273 xmlFreeParserCtxt(ctxt);
10274 return(NULL);
10275 }
10276
10277 inputPush(ctxt, inputStream);
10278
10279 if ((ctxt->directory == NULL) && (directory == NULL))
10280 directory = xmlParserGetDirectory((char *)uri);
10281 if ((ctxt->directory == NULL) && (directory != NULL))
10282 ctxt->directory = directory;
10283 xmlFree(uri);
10284 }
10285
10286 return(ctxt);
10287}
10288
10289/************************************************************************
10290 * *
10291 * Front ends when parsing from a file *
10292 * *
10293 ************************************************************************/
10294
10295/**
10296 * xmlCreateFileParserCtxt:
10297 * @filename: the filename
10298 *
10299 * Create a parser context for a file content.
10300 * Automatic support for ZLIB/Compress compressed document is provided
10301 * by default if found at compile-time.
10302 *
10303 * Returns the new parser context or NULL
10304 */
10305xmlParserCtxtPtr
10306xmlCreateFileParserCtxt(const char *filename)
10307{
10308 xmlParserCtxtPtr ctxt;
10309 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010310 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010311 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010312
Owen Taylor3473f882001-02-23 17:55:21 +000010313 ctxt = xmlNewParserCtxt();
10314 if (ctxt == NULL) {
10315 if (xmlDefaultSAXHandler.error != NULL) {
10316 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10317 }
10318 return(NULL);
10319 }
10320
Daniel Veillardf4862f02002-09-10 11:13:43 +000010321 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10322 if (normalized == NULL) {
10323 xmlFreeParserCtxt(ctxt);
10324 return(NULL);
10325 }
10326 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010327 if (inputStream == NULL) {
10328 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010329 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010330 return(NULL);
10331 }
10332
Owen Taylor3473f882001-02-23 17:55:21 +000010333 inputPush(ctxt, inputStream);
10334 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010335 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010336 if ((ctxt->directory == NULL) && (directory != NULL))
10337 ctxt->directory = directory;
10338
Daniel Veillardf4862f02002-09-10 11:13:43 +000010339 xmlFree(normalized);
10340
Owen Taylor3473f882001-02-23 17:55:21 +000010341 return(ctxt);
10342}
10343
10344/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010345 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010346 * @sax: the SAX handler block
10347 * @filename: the filename
10348 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10349 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010350 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010351 *
10352 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10353 * compressed document is provided by default if found at compile-time.
10354 * It use the given SAX function block to handle the parsing callback.
10355 * If sax is NULL, fallback to the default DOM tree building routines.
10356 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010357 * User data (void *) is stored within the parser context in the
10358 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010359 *
Owen Taylor3473f882001-02-23 17:55:21 +000010360 * Returns the resulting document tree
10361 */
10362
10363xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010364xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10365 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010366 xmlDocPtr ret;
10367 xmlParserCtxtPtr ctxt;
10368 char *directory = NULL;
10369
Daniel Veillard635ef722001-10-29 11:48:19 +000010370 xmlInitParser();
10371
Owen Taylor3473f882001-02-23 17:55:21 +000010372 ctxt = xmlCreateFileParserCtxt(filename);
10373 if (ctxt == NULL) {
10374 return(NULL);
10375 }
10376 if (sax != NULL) {
10377 if (ctxt->sax != NULL)
10378 xmlFree(ctxt->sax);
10379 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010380 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010381 if (data!=NULL) {
10382 ctxt->_private=data;
10383 }
Owen Taylor3473f882001-02-23 17:55:21 +000010384
10385 if ((ctxt->directory == NULL) && (directory == NULL))
10386 directory = xmlParserGetDirectory(filename);
10387 if ((ctxt->directory == NULL) && (directory != NULL))
10388 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10389
Daniel Veillarddad3f682002-11-17 16:47:27 +000010390 ctxt->recovery = recovery;
10391
Owen Taylor3473f882001-02-23 17:55:21 +000010392 xmlParseDocument(ctxt);
10393
10394 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10395 else {
10396 ret = NULL;
10397 xmlFreeDoc(ctxt->myDoc);
10398 ctxt->myDoc = NULL;
10399 }
10400 if (sax != NULL)
10401 ctxt->sax = NULL;
10402 xmlFreeParserCtxt(ctxt);
10403
10404 return(ret);
10405}
10406
10407/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010408 * xmlSAXParseFile:
10409 * @sax: the SAX handler block
10410 * @filename: the filename
10411 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10412 * documents
10413 *
10414 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10415 * compressed document is provided by default if found at compile-time.
10416 * It use the given SAX function block to handle the parsing callback.
10417 * If sax is NULL, fallback to the default DOM tree building routines.
10418 *
10419 * Returns the resulting document tree
10420 */
10421
10422xmlDocPtr
10423xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10424 int recovery) {
10425 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10426}
10427
10428/**
Owen Taylor3473f882001-02-23 17:55:21 +000010429 * xmlRecoverDoc:
10430 * @cur: a pointer to an array of xmlChar
10431 *
10432 * parse an XML in-memory document and build a tree.
10433 * In the case the document is not Well Formed, a tree is built anyway
10434 *
10435 * Returns the resulting document tree
10436 */
10437
10438xmlDocPtr
10439xmlRecoverDoc(xmlChar *cur) {
10440 return(xmlSAXParseDoc(NULL, cur, 1));
10441}
10442
10443/**
10444 * xmlParseFile:
10445 * @filename: the filename
10446 *
10447 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10448 * compressed document is provided by default if found at compile-time.
10449 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010450 * Returns the resulting document tree if the file was wellformed,
10451 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010452 */
10453
10454xmlDocPtr
10455xmlParseFile(const char *filename) {
10456 return(xmlSAXParseFile(NULL, filename, 0));
10457}
10458
10459/**
10460 * xmlRecoverFile:
10461 * @filename: the filename
10462 *
10463 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10464 * compressed document is provided by default if found at compile-time.
10465 * In the case the document is not Well Formed, a tree is built anyway
10466 *
10467 * Returns the resulting document tree
10468 */
10469
10470xmlDocPtr
10471xmlRecoverFile(const char *filename) {
10472 return(xmlSAXParseFile(NULL, filename, 1));
10473}
10474
10475
10476/**
10477 * xmlSetupParserForBuffer:
10478 * @ctxt: an XML parser context
10479 * @buffer: a xmlChar * buffer
10480 * @filename: a file name
10481 *
10482 * Setup the parser context to parse a new buffer; Clears any prior
10483 * contents from the parser context. The buffer parameter must not be
10484 * NULL, but the filename parameter can be
10485 */
10486void
10487xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10488 const char* filename)
10489{
10490 xmlParserInputPtr input;
10491
10492 input = xmlNewInputStream(ctxt);
10493 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010494 xmlGenericError(xmlGenericErrorContext,
10495 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010496 xmlFree(ctxt);
10497 return;
10498 }
10499
10500 xmlClearParserCtxt(ctxt);
10501 if (filename != NULL)
10502 input->filename = xmlMemStrdup(filename);
10503 input->base = buffer;
10504 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010505 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010506 inputPush(ctxt, input);
10507}
10508
10509/**
10510 * xmlSAXUserParseFile:
10511 * @sax: a SAX handler
10512 * @user_data: The user data returned on SAX callbacks
10513 * @filename: a file name
10514 *
10515 * parse an XML file and call the given SAX handler routines.
10516 * Automatic support for ZLIB/Compress compressed document is provided
10517 *
10518 * Returns 0 in case of success or a error number otherwise
10519 */
10520int
10521xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10522 const char *filename) {
10523 int ret = 0;
10524 xmlParserCtxtPtr ctxt;
10525
10526 ctxt = xmlCreateFileParserCtxt(filename);
10527 if (ctxt == NULL) return -1;
10528 if (ctxt->sax != &xmlDefaultSAXHandler)
10529 xmlFree(ctxt->sax);
10530 ctxt->sax = sax;
10531 if (user_data != NULL)
10532 ctxt->userData = user_data;
10533
10534 xmlParseDocument(ctxt);
10535
10536 if (ctxt->wellFormed)
10537 ret = 0;
10538 else {
10539 if (ctxt->errNo != 0)
10540 ret = ctxt->errNo;
10541 else
10542 ret = -1;
10543 }
10544 if (sax != NULL)
10545 ctxt->sax = NULL;
10546 xmlFreeParserCtxt(ctxt);
10547
10548 return ret;
10549}
10550
10551/************************************************************************
10552 * *
10553 * Front ends when parsing from memory *
10554 * *
10555 ************************************************************************/
10556
10557/**
10558 * xmlCreateMemoryParserCtxt:
10559 * @buffer: a pointer to a char array
10560 * @size: the size of the array
10561 *
10562 * Create a parser context for an XML in-memory document.
10563 *
10564 * Returns the new parser context or NULL
10565 */
10566xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010567xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010568 xmlParserCtxtPtr ctxt;
10569 xmlParserInputPtr input;
10570 xmlParserInputBufferPtr buf;
10571
10572 if (buffer == NULL)
10573 return(NULL);
10574 if (size <= 0)
10575 return(NULL);
10576
10577 ctxt = xmlNewParserCtxt();
10578 if (ctxt == NULL)
10579 return(NULL);
10580
10581 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010582 if (buf == NULL) {
10583 xmlFreeParserCtxt(ctxt);
10584 return(NULL);
10585 }
Owen Taylor3473f882001-02-23 17:55:21 +000010586
10587 input = xmlNewInputStream(ctxt);
10588 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010589 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010590 xmlFreeParserCtxt(ctxt);
10591 return(NULL);
10592 }
10593
10594 input->filename = NULL;
10595 input->buf = buf;
10596 input->base = input->buf->buffer->content;
10597 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010598 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010599
10600 inputPush(ctxt, input);
10601 return(ctxt);
10602}
10603
10604/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010605 * xmlSAXParseMemoryWithData:
10606 * @sax: the SAX handler block
10607 * @buffer: an pointer to a char array
10608 * @size: the size of the array
10609 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10610 * documents
10611 * @data: the userdata
10612 *
10613 * parse an XML in-memory block and use the given SAX function block
10614 * to handle the parsing callback. If sax is NULL, fallback to the default
10615 * DOM tree building routines.
10616 *
10617 * User data (void *) is stored within the parser context in the
10618 * context's _private member, so it is available nearly everywhere in libxml
10619 *
10620 * Returns the resulting document tree
10621 */
10622
10623xmlDocPtr
10624xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10625 int size, int recovery, void *data) {
10626 xmlDocPtr ret;
10627 xmlParserCtxtPtr ctxt;
10628
10629 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10630 if (ctxt == NULL) return(NULL);
10631 if (sax != NULL) {
10632 if (ctxt->sax != NULL)
10633 xmlFree(ctxt->sax);
10634 ctxt->sax = sax;
10635 }
10636 if (data!=NULL) {
10637 ctxt->_private=data;
10638 }
10639
10640 xmlParseDocument(ctxt);
10641
10642 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10643 else {
10644 ret = NULL;
10645 xmlFreeDoc(ctxt->myDoc);
10646 ctxt->myDoc = NULL;
10647 }
10648 if (sax != NULL)
10649 ctxt->sax = NULL;
10650 xmlFreeParserCtxt(ctxt);
10651
10652 return(ret);
10653}
10654
10655/**
Owen Taylor3473f882001-02-23 17:55:21 +000010656 * xmlSAXParseMemory:
10657 * @sax: the SAX handler block
10658 * @buffer: an pointer to a char array
10659 * @size: the size of the array
10660 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10661 * documents
10662 *
10663 * parse an XML in-memory block and use the given SAX function block
10664 * to handle the parsing callback. If sax is NULL, fallback to the default
10665 * DOM tree building routines.
10666 *
10667 * Returns the resulting document tree
10668 */
10669xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010670xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10671 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010672 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010673}
10674
10675/**
10676 * xmlParseMemory:
10677 * @buffer: an pointer to a char array
10678 * @size: the size of the array
10679 *
10680 * parse an XML in-memory block and build a tree.
10681 *
10682 * Returns the resulting document tree
10683 */
10684
Daniel Veillard50822cb2001-07-26 20:05:51 +000010685xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010686 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10687}
10688
10689/**
10690 * xmlRecoverMemory:
10691 * @buffer: an pointer to a char array
10692 * @size: the size of the array
10693 *
10694 * parse an XML in-memory block and build a tree.
10695 * In the case the document is not Well Formed, a tree is built anyway
10696 *
10697 * Returns the resulting document tree
10698 */
10699
Daniel Veillard50822cb2001-07-26 20:05:51 +000010700xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010701 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10702}
10703
10704/**
10705 * xmlSAXUserParseMemory:
10706 * @sax: a SAX handler
10707 * @user_data: The user data returned on SAX callbacks
10708 * @buffer: an in-memory XML document input
10709 * @size: the length of the XML document in bytes
10710 *
10711 * A better SAX parsing routine.
10712 * parse an XML in-memory buffer and call the given SAX handler routines.
10713 *
10714 * Returns 0 in case of success or a error number otherwise
10715 */
10716int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010717 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010718 int ret = 0;
10719 xmlParserCtxtPtr ctxt;
10720 xmlSAXHandlerPtr oldsax = NULL;
10721
Daniel Veillard9e923512002-08-14 08:48:52 +000010722 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010723 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10724 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010725 oldsax = ctxt->sax;
10726 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010727 if (user_data != NULL)
10728 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010729
10730 xmlParseDocument(ctxt);
10731
10732 if (ctxt->wellFormed)
10733 ret = 0;
10734 else {
10735 if (ctxt->errNo != 0)
10736 ret = ctxt->errNo;
10737 else
10738 ret = -1;
10739 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010740 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010741 xmlFreeParserCtxt(ctxt);
10742
10743 return ret;
10744}
10745
10746/**
10747 * xmlCreateDocParserCtxt:
10748 * @cur: a pointer to an array of xmlChar
10749 *
10750 * Creates a parser context for an XML in-memory document.
10751 *
10752 * Returns the new parser context or NULL
10753 */
10754xmlParserCtxtPtr
10755xmlCreateDocParserCtxt(xmlChar *cur) {
10756 int len;
10757
10758 if (cur == NULL)
10759 return(NULL);
10760 len = xmlStrlen(cur);
10761 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10762}
10763
10764/**
10765 * xmlSAXParseDoc:
10766 * @sax: the SAX handler block
10767 * @cur: a pointer to an array of xmlChar
10768 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10769 * documents
10770 *
10771 * parse an XML in-memory document and build a tree.
10772 * It use the given SAX function block to handle the parsing callback.
10773 * If sax is NULL, fallback to the default DOM tree building routines.
10774 *
10775 * Returns the resulting document tree
10776 */
10777
10778xmlDocPtr
10779xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10780 xmlDocPtr ret;
10781 xmlParserCtxtPtr ctxt;
10782
10783 if (cur == NULL) return(NULL);
10784
10785
10786 ctxt = xmlCreateDocParserCtxt(cur);
10787 if (ctxt == NULL) return(NULL);
10788 if (sax != NULL) {
10789 ctxt->sax = sax;
10790 ctxt->userData = NULL;
10791 }
10792
10793 xmlParseDocument(ctxt);
10794 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10795 else {
10796 ret = NULL;
10797 xmlFreeDoc(ctxt->myDoc);
10798 ctxt->myDoc = NULL;
10799 }
10800 if (sax != NULL)
10801 ctxt->sax = NULL;
10802 xmlFreeParserCtxt(ctxt);
10803
10804 return(ret);
10805}
10806
10807/**
10808 * xmlParseDoc:
10809 * @cur: a pointer to an array of xmlChar
10810 *
10811 * parse an XML in-memory document and build a tree.
10812 *
10813 * Returns the resulting document tree
10814 */
10815
10816xmlDocPtr
10817xmlParseDoc(xmlChar *cur) {
10818 return(xmlSAXParseDoc(NULL, cur, 0));
10819}
10820
Daniel Veillard8107a222002-01-13 14:10:10 +000010821/************************************************************************
10822 * *
10823 * Specific function to keep track of entities references *
10824 * and used by the XSLT debugger *
10825 * *
10826 ************************************************************************/
10827
10828static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10829
10830/**
10831 * xmlAddEntityReference:
10832 * @ent : A valid entity
10833 * @firstNode : A valid first node for children of entity
10834 * @lastNode : A valid last node of children entity
10835 *
10836 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10837 */
10838static void
10839xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10840 xmlNodePtr lastNode)
10841{
10842 if (xmlEntityRefFunc != NULL) {
10843 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10844 }
10845}
10846
10847
10848/**
10849 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010850 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010851 *
10852 * Set the function to call call back when a xml reference has been made
10853 */
10854void
10855xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10856{
10857 xmlEntityRefFunc = func;
10858}
Owen Taylor3473f882001-02-23 17:55:21 +000010859
10860/************************************************************************
10861 * *
10862 * Miscellaneous *
10863 * *
10864 ************************************************************************/
10865
10866#ifdef LIBXML_XPATH_ENABLED
10867#include <libxml/xpath.h>
10868#endif
10869
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010870extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010871static int xmlParserInitialized = 0;
10872
10873/**
10874 * xmlInitParser:
10875 *
10876 * Initialization function for the XML parser.
10877 * This is not reentrant. Call once before processing in case of
10878 * use in multithreaded programs.
10879 */
10880
10881void
10882xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010883 if (xmlParserInitialized != 0)
10884 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010885
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010886 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10887 (xmlGenericError == NULL))
10888 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010889 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010890 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010891 xmlInitCharEncodingHandlers();
10892 xmlInitializePredefinedEntities();
10893 xmlDefaultSAXHandlerInit();
10894 xmlRegisterDefaultInputCallbacks();
10895 xmlRegisterDefaultOutputCallbacks();
10896#ifdef LIBXML_HTML_ENABLED
10897 htmlInitAutoClose();
10898 htmlDefaultSAXHandlerInit();
10899#endif
10900#ifdef LIBXML_XPATH_ENABLED
10901 xmlXPathInit();
10902#endif
10903 xmlParserInitialized = 1;
10904}
10905
10906/**
10907 * xmlCleanupParser:
10908 *
10909 * Cleanup function for the XML parser. It tries to reclaim all
10910 * parsing related global memory allocated for the parser processing.
10911 * It doesn't deallocate any document related memory. Calling this
10912 * function should not prevent reusing the parser.
10913 */
10914
10915void
10916xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010917 xmlCleanupCharEncodingHandlers();
10918 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010919#ifdef LIBXML_CATALOG_ENABLED
10920 xmlCatalogCleanup();
10921#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010922 xmlCleanupThreads();
10923 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010924}