blob: 54159d3ec5d298413664665e204778a1c49ed2ce [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
/*
 * Buffer size constants.
 * NOTE(review): the code consuming these values is not visible from
 * here -- presumably initial sizes of parser scratch buffers; confirm.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * Fixed marker string.
 * NOTE(review): presumably identifies documents built in SAX
 * compatibility mode; confirm against the users of this macro.
 */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * NULL-terminated list of the XML-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location!
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skips n xmlChars, and must also be used only to skip ASCII
 *           defined strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skips to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) Skips l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copies the current unicode char to the target buffer,
 *           incrementing the index
 *   GROW, SHRINK  handling of input buffers
 */
336
/* Current xmlChar of the active input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)

/* xmlChar located n positions after the current one. */
#define NXT(n) ctxt->input->cur[(n)]

/* The read cursor of the active input itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the cursor (and the character counter) by n xmlChars.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and if the new position holds a 0 and
 * the input cannot be grown, the input is popped.
 * The argument is evaluated twice.
 */
#define SKIP(n) do {                                                    \
    ctxt->nbChars += (n);                                               \
    ctxt->input->cur += (n);                                            \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Step over exactly one xmlChar, asking for more input when a 0 is
 * reached.  Expands to a brace-enclosed block, not a do/while.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Update the line/column bookkeeping from the xmlChar under the cursor,
 * advance the cursor by len xmlChars, then hand a '%' found at the new
 * position to xmlParserHandlePEReference().
 */
#define NEXTL(len) do {                                                 \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += len;                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character of the context; its length is stored into len. */
#define CUR_CHAR(len) xmlCurrentChar(ctxt, &len)
/* Same, reading from the string str instead of the context input. */
#define CUR_SCHAR(str, len) xmlStringCurrentChar(ctxt, str, &len)

/*
 * Append character ch to buf at index idx, advancing idx: a plain store
 * when len is 1, xmlCopyCharMultiByte() otherwise.
 * Expands to a bare if/else without a trailing ';'.
 */
#define COPY_BUF(len,buf,idx,ch)                                        \
    if (len == 1) buf[idx++] = (xmlChar) ch;                            \
    else idx += xmlCopyCharMultiByte(&buf[idx],ch)
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be a plain xmlChar * variable that has a companion integer
 * variable named <buffer>_size holding its allocated size; the size is
 * doubled. The reallocation goes through a temporary so that the previous
 * allocation is not lost when xmlRealloc() fails: in that case the error is
 * reported, the old buffer is released, @buffer is left NULL and the
 * enclosing function returns NULL. The macro can therefore only be used in
 * functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
        xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001772 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1773 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001774 ctxt->sax->error(ctxt->userData,
1775 "Name %s is not XML Namespace compliant\n",
1776 name);
1777 }
1778 }
1779 cur++;
1780
Owen Taylor3473f882001-02-23 17:55:21 +00001781 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1782 buf[len++] = c;
1783 c = *cur++;
1784 }
1785 if (len >= max) {
1786 /*
1787 * Okay someone managed to make a huge name, so he's ready to pay
1788 * for the processing speed.
1789 */
1790 max = len * 2;
1791
1792 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1793 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001794 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1795 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001796 ctxt->sax->error(ctxt->userData,
1797 "xmlSplitQName: out of memory\n");
1798 return(NULL);
1799 }
1800 memcpy(buffer, buf, len);
1801 while (c != 0) { /* tested bigname2.xml */
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001809 ctxt->sax->error(ctxt->userData,
1810 "xmlSplitQName: out of memory\n");
1811 return(NULL);
1812 }
1813 }
1814 buffer[len++] = c;
1815 c = *cur++;
1816 }
1817 buffer[len] = 0;
1818 }
1819
1820 if (buffer == NULL)
1821 ret = xmlStrndup(buf, len);
1822 else {
1823 ret = buffer;
1824 }
1825 }
1826
1827 return(ret);
1828}
1829
1830/************************************************************************
1831 * *
1832 * The parser itself *
1833 * Relates to http://www.w3.org/TR/REC-xml *
1834 * *
1835 ************************************************************************/
1836
Daniel Veillard76d66f42001-05-16 21:05:17 +00001837static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001838/**
1839 * xmlParseName:
1840 * @ctxt: an XML parser context
1841 *
1842 * parse an XML name.
1843 *
1844 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1845 * CombiningChar | Extender
1846 *
1847 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1848 *
1849 * [6] Names ::= Name (S Name)*
1850 *
1851 * Returns the Name parsed or NULL
1852 */
1853
1854xmlChar *
1855xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001856 const xmlChar *in;
1857 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001858 int count = 0;
1859
1860 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001861
1862 /*
1863 * Accelerator for simple ASCII names
1864 */
1865 in = ctxt->input->cur;
1866 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1867 ((*in >= 0x41) && (*in <= 0x5A)) ||
1868 (*in == '_') || (*in == ':')) {
1869 in++;
1870 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1871 ((*in >= 0x41) && (*in <= 0x5A)) ||
1872 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 (*in == '_') || (*in == '-') ||
1874 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001875 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001876 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001877 count = in - ctxt->input->cur;
1878 ret = xmlStrndup(ctxt->input->cur, count);
1879 ctxt->input->cur = in;
1880 return(ret);
1881 }
1882 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001883 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001884}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
Daniel Veillard46de64e2002-05-29 08:21:33 +00001886/**
1887 * xmlParseNameAndCompare:
1888 * @ctxt: an XML parser context
1889 *
1890 * parse an XML name and compares for match
1891 * (specialized for endtag parsing)
1892 *
1893 *
1894 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1895 * and the name for mismatch
1896 */
1897
Daniel Veillardf4862f02002-09-10 11:13:43 +00001898static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001899xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1900 const xmlChar *cmp = other;
1901 const xmlChar *in;
1902 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001903
1904 GROW;
1905
1906 in = ctxt->input->cur;
1907 while (*in != 0 && *in == *cmp) {
1908 ++in;
1909 ++cmp;
1910 }
1911 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1912 /* success */
1913 ctxt->input->cur = in;
1914 return (xmlChar*) 1;
1915 }
1916 /* failure (or end of input buffer), check with full function */
1917 ret = xmlParseName (ctxt);
1918 if (ret != 0 && xmlStrEqual (ret, other)) {
1919 xmlFree (ret);
1920 return (xmlChar*) 1;
1921 }
1922 return ret;
1923}
1924
Daniel Veillard76d66f42001-05-16 21:05:17 +00001925static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001926xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1927 xmlChar buf[XML_MAX_NAMELEN + 5];
1928 int len = 0, l;
1929 int c;
1930 int count = 0;
1931
1932 /*
1933 * Handler for more complex cases
1934 */
1935 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 c = CUR_CHAR(l);
1937 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1938 (!IS_LETTER(c) && (c != '_') &&
1939 (c != ':'))) {
1940 return(NULL);
1941 }
1942
1943 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1944 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1945 (c == '.') || (c == '-') ||
1946 (c == '_') || (c == ':') ||
1947 (IS_COMBINING(c)) ||
1948 (IS_EXTENDER(c)))) {
1949 if (count++ > 100) {
1950 count = 0;
1951 GROW;
1952 }
1953 COPY_BUF(l,buf,len,c);
1954 NEXTL(l);
1955 c = CUR_CHAR(l);
1956 if (len >= XML_MAX_NAMELEN) {
1957 /*
1958 * Okay someone managed to make a huge name, so he's ready to pay
1959 * for the processing speed.
1960 */
1961 xmlChar *buffer;
1962 int max = len * 2;
1963
1964 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1965 if (buffer == NULL) {
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001968 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001969 return(NULL);
1970 }
1971 memcpy(buffer, buf, len);
1972 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c))) {
1977 if (count++ > 100) {
1978 count = 0;
1979 GROW;
1980 }
1981 if (len + 10 > max) {
1982 max *= 2;
1983 buffer = (xmlChar *) xmlRealloc(buffer,
1984 max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 }
1992 COPY_BUF(l,buffer,len,c);
1993 NEXTL(l);
1994 c = CUR_CHAR(l);
1995 }
1996 buffer[len] = 0;
1997 return(buffer);
1998 }
1999 }
2000 return(xmlStrndup(buf, len));
2001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
2050 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
2130 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
2205 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
2366 SHRINK;
2367 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002368 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002369 if (*in != '"' && *in != '\'') {
2370 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2372 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002375 return(NULL);
2376 }
2377 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2378 limit = *in;
2379 ++in;
2380
2381 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2382 *in != '&' && *in != '<'
2383 ) {
2384 ++in;
2385 }
2386 if (*in != limit) {
2387 return xmlParseAttValueComplex(ctxt);
2388 }
2389 ++in;
2390 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2391 CUR_PTR = in;
2392 return ret;
2393}
2394
Daniel Veillard01c13b52002-12-10 15:19:08 +00002395/**
2396 * xmlParseAttValueComplex:
2397 * @ctxt: an XML parser context
2398 *
2399 * parse a value for an attribute, this is the fallback function
2400 * of xmlParseAttValue() when the attribute parsing requires handling
2401 * of non-ASCII characters.
2402 *
2403 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2404 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlChar *
2406xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2407 xmlChar limit = 0;
2408 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002409 int len = 0;
2410 int buf_size = 0;
2411 int c, l;
2412 xmlChar *current = NULL;
2413 xmlEntityPtr ent;
2414
2415
2416 SHRINK;
2417 if (NXT(0) == '"') {
2418 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2419 limit = '"';
2420 NEXT;
2421 } else if (NXT(0) == '\'') {
2422 limit = '\'';
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 NEXT;
2425 } else {
2426 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 return(NULL);
2432 }
2433
2434 /*
2435 * allocate a translation buffer.
2436 */
2437 buf_size = XML_PARSER_BUFFER_SIZE;
2438 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2439 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002440 xmlGenericError(xmlGenericErrorContext,
2441 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(NULL);
2443 }
2444
2445 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002446 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 */
2448 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 while ((NXT(0) != limit) && /* checked */
2450 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002452 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (NXT(1) == '#') {
2454 int val = xmlParseCharRef(ctxt);
2455 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (ctxt->replaceEntities) {
2457 if (len > buf_size - 10) {
2458 growBuffer(buf);
2459 }
2460 buf[len++] = '&';
2461 } else {
2462 /*
2463 * The reparsing will be done in xmlStringGetNodeList()
2464 * called by the attribute() function in SAX.c
2465 */
2466 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002467
Daniel Veillard319a7422001-09-11 09:27:09 +00002468 if (len > buf_size - 10) {
2469 growBuffer(buf);
2470 }
2471 current = &buffer[0];
2472 while (*current != 0) { /* non input consuming */
2473 buf[len++] = *current++;
2474 }
Owen Taylor3473f882001-02-23 17:55:21 +00002475 }
2476 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 len += xmlCopyChar(0, &buf[len], val);
2481 }
2482 } else {
2483 ent = xmlParseEntityRef(ctxt);
2484 if ((ent != NULL) &&
2485 (ctxt->replaceEntities != 0)) {
2486 xmlChar *rep;
2487
2488 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2489 rep = xmlStringDecodeEntities(ctxt, ent->content,
2490 XML_SUBSTITUTE_REF, 0, 0, 0);
2491 if (rep != NULL) {
2492 current = rep;
2493 while (*current != 0) { /* non input consuming */
2494 buf[len++] = *current++;
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 }
2499 xmlFree(rep);
2500 }
2501 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
Owen Taylor3473f882001-02-23 17:55:21 +00002505 if (ent->content != NULL)
2506 buf[len++] = ent->content[0];
2507 }
2508 } else if (ent != NULL) {
2509 int i = xmlStrlen(ent->name);
2510 const xmlChar *cur = ent->name;
2511
2512 /*
2513 * This may look absurd but is needed to detect
2514 * entities problems
2515 */
2516 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2517 (ent->content != NULL)) {
2518 xmlChar *rep;
2519 rep = xmlStringDecodeEntities(ctxt, ent->content,
2520 XML_SUBSTITUTE_REF, 0, 0, 0);
2521 if (rep != NULL)
2522 xmlFree(rep);
2523 }
2524
2525 /*
2526 * Just output the reference
2527 */
2528 buf[len++] = '&';
2529 if (len > buf_size - i - 10) {
2530 growBuffer(buf);
2531 }
2532 for (;i > 0;i--)
2533 buf[len++] = *cur++;
2534 buf[len++] = ';';
2535 }
2536 }
2537 } else {
2538 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2539 COPY_BUF(l,buf,len,0x20);
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 } else {
2544 COPY_BUF(l,buf,len,c);
2545 if (len > buf_size - 10) {
2546 growBuffer(buf);
2547 }
2548 }
2549 NEXTL(l);
2550 }
2551 GROW;
2552 c = CUR_CHAR(l);
2553 }
2554 buf[len++] = 0;
2555 if (RAW == '<') {
2556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2558 ctxt->sax->error(ctxt->userData,
2559 "Unescaped '<' not allowed in attributes values\n");
2560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 } else if (RAW != limit) {
2563 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else
2569 NEXT;
2570 return(buf);
2571}
2572
2573/**
2574 * xmlParseSystemLiteral:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML Literal
2578 *
2579 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2580 *
2581 * Returns the SystemLiteral parsed or NULL
2582 */
2583
2584xmlChar *
2585xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2586 xmlChar *buf = NULL;
2587 int len = 0;
2588 int size = XML_PARSER_BUFFER_SIZE;
2589 int cur, l;
2590 xmlChar stop;
2591 int state = ctxt->instate;
2592 int count = 0;
2593
2594 SHRINK;
2595 if (RAW == '"') {
2596 NEXT;
2597 stop = '"';
2598 } else if (RAW == '\'') {
2599 NEXT;
2600 stop = '\'';
2601 } else {
2602 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "SystemLiteral \" or ' expected\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
2609 }
2610
2611 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "malloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2618 cur = CUR_CHAR(l);
2619 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2620 if (len + 5 >= size) {
2621 size *= 2;
2622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2623 if (buf == NULL) {
2624 xmlGenericError(xmlGenericErrorContext,
2625 "realloc of %d byte failed\n", size);
2626 ctxt->instate = (xmlParserInputState) state;
2627 return(NULL);
2628 }
2629 }
2630 count++;
2631 if (count > 50) {
2632 GROW;
2633 count = 0;
2634 }
2635 COPY_BUF(l,buf,len,cur);
2636 NEXTL(l);
2637 cur = CUR_CHAR(l);
2638 if (cur == 0) {
2639 GROW;
2640 SHRINK;
2641 cur = CUR_CHAR(l);
2642 }
2643 }
2644 buf[len] = 0;
2645 ctxt->instate = (xmlParserInputState) state;
2646 if (!IS_CHAR(cur)) {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2650 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002651 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
2653 NEXT;
2654 }
2655 return(buf);
2656}
2657
2658/**
2659 * xmlParsePubidLiteral:
2660 * @ctxt: an XML parser context
2661 *
2662 * parse an XML public literal
2663 *
2664 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2665 *
2666 * Returns the PubidLiteral parsed or NULL.
2667 */
2668
2669xmlChar *
2670xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2671 xmlChar *buf = NULL;
2672 int len = 0;
2673 int size = XML_PARSER_BUFFER_SIZE;
2674 xmlChar cur;
2675 xmlChar stop;
2676 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002677 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
2679 SHRINK;
2680 if (RAW == '"') {
2681 NEXT;
2682 stop = '"';
2683 } else if (RAW == '\'') {
2684 NEXT;
2685 stop = '\'';
2686 } else {
2687 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2689 ctxt->sax->error(ctxt->userData,
2690 "SystemLiteral \" or ' expected\n");
2691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(NULL);
2694 }
2695 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2696 if (buf == NULL) {
2697 xmlGenericError(xmlGenericErrorContext,
2698 "malloc of %d byte failed\n", size);
2699 return(NULL);
2700 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002701 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 cur = CUR;
2703 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2704 if (len + 1 >= size) {
2705 size *= 2;
2706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2707 if (buf == NULL) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "realloc of %d byte failed\n", size);
2710 return(NULL);
2711 }
2712 }
2713 buf[len++] = cur;
2714 count++;
2715 if (count > 50) {
2716 GROW;
2717 count = 0;
2718 }
2719 NEXT;
2720 cur = CUR;
2721 if (cur == 0) {
2722 GROW;
2723 SHRINK;
2724 cur = CUR;
2725 }
2726 }
2727 buf[len] = 0;
2728 if (cur != stop) {
2729 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else {
2735 NEXT;
2736 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002737 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(buf);
2739}
2740
Daniel Veillard48b2f892001-02-25 16:11:03 +00002741void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002742/**
2743 * xmlParseCharData:
2744 * @ctxt: an XML parser context
2745 * @cdata: int indicating whether we are within a CDATA section
2746 *
2747 * parse a CharData section.
2748 * if we are within a CDATA section ']]>' marks an end of section.
2749 *
2750 * The right angle bracket (>) may be represented using the string "&gt;",
2751 * and must, for compatibility, be escaped using "&gt;" or a character
2752 * reference when it appears in the string "]]>" in content, when that
2753 * string is not marking the end of a CDATA section.
2754 *
2755 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2756 */
2757
2758void
2759xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002760 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002762 int line = ctxt->input->line;
2763 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 SHRINK;
2766 GROW;
2767 /*
2768 * Accelerated common case where input don't need to be
2769 * modified before passing it to the handler.
2770 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002771 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 in = ctxt->input->cur;
2773 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002774get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2776 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002780 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002781 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002782 ctxt->input->line++;
2783 in++;
2784 }
2785 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 }
2787 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002788 if ((in[1] == ']') && (in[2] == '>')) {
2789 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "Sequence ']]>' not allowed in content\n");
2793 ctxt->input->cur = in;
2794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002796 return;
2797 }
2798 in++;
2799 goto get_more;
2800 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002801 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002802 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002803 if (IS_BLANK(*ctxt->input->cur)) {
2804 const xmlChar *tmp = ctxt->input->cur;
2805 ctxt->input->cur = in;
2806 if (areBlanks(ctxt, tmp, nbchar)) {
2807 if (ctxt->sax->ignorableWhitespace != NULL)
2808 ctxt->sax->ignorableWhitespace(ctxt->userData,
2809 tmp, nbchar);
2810 } else {
2811 if (ctxt->sax->characters != NULL)
2812 ctxt->sax->characters(ctxt->userData,
2813 tmp, nbchar);
2814 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002815 line = ctxt->input->line;
2816 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData,
2820 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002821 line = ctxt->input->line;
2822 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002823 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 }
2825 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 if (*in == 0xD) {
2827 in++;
2828 if (*in == 0xA) {
2829 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 ctxt->input->line++;
2832 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002834 in--;
2835 }
2836 if (*in == '<') {
2837 return;
2838 }
2839 if (*in == '&') {
2840 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002841 }
2842 SHRINK;
2843 GROW;
2844 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002845 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = 0;
2847 }
Daniel Veillard50582112001-03-26 22:52:16 +00002848 ctxt->input->line = line;
2849 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 xmlParseCharDataComplex(ctxt, cdata);
2851}
2852
Daniel Veillard01c13b52002-12-10 15:19:08 +00002853/**
2854 * xmlParseCharDataComplex:
2855 * @ctxt: an XML parser context
2856 * @cdata: int indicating whether we are within a CDATA section
2857 *
2858 * parse a CharData section.this is the fallback function
2859 * of xmlParseCharData() when the parsing requires handling
2860 * of non-ASCII characters.
2861 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862void
2863xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002864 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2865 int nbchar = 0;
2866 int cur, l;
2867 int count = 0;
2868
2869 SHRINK;
2870 GROW;
2871 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002872 while ((cur != '<') && /* checked */
2873 (cur != '&') &&
2874 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if ((cur == ']') && (NXT(1) == ']') &&
2876 (NXT(2) == '>')) {
2877 if (cdata) break;
2878 else {
2879 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Sequence ']]>' not allowed in content\n");
2883 /* Should this be relaxed ??? I see a "must here */
2884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
2888 COPY_BUF(l,buf,nbchar,cur);
2889 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2894 if (areBlanks(ctxt, buf, nbchar)) {
2895 if (ctxt->sax->ignorableWhitespace != NULL)
2896 ctxt->sax->ignorableWhitespace(ctxt->userData,
2897 buf, nbchar);
2898 } else {
2899 if (ctxt->sax->characters != NULL)
2900 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2901 }
2902 }
2903 nbchar = 0;
2904 }
2905 count++;
2906 if (count > 50) {
2907 GROW;
2908 count = 0;
2909 }
2910 NEXTL(l);
2911 cur = CUR_CHAR(l);
2912 }
2913 if (nbchar != 0) {
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
2917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2918 if (areBlanks(ctxt, buf, nbchar)) {
2919 if (ctxt->sax->ignorableWhitespace != NULL)
2920 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2921 } else {
2922 if (ctxt->sax->characters != NULL)
2923 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2924 }
2925 }
2926 }
2927}
2928
2929/**
2930 * xmlParseExternalID:
2931 * @ctxt: an XML parser context
2932 * @publicID: a xmlChar** receiving PubidLiteral
2933 * @strict: indicate whether we should restrict parsing to only
2934 * production [75], see NOTE below
2935 *
2936 * Parse an External ID or a Public ID
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002939 * 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2942 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2943 *
2944 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2945 *
2946 * Returns the function returns SystemLiteral and in the second
2947 * case publicID receives PubidLiteral, is strict is off
2948 * it is possible to return NULL and have publicID set.
2949 */
2950
2951xmlChar *
2952xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2953 xmlChar *URI = NULL;
2954
2955 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002956
2957 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2959 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2960 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2961 SKIP(6);
2962 if (!IS_BLANK(CUR)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "Space required after 'SYSTEM'\n");
2967 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002968 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 SKIP_BLANKS;
2971 URI = xmlParseSystemLiteral(ctxt);
2972 if (URI == NULL) {
2973 ctxt->errNo = XML_ERR_URI_REQUIRED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParseExternalID: SYSTEM, no URI\n");
2977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002979 }
2980 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2981 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2982 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2983 SKIP(6);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after 'PUBLIC'\n");
2989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 SKIP_BLANKS;
2993 *publicID = xmlParsePubidLiteral(ctxt);
2994 if (*publicID == NULL) {
2995 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 if (strict) {
3003 /*
3004 * We don't handle [83] so "S SystemLiteral" is required.
3005 */
3006 if (!IS_BLANK(CUR)) {
3007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3009 ctxt->sax->error(ctxt->userData,
3010 "Space required after the Public Identifier\n");
3011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003013 }
3014 } else {
3015 /*
3016 * We handle [83] so we return immediately, if
3017 * "S SystemLiteral" is not detected. From a purely parsing
3018 * point of view that's a nice mess.
3019 */
3020 const xmlChar *ptr;
3021 GROW;
3022
3023 ptr = CUR_PTR;
3024 if (!IS_BLANK(*ptr)) return(NULL);
3025
3026 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3027 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3028 }
3029 SKIP_BLANKS;
3030 URI = xmlParseSystemLiteral(ctxt);
3031 if (URI == NULL) {
3032 ctxt->errNo = XML_ERR_URI_REQUIRED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "xmlParseExternalID: PUBLIC, no URI\n");
3036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 }
3040 return(URI);
3041}
3042
3043/**
3044 * xmlParseComment:
3045 * @ctxt: an XML parser context
3046 *
3047 * Skip an XML (SGML) comment <!-- .... -->
3048 * The spec says that "For compatibility, the string "--" (double-hyphen)
3049 * must not occur within comments. "
3050 *
3051 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3052 */
3053void
3054xmlParseComment(xmlParserCtxtPtr ctxt) {
3055 xmlChar *buf = NULL;
3056 int len;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int q, ql;
3059 int r, rl;
3060 int cur, l;
3061 xmlParserInputState state;
3062 xmlParserInputPtr input = ctxt->input;
3063 int count = 0;
3064
3065 /*
3066 * Check that there is a comment right here.
3067 */
3068 if ((RAW != '<') || (NXT(1) != '!') ||
3069 (NXT(2) != '-') || (NXT(3) != '-')) return;
3070
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_COMMENT;
3073 SHRINK;
3074 SKIP(4);
3075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3076 if (buf == NULL) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "malloc of %d byte failed\n", size);
3079 ctxt->instate = state;
3080 return;
3081 }
3082 q = CUR_CHAR(ql);
3083 NEXTL(ql);
3084 r = CUR_CHAR(rl);
3085 NEXTL(rl);
3086 cur = CUR_CHAR(l);
3087 len = 0;
3088 while (IS_CHAR(cur) && /* checked */
3089 ((cur != '>') ||
3090 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003091 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003092 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095 "Comment must not contain '--' (double-hyphen)`\n");
3096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 if (len + 5 >= size) {
3100 size *= 2;
3101 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "realloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 }
3109 COPY_BUF(ql,buf,len,q);
3110 q = r;
3111 ql = rl;
3112 r = cur;
3113 rl = l;
3114
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (!IS_CHAR(cur)) {
3130 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment not terminated \n<!--%.50s\n", buf);
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 xmlFree(buf);
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142"Comment doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 NEXT;
3147 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3148 (!ctxt->disableSAX))
3149 ctxt->sax->comment(ctxt->userData, buf);
3150 xmlFree(buf);
3151 }
3152 ctxt->instate = state;
3153}
3154
3155/**
3156 * xmlParsePITarget:
3157 * @ctxt: an XML parser context
3158 *
3159 * parse the name of a PI
3160 *
3161 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3162 *
3163 * Returns the PITarget name or NULL
3164 */
3165
3166xmlChar *
3167xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3168 xmlChar *name;
3169
3170 name = xmlParseName(ctxt);
3171 if ((name != NULL) &&
3172 ((name[0] == 'x') || (name[0] == 'X')) &&
3173 ((name[1] == 'm') || (name[1] == 'M')) &&
3174 ((name[2] == 'l') || (name[2] == 'L'))) {
3175 int i;
3176 if ((name[0] == 'x') && (name[1] == 'm') &&
3177 (name[2] == 'l') && (name[3] == 0)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "XML declaration allowed only at the start of the document\n");
3182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 return(name);
3185 } else if (name[3] == 0) {
3186 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(name);
3192 }
3193 for (i = 0;;i++) {
3194 if (xmlW3CPIs[i] == NULL) break;
3195 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3196 return(name);
3197 }
3198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3199 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3200 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003201 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003202 }
3203 }
3204 return(name);
3205}
3206
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A value which does not match  catalog S? '=' S? quoted-string S?
 * raises the XML_WAR_CATALOG_PI warning, except that a missing '='
 * after the "catalog" keyword makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto malformed;
    p += 7;

    /* the '=' sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted value, closed by the same quote character */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto malformed;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto malformed;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the value */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto malformed;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

malformed:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParsePI:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML Processing Instruction.
3275 *
3276 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3277 *
3278 * The processing is transfered to SAX once parsed.
3279 */
3280
3281void
3282xmlParsePI(xmlParserCtxtPtr ctxt) {
3283 xmlChar *buf = NULL;
3284 int len = 0;
3285 int size = XML_PARSER_BUFFER_SIZE;
3286 int cur, l;
3287 xmlChar *target;
3288 xmlParserInputState state;
3289 int count = 0;
3290
3291 if ((RAW == '<') && (NXT(1) == '?')) {
3292 xmlParserInputPtr input = ctxt->input;
3293 state = ctxt->instate;
3294 ctxt->instate = XML_PARSER_PI;
3295 /*
3296 * this is a Processing Instruction.
3297 */
3298 SKIP(2);
3299 SHRINK;
3300
3301 /*
3302 * Parse the target name and check for special support like
3303 * namespace.
3304 */
3305 target = xmlParsePITarget(ctxt);
3306 if (target != NULL) {
3307 if ((RAW == '?') && (NXT(1) == '>')) {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "PI declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 }
3316 SKIP(2);
3317
3318 /*
3319 * SAX: PI detected.
3320 */
3321 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3322 (ctxt->sax->processingInstruction != NULL))
3323 ctxt->sax->processingInstruction(ctxt->userData,
3324 target, NULL);
3325 ctxt->instate = state;
3326 xmlFree(target);
3327 return;
3328 }
3329 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3330 if (buf == NULL) {
3331 xmlGenericError(xmlGenericErrorContext,
3332 "malloc of %d byte failed\n", size);
3333 ctxt->instate = state;
3334 return;
3335 }
3336 cur = CUR;
3337 if (!IS_BLANK(cur)) {
3338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "xmlParsePI: PI %s space expected\n", target);
3342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 }
3345 SKIP_BLANKS;
3346 cur = CUR_CHAR(l);
3347 while (IS_CHAR(cur) && /* checked */
3348 ((cur != '?') || (NXT(1) != '>'))) {
3349 if (len + 5 >= size) {
3350 size *= 2;
3351 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3352 if (buf == NULL) {
3353 xmlGenericError(xmlGenericErrorContext,
3354 "realloc of %d byte failed\n", size);
3355 ctxt->instate = state;
3356 return;
3357 }
3358 }
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 COPY_BUF(l,buf,len,cur);
3365 NEXTL(l);
3366 cur = CUR_CHAR(l);
3367 if (cur == 0) {
3368 SHRINK;
3369 GROW;
3370 cur = CUR_CHAR(l);
3371 }
3372 }
3373 buf[len] = 0;
3374 if (cur != '?') {
3375 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData,
3378 "xmlParsePI: PI %s never end ...\n", target);
3379 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else {
3382 if (input != ctxt->input) {
3383 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "PI declaration doesn't start and stop in the same entity\n");
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP(2);
3391
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003392#ifdef LIBXML_CATALOG_ENABLED
3393 if (((state == XML_PARSER_MISC) ||
3394 (state == XML_PARSER_START)) &&
3395 (xmlStrEqual(target, XML_CATALOG_PI))) {
3396 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3397 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3398 (allow == XML_CATA_ALLOW_ALL))
3399 xmlParseCatalogPI(ctxt, buf);
3400 }
3401#endif
3402
3403
Owen Taylor3473f882001-02-23 17:55:21 +00003404 /*
3405 * SAX: PI detected.
3406 */
3407 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3408 (ctxt->sax->processingInstruction != NULL))
3409 ctxt->sax->processingInstruction(ctxt->userData,
3410 target, buf);
3411 }
3412 xmlFree(buf);
3413 xmlFree(target);
3414 } else {
3415 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "xmlParsePI : no target name\n");
3419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 ctxt->instate = state;
3423 }
3424}
3425
3426/**
3427 * xmlParseNotationDecl:
3428 * @ctxt: an XML parser context
3429 *
3430 * parse a notation declaration
3431 *
3432 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3433 *
3434 * Hence there is actually 3 choices:
3435 * 'PUBLIC' S PubidLiteral
3436 * 'PUBLIC' S PubidLiteral S SystemLiteral
3437 * and 'SYSTEM' S SystemLiteral
3438 *
3439 * See the NOTE on xmlParseExternalID().
3440 */
3441
3442void
3443xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3444 xmlChar *name;
3445 xmlChar *Pubid;
3446 xmlChar *Systemid;
3447
3448 if ((RAW == '<') && (NXT(1) == '!') &&
3449 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3450 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3451 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3452 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3453 xmlParserInputPtr input = ctxt->input;
3454 SHRINK;
3455 SKIP(10);
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after '<!NOTATION'\n");
3461 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003462 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return;
3464 }
3465 SKIP_BLANKS;
3466
Daniel Veillard76d66f42001-05-16 21:05:17 +00003467 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 if (name == NULL) {
3469 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "NOTATION: Name expected here\n");
3473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return;
3476 }
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after the NOTATION name'\n");
3482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return;
3485 }
3486 SKIP_BLANKS;
3487
3488 /*
3489 * Parse the IDs.
3490 */
3491 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3492 SKIP_BLANKS;
3493
3494 if (RAW == '>') {
3495 if (input != ctxt->input) {
3496 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499"Notation declaration doesn't start and stop in the same entity\n");
3500 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003501 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003502 }
3503 NEXT;
3504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3505 (ctxt->sax->notationDecl != NULL))
3506 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3507 } else {
3508 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "'>' required to close NOTATION declaration\n");
3512 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003513 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 }
3515 xmlFree(name);
3516 if (Systemid != NULL) xmlFree(Systemid);
3517 if (Pubid != NULL) xmlFree(Pubid);
3518 }
3519}
3520
3521/**
3522 * xmlParseEntityDecl:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse <!ENTITY declarations
3526 *
3527 * [70] EntityDecl ::= GEDecl | PEDecl
3528 *
3529 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3530 *
3531 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3532 *
3533 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3534 *
3535 * [74] PEDef ::= EntityValue | ExternalID
3536 *
3537 * [76] NDataDecl ::= S 'NDATA' S Name
3538 *
3539 * [ VC: Notation Declared ]
3540 * The Name must match the declared name of a notation.
3541 */
3542
3543void
3544xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3545 xmlChar *name = NULL;
3546 xmlChar *value = NULL;
3547 xmlChar *URI = NULL, *literal = NULL;
3548 xmlChar *ndata = NULL;
3549 int isParameter = 0;
3550 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003551 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003552
3553 GROW;
3554 if ((RAW == '<') && (NXT(1) == '!') &&
3555 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3556 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3557 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3558 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 SHRINK;
3560 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003561 skipped = SKIP_BLANKS;
3562 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3565 ctxt->sax->error(ctxt->userData,
3566 "Space required after '<!ENTITY'\n");
3567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
Owen Taylor3473f882001-02-23 17:55:21 +00003570
3571 if (RAW == '%') {
3572 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003573 skipped = SKIP_BLANKS;
3574 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003575 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData,
3578 "Space required after '%'\n");
3579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
Owen Taylor3473f882001-02-23 17:55:21 +00003582 isParameter = 1;
3583 }
3584
Daniel Veillard76d66f42001-05-16 21:05:17 +00003585 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003586 if (name == NULL) {
3587 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003592 return;
3593 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003594 skipped = SKIP_BLANKS;
3595 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3598 ctxt->sax->error(ctxt->userData,
3599 "Space required after the entity name\n");
3600 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003601 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
Owen Taylor3473f882001-02-23 17:55:21 +00003603
Daniel Veillardf5582f12002-06-11 10:08:16 +00003604 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 /*
3606 * handle the various case of definitions...
3607 */
3608 if (isParameter) {
3609 if ((RAW == '"') || (RAW == '\'')) {
3610 value = xmlParseEntityValue(ctxt, &orig);
3611 if (value) {
3612 if ((ctxt->sax != NULL) &&
3613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3614 ctxt->sax->entityDecl(ctxt->userData, name,
3615 XML_INTERNAL_PARAMETER_ENTITY,
3616 NULL, NULL, value);
3617 }
3618 } else {
3619 URI = xmlParseExternalID(ctxt, &literal, 1);
3620 if ((URI == NULL) && (literal == NULL)) {
3621 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Entity value required\n");
3625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
3628 if (URI) {
3629 xmlURIPtr uri;
3630
3631 uri = xmlParseURI((const char *) URI);
3632 if (uri == NULL) {
3633 ctxt->errNo = XML_ERR_INVALID_URI;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * This really ought to be a well formedness error
3641 * but the XML Core WG decided otherwise c.f. issue
3642 * E26 of the XML erratas.
3643 */
Owen Taylor3473f882001-02-23 17:55:21 +00003644 } else {
3645 if (uri->fragment != NULL) {
3646 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3647 if ((ctxt->sax != NULL) &&
3648 (!ctxt->disableSAX) &&
3649 (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003652 /*
3653 * Okay this is foolish to block those but not
3654 * invalid URIs.
3655 */
Owen Taylor3473f882001-02-23 17:55:21 +00003656 ctxt->wellFormed = 0;
3657 } else {
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) &&
3660 (ctxt->sax->entityDecl != NULL))
3661 ctxt->sax->entityDecl(ctxt->userData, name,
3662 XML_EXTERNAL_PARAMETER_ENTITY,
3663 literal, URI, NULL);
3664 }
3665 xmlFreeURI(uri);
3666 }
3667 }
3668 }
3669 } else {
3670 if ((RAW == '"') || (RAW == '\'')) {
3671 value = xmlParseEntityValue(ctxt, &orig);
3672 if ((ctxt->sax != NULL) &&
3673 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3674 ctxt->sax->entityDecl(ctxt->userData, name,
3675 XML_INTERNAL_GENERAL_ENTITY,
3676 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003677 /*
3678 * For expat compatibility in SAX mode.
3679 */
3680 if ((ctxt->myDoc == NULL) ||
3681 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3682 if (ctxt->myDoc == NULL) {
3683 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3684 }
3685 if (ctxt->myDoc->intSubset == NULL)
3686 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3687 BAD_CAST "fake", NULL, NULL);
3688
3689 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3690 NULL, NULL, value);
3691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 } else {
3693 URI = xmlParseExternalID(ctxt, &literal, 1);
3694 if ((URI == NULL) && (literal == NULL)) {
3695 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3697 ctxt->sax->error(ctxt->userData,
3698 "Entity value required\n");
3699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003701 }
3702 if (URI) {
3703 xmlURIPtr uri;
3704
3705 uri = xmlParseURI((const char *)URI);
3706 if (uri == NULL) {
3707 ctxt->errNo = XML_ERR_INVALID_URI;
3708 if ((ctxt->sax != NULL) &&
3709 (!ctxt->disableSAX) &&
3710 (ctxt->sax->error != NULL))
3711 ctxt->sax->error(ctxt->userData,
3712 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003713 /*
3714 * This really ought to be a well formedness error
3715 * but the XML Core WG decided otherwise c.f. issue
3716 * E26 of the XML erratas.
3717 */
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 if (uri->fragment != NULL) {
3720 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3721 if ((ctxt->sax != NULL) &&
3722 (!ctxt->disableSAX) &&
3723 (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003726 /*
3727 * Okay this is foolish to block those but not
3728 * invalid URIs.
3729 */
Owen Taylor3473f882001-02-23 17:55:21 +00003730 ctxt->wellFormed = 0;
3731 }
3732 xmlFreeURI(uri);
3733 }
3734 }
3735 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3736 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt->userData,
3739 "Space required before 'NDATA'\n");
3740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 if ((RAW == 'N') && (NXT(1) == 'D') &&
3745 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3746 (NXT(4) == 'A')) {
3747 SKIP(5);
3748 if (!IS_BLANK(CUR)) {
3749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3751 ctxt->sax->error(ctxt->userData,
3752 "Space required after 'NDATA'\n");
3753 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003754 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003757 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3759 (ctxt->sax->unparsedEntityDecl != NULL))
3760 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3761 literal, URI, ndata);
3762 } else {
3763 if ((ctxt->sax != NULL) &&
3764 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3765 ctxt->sax->entityDecl(ctxt->userData, name,
3766 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3767 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003768 /*
3769 * For expat compatibility in SAX mode.
3770 * assuming the entity repalcement was asked for
3771 */
3772 if ((ctxt->replaceEntities != 0) &&
3773 ((ctxt->myDoc == NULL) ||
3774 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3775 if (ctxt->myDoc == NULL) {
3776 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3777 }
3778
3779 if (ctxt->myDoc->intSubset == NULL)
3780 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3781 BAD_CAST "fake", NULL, NULL);
3782 entityDecl(ctxt, name,
3783 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3784 literal, URI, NULL);
3785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 }
3787 }
3788 }
3789 SKIP_BLANKS;
3790 if (RAW != '>') {
3791 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3793 ctxt->sax->error(ctxt->userData,
3794 "xmlParseEntityDecl: entity %s not terminated\n", name);
3795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 } else {
3798 if (input != ctxt->input) {
3799 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801 ctxt->sax->error(ctxt->userData,
3802"Entity declaration doesn't start and stop in the same entity\n");
3803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
3806 NEXT;
3807 }
3808 if (orig != NULL) {
3809 /*
3810 * Ugly mechanism to save the raw entity value.
3811 */
3812 xmlEntityPtr cur = NULL;
3813
3814 if (isParameter) {
3815 if ((ctxt->sax != NULL) &&
3816 (ctxt->sax->getParameterEntity != NULL))
3817 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3818 } else {
3819 if ((ctxt->sax != NULL) &&
3820 (ctxt->sax->getEntity != NULL))
3821 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003822 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3823 cur = getEntity(ctxt, name);
3824 }
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (cur != NULL) {
3827 if (cur->orig != NULL)
3828 xmlFree(orig);
3829 else
3830 cur->orig = orig;
3831 } else
3832 xmlFree(orig);
3833 }
3834 if (name != NULL) xmlFree(name);
3835 if (value != NULL) xmlFree(value);
3836 if (URI != NULL) xmlFree(URI);
3837 if (literal != NULL) xmlFree(literal);
3838 if (ndata != NULL) xmlFree(ndata);
3839 }
3840}
3841
3842/**
3843 * xmlParseDefaultDecl:
3844 * @ctxt: an XML parser context
3845 * @value: Receive a possible fixed default value for the attribute
3846 *
3847 * Parse an attribute default declaration
3848 *
3849 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3850 *
3851 * [ VC: Required Attribute ]
3852 * if the default declaration is the keyword #REQUIRED, then the
3853 * attribute must be specified for all elements of the type in the
3854 * attribute-list declaration.
3855 *
3856 * [ VC: Attribute Default Legal ]
3857 * The declared default value must meet the lexical constraints of
3858 * the declared attribute type c.f. xmlValidateAttributeDecl()
3859 *
3860 * [ VC: Fixed Attribute Default ]
3861 * if an attribute has a default value declared with the #FIXED
3862 * keyword, instances of that attribute must match the default value.
3863 *
3864 * [ WFC: No < in Attribute Values ]
3865 * handled in xmlParseAttValue()
3866 *
3867 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3868 * or XML_ATTRIBUTE_FIXED.
3869 */
3870
3871int
3872xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3873 int val;
3874 xmlChar *ret;
3875
3876 *value = NULL;
3877 if ((RAW == '#') && (NXT(1) == 'R') &&
3878 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3879 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3880 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3881 (NXT(8) == 'D')) {
3882 SKIP(9);
3883 return(XML_ATTRIBUTE_REQUIRED);
3884 }
3885 if ((RAW == '#') && (NXT(1) == 'I') &&
3886 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3887 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3888 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3889 SKIP(8);
3890 return(XML_ATTRIBUTE_IMPLIED);
3891 }
3892 val = XML_ATTRIBUTE_NONE;
3893 if ((RAW == '#') && (NXT(1) == 'F') &&
3894 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3895 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3896 SKIP(6);
3897 val = XML_ATTRIBUTE_FIXED;
3898 if (!IS_BLANK(CUR)) {
3899 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3901 ctxt->sax->error(ctxt->userData,
3902 "Space required after '#FIXED'\n");
3903 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003904 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 SKIP_BLANKS;
3907 }
3908 ret = xmlParseAttValue(ctxt);
3909 ctxt->instate = XML_PARSER_DTD;
3910 if (ret == NULL) {
3911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3912 ctxt->sax->error(ctxt->userData,
3913 "Attribute default value declaration error\n");
3914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else
3917 *value = ret;
3918 return(val);
3919}
3920
3921/**
3922 * xmlParseNotationType:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse an Notation attribute type.
3926 *
3927 * Note: the leading 'NOTATION' S part has already being parsed...
3928 *
3929 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3930 *
3931 * [ VC: Notation Attributes ]
3932 * Values of this type must match one of the notation names included
3933 * in the declaration; all notation names in the declaration must be declared.
3934 *
3935 * Returns: the notation attribute tree built while parsing
3936 */
3937
3938xmlEnumerationPtr
3939xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3940 xmlChar *name;
3941 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3942
3943 if (RAW != '(') {
3944 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "'(' required to start 'NOTATION'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(NULL);
3951 }
3952 SHRINK;
3953 do {
3954 NEXT;
3955 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003956 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (name == NULL) {
3958 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "Name expected in NOTATION declaration\n");
3962 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003963 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return(ret);
3965 }
3966 cur = xmlCreateEnumeration(name);
3967 xmlFree(name);
3968 if (cur == NULL) return(ret);
3969 if (last == NULL) ret = last = cur;
3970 else {
3971 last->next = cur;
3972 last = cur;
3973 }
3974 SKIP_BLANKS;
3975 } while (RAW == '|');
3976 if (RAW != ')') {
3977 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3979 ctxt->sax->error(ctxt->userData,
3980 "')' required to finish NOTATION declaration\n");
3981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if ((last != NULL) && (last != ret))
3984 xmlFreeEnumeration(last);
3985 return(ret);
3986 }
3987 NEXT;
3988 return(ret);
3989}
3990
3991/**
3992 * xmlParseEnumerationType:
3993 * @ctxt: an XML parser context
3994 *
3995 * parse an Enumeration attribute type.
3996 *
3997 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3998 *
3999 * [ VC: Enumeration ]
4000 * Values of this type must match one of the Nmtoken tokens in
4001 * the declaration
4002 *
4003 * Returns: the enumeration attribute tree built while parsing
4004 */
4005
4006xmlEnumerationPtr
4007xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4008 xmlChar *name;
4009 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4010
4011 if (RAW != '(') {
4012 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4014 ctxt->sax->error(ctxt->userData,
4015 "'(' required to start ATTLIST enumeration\n");
4016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004018 return(NULL);
4019 }
4020 SHRINK;
4021 do {
4022 NEXT;
4023 SKIP_BLANKS;
4024 name = xmlParseNmtoken(ctxt);
4025 if (name == NULL) {
4026 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "NmToken expected in ATTLIST enumeration\n");
4030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 return(ret);
4033 }
4034 cur = xmlCreateEnumeration(name);
4035 xmlFree(name);
4036 if (cur == NULL) return(ret);
4037 if (last == NULL) ret = last = cur;
4038 else {
4039 last->next = cur;
4040 last = cur;
4041 }
4042 SKIP_BLANKS;
4043 } while (RAW == '|');
4044 if (RAW != ')') {
4045 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "')' required to finish ATTLIST enumeration\n");
4049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004051 return(ret);
4052 }
4053 NEXT;
4054 return(ret);
4055}
4056
4057/**
4058 * xmlParseEnumeratedType:
4059 * @ctxt: an XML parser context
4060 * @tree: the enumeration tree built while parsing
4061 *
4062 * parse an Enumerated attribute type.
4063 *
4064 * [57] EnumeratedType ::= NotationType | Enumeration
4065 *
4066 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4067 *
4068 *
4069 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4070 */
4071
4072int
4073xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4074 if ((RAW == 'N') && (NXT(1) == 'O') &&
4075 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4076 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4077 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4078 SKIP(8);
4079 if (!IS_BLANK(CUR)) {
4080 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4082 ctxt->sax->error(ctxt->userData,
4083 "Space required after 'NOTATION'\n");
4084 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 return(0);
4087 }
4088 SKIP_BLANKS;
4089 *tree = xmlParseNotationType(ctxt);
4090 if (*tree == NULL) return(0);
4091 return(XML_ATTRIBUTE_NOTATION);
4092 }
4093 *tree = xmlParseEnumerationType(ctxt);
4094 if (*tree == NULL) return(0);
4095 return(XML_ATTRIBUTE_ENUMERATION);
4096}
4097
4098/**
4099 * xmlParseAttributeType:
4100 * @ctxt: an XML parser context
4101 * @tree: the enumeration tree built while parsing
4102 *
4103 * parse the Attribute list def for an element
4104 *
4105 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4106 *
4107 * [55] StringType ::= 'CDATA'
4108 *
4109 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4110 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4111 *
4112 * Validity constraints for attribute values syntax are checked in
4113 * xmlValidateAttributeValue()
4114 *
4115 * [ VC: ID ]
4116 * Values of type ID must match the Name production. A name must not
4117 * appear more than once in an XML document as a value of this type;
4118 * i.e., ID values must uniquely identify the elements which bear them.
4119 *
4120 * [ VC: One ID per Element Type ]
4121 * No element type may have more than one ID attribute specified.
4122 *
4123 * [ VC: ID Attribute Default ]
4124 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4125 *
4126 * [ VC: IDREF ]
4127 * Values of type IDREF must match the Name production, and values
4128 * of type IDREFS must match Names; each IDREF Name must match the value
4129 * of an ID attribute on some element in the XML document; i.e. IDREF
4130 * values must match the value of some ID attribute.
4131 *
4132 * [ VC: Entity Name ]
4133 * Values of type ENTITY must match the Name production, values
4134 * of type ENTITIES must match Names; each Entity Name must match the
4135 * name of an unparsed entity declared in the DTD.
4136 *
4137 * [ VC: Name Token ]
4138 * Values of type NMTOKEN must match the Nmtoken production; values
4139 * of type NMTOKENS must match Nmtokens.
4140 *
4141 * Returns the attribute type
4142 */
4143int
4144xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4145 SHRINK;
4146 if ((RAW == 'C') && (NXT(1) == 'D') &&
4147 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4148 (NXT(4) == 'A')) {
4149 SKIP(5);
4150 return(XML_ATTRIBUTE_CDATA);
4151 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4152 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4153 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4154 SKIP(6);
4155 return(XML_ATTRIBUTE_IDREFS);
4156 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4157 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4158 (NXT(4) == 'F')) {
4159 SKIP(5);
4160 return(XML_ATTRIBUTE_IDREF);
4161 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4162 SKIP(2);
4163 return(XML_ATTRIBUTE_ID);
4164 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4165 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4166 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4167 SKIP(6);
4168 return(XML_ATTRIBUTE_ENTITY);
4169 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4170 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4171 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4172 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4173 SKIP(8);
4174 return(XML_ATTRIBUTE_ENTITIES);
4175 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4176 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4177 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4178 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4179 SKIP(8);
4180 return(XML_ATTRIBUTE_NMTOKENS);
4181 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4182 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4183 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4184 (NXT(6) == 'N')) {
4185 SKIP(7);
4186 return(XML_ATTRIBUTE_NMTOKEN);
4187 }
4188 return(xmlParseEnumeratedType(ctxt, tree));
4189}
4190
4191/**
4192 * xmlParseAttributeListDecl:
4193 * @ctxt: an XML parser context
4194 *
4195 * : parse the Attribute list def for an element
4196 *
4197 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4198 *
4199 * [53] AttDef ::= S Name S AttType S DefaultDecl
4200 *
4201 */
4202void
4203xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4204 xmlChar *elemName;
4205 xmlChar *attrName;
4206 xmlEnumerationPtr tree;
4207
4208 if ((RAW == '<') && (NXT(1) == '!') &&
4209 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4210 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4211 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4212 (NXT(8) == 'T')) {
4213 xmlParserInputPtr input = ctxt->input;
4214
4215 SKIP(9);
4216 if (!IS_BLANK(CUR)) {
4217 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4219 ctxt->sax->error(ctxt->userData,
4220 "Space required after '<!ATTLIST'\n");
4221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (elemName == NULL) {
4227 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4229 ctxt->sax->error(ctxt->userData,
4230 "ATTLIST: no name for Element\n");
4231 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004232 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236 GROW;
4237 while (RAW != '>') {
4238 const xmlChar *check = CUR_PTR;
4239 int type;
4240 int def;
4241 xmlChar *defaultValue = NULL;
4242
4243 GROW;
4244 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (attrName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Attribute\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 break;
4254 }
4255 GROW;
4256 if (!IS_BLANK(CUR)) {
4257 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4259 ctxt->sax->error(ctxt->userData,
4260 "Space required after the attribute name\n");
4261 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004262 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004263 if (attrName != NULL)
4264 xmlFree(attrName);
4265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 break;
4268 }
4269 SKIP_BLANKS;
4270
4271 type = xmlParseAttributeType(ctxt, &tree);
4272 if (type <= 0) {
4273 if (attrName != NULL)
4274 xmlFree(attrName);
4275 if (defaultValue != NULL)
4276 xmlFree(defaultValue);
4277 break;
4278 }
4279
4280 GROW;
4281 if (!IS_BLANK(CUR)) {
4282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285 "Space required after the attribute type\n");
4286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (attrName != NULL)
4289 xmlFree(attrName);
4290 if (defaultValue != NULL)
4291 xmlFree(defaultValue);
4292 if (tree != NULL)
4293 xmlFreeEnumeration(tree);
4294 break;
4295 }
4296 SKIP_BLANKS;
4297
4298 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4299 if (def <= 0) {
4300 if (attrName != NULL)
4301 xmlFree(attrName);
4302 if (defaultValue != NULL)
4303 xmlFree(defaultValue);
4304 if (tree != NULL)
4305 xmlFreeEnumeration(tree);
4306 break;
4307 }
4308
4309 GROW;
4310 if (RAW != '>') {
4311 if (!IS_BLANK(CUR)) {
4312 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "Space required after the attribute default value\n");
4316 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004317 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 if (tree != NULL)
4323 xmlFreeEnumeration(tree);
4324 break;
4325 }
4326 SKIP_BLANKS;
4327 }
4328 if (check == CUR_PTR) {
4329 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "xmlParseAttributeListDecl: detected internal error\n");
4333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4342 (ctxt->sax->attributeDecl != NULL))
4343 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4344 type, def, defaultValue, tree);
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 GROW;
4350 }
4351 if (RAW == '>') {
4352 if (input != ctxt->input) {
4353 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4355 ctxt->sax->error(ctxt->userData,
4356"Attribute list declaration doesn't start and stop in the same entity\n");
4357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 }
4360 NEXT;
4361 }
4362
4363 xmlFree(elemName);
4364 }
4365}
4366
4367/**
4368 * xmlParseElementMixedContentDecl:
4369 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004370 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004371 *
4372 * parse the declaration for a Mixed Element content
4373 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4374 *
4375 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4376 * '(' S? '#PCDATA' S? ')'
4377 *
4378 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4379 *
4380 * [ VC: No Duplicate Types ]
4381 * The same name must not appear more than once in a single
4382 * mixed-content declaration.
4383 *
4384 * returns: the list of the xmlElementContentPtr describing the element choices
4385 */
4386xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004387xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 xmlElementContentPtr ret = NULL, cur = NULL, n;
4389 xmlChar *elem = NULL;
4390
4391 GROW;
4392 if ((RAW == '#') && (NXT(1) == 'P') &&
4393 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4394 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4395 (NXT(6) == 'A')) {
4396 SKIP(7);
4397 SKIP_BLANKS;
4398 SHRINK;
4399 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004400 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if (ctxt->vctxt.error != NULL)
4403 ctxt->vctxt.error(ctxt->vctxt.userData,
4404"Element content declaration doesn't start and stop in the same entity\n");
4405 ctxt->valid = 0;
4406 }
Owen Taylor3473f882001-02-23 17:55:21 +00004407 NEXT;
4408 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4409 if (RAW == '*') {
4410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4411 NEXT;
4412 }
4413 return(ret);
4414 }
4415 if ((RAW == '(') || (RAW == '|')) {
4416 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4417 if (ret == NULL) return(NULL);
4418 }
4419 while (RAW == '|') {
4420 NEXT;
4421 if (elem == NULL) {
4422 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4423 if (ret == NULL) return(NULL);
4424 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004425 if (cur != NULL)
4426 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 cur = ret;
4428 } else {
4429 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4430 if (n == NULL) return(NULL);
4431 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004432 if (n->c1 != NULL)
4433 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004435 if (n != NULL)
4436 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 cur = n;
4438 xmlFree(elem);
4439 }
4440 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004441 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 if (elem == NULL) {
4443 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4445 ctxt->sax->error(ctxt->userData,
4446 "xmlParseElementMixedContentDecl : Name expected\n");
4447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(cur);
4450 return(NULL);
4451 }
4452 SKIP_BLANKS;
4453 GROW;
4454 }
4455 if ((RAW == ')') && (NXT(1) == '*')) {
4456 if (elem != NULL) {
4457 cur->c2 = xmlNewElementContent(elem,
4458 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004459 if (cur->c2 != NULL)
4460 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 xmlFree(elem);
4462 }
4463 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004464 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4465 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4466 if (ctxt->vctxt.error != NULL)
4467 ctxt->vctxt.error(ctxt->vctxt.userData,
4468"Element content declaration doesn't start and stop in the same entity\n");
4469 ctxt->valid = 0;
4470 }
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(2);
4472 } else {
4473 if (elem != NULL) xmlFree(elem);
4474 xmlFreeElementContent(ret);
4475 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(NULL);
4482 }
4483
4484 } else {
4485 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
4488 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4489 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004490 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 return(ret);
4493}
4494
4495/**
4496 * xmlParseElementChildrenContentDecl:
4497 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004498 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004499 *
4500 * parse the declaration for an Element children content
4501 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4502 *
4503 *
4504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4505 *
4506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4507 *
4508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4509 *
4510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4511 *
4512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4513 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004514 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004515 * opening or closing parentheses in a choice, seq, or Mixed
4516 * construct is contained in the replacement text for a parameter
4517 * entity, both must be contained in the same replacement text. For
4518 * interoperability, if a parameter-entity reference appears in a
4519 * choice, seq, or Mixed construct, its replacement text should not
4520 * be empty, and neither the first nor last non-blank character of
4521 * the replacement text should be a connector (| or ,).
4522 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004523 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004524 * hierarchy.
4525 */
4526xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004527xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004528(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004529 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4530 xmlChar *elem;
4531 xmlChar type = 0;
4532
4533 SKIP_BLANKS;
4534 GROW;
4535 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004536 xmlParserInputPtr input = ctxt->input;
4537
Owen Taylor3473f882001-02-23 17:55:21 +00004538 /* Recurse on first child */
4539 NEXT;
4540 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004541 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004542 SKIP_BLANKS;
4543 GROW;
4544 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004545 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004546 if (elem == NULL) {
4547 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(NULL);
4554 }
4555 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4556 GROW;
4557 if (RAW == '?') {
4558 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4559 NEXT;
4560 } else if (RAW == '*') {
4561 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4562 NEXT;
4563 } else if (RAW == '+') {
4564 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4565 NEXT;
4566 } else {
4567 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4568 }
4569 xmlFree(elem);
4570 GROW;
4571 }
4572 SKIP_BLANKS;
4573 SHRINK;
4574 while (RAW != ')') {
4575 /*
4576 * Each loop we parse one separator and one element.
4577 */
4578 if (RAW == ',') {
4579 if (type == 0) type = CUR;
4580
4581 /*
4582 * Detect "Name | Name , Name" error
4583 */
4584 else if (type != CUR) {
4585 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4589 type);
4590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004592 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004593 xmlFreeElementContent(last);
4594 if (ret != NULL)
4595 xmlFreeElementContent(ret);
4596 return(NULL);
4597 }
4598 NEXT;
4599
4600 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4601 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004602 if ((last != NULL) && (last != ret))
4603 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 xmlFreeElementContent(ret);
4605 return(NULL);
4606 }
4607 if (last == NULL) {
4608 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (ret != NULL)
4610 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 ret = cur = op;
4612 } else {
4613 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004614 if (op != NULL)
4615 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004617 if (last != NULL)
4618 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004619 cur =op;
4620 last = NULL;
4621 }
4622 } else if (RAW == '|') {
4623 if (type == 0) type = CUR;
4624
4625 /*
4626 * Detect "Name , Name | Name" error
4627 */
4628 else if (type != CUR) {
4629 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4631 ctxt->sax->error(ctxt->userData,
4632 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4633 type);
4634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004636 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004637 xmlFreeElementContent(last);
4638 if (ret != NULL)
4639 xmlFreeElementContent(ret);
4640 return(NULL);
4641 }
4642 NEXT;
4643
4644 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4645 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 if (last == NULL) {
4653 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004654 if (ret != NULL)
4655 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 ret = cur = op;
4657 } else {
4658 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004659 if (op != NULL)
4660 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004662 if (last != NULL)
4663 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 cur =op;
4665 last = NULL;
4666 }
4667 } else {
4668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 GROW;
4679 SKIP_BLANKS;
4680 GROW;
4681 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004682 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004683 /* Recurse on second child */
4684 NEXT;
4685 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004686 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 SKIP_BLANKS;
4688 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004689 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if (elem == NULL) {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 if (ret != NULL)
4698 xmlFreeElementContent(ret);
4699 return(NULL);
4700 }
4701 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4702 xmlFree(elem);
4703 if (RAW == '?') {
4704 last->ocur = XML_ELEMENT_CONTENT_OPT;
4705 NEXT;
4706 } else if (RAW == '*') {
4707 last->ocur = XML_ELEMENT_CONTENT_MULT;
4708 NEXT;
4709 } else if (RAW == '+') {
4710 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4711 NEXT;
4712 } else {
4713 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4714 }
4715 }
4716 SKIP_BLANKS;
4717 GROW;
4718 }
4719 if ((cur != NULL) && (last != NULL)) {
4720 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004721 if (last != NULL)
4722 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004724 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4725 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4726 if (ctxt->vctxt.error != NULL)
4727 ctxt->vctxt.error(ctxt->vctxt.userData,
4728"Element content declaration doesn't start and stop in the same entity\n");
4729 ctxt->valid = 0;
4730 }
Owen Taylor3473f882001-02-23 17:55:21 +00004731 NEXT;
4732 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004733 if (ret != NULL)
4734 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 NEXT;
4736 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004737 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004738 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 cur = ret;
4740 /*
4741 * Some normalization:
4742 * (a | b* | c?)* == (a | b | c)*
4743 */
4744 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4745 if ((cur->c1 != NULL) &&
4746 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4747 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4748 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4749 if ((cur->c2 != NULL) &&
4750 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4752 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 cur = cur->c2;
4754 }
4755 }
Owen Taylor3473f882001-02-23 17:55:21 +00004756 NEXT;
4757 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004758 if (ret != NULL) {
4759 int found = 0;
4760
Daniel Veillarde470df72001-04-18 21:41:07 +00004761 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004762 /*
4763 * Some normalization:
4764 * (a | b*)+ == (a | b)*
4765 * (a | b?)+ == (a | b)*
4766 */
4767 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4768 if ((cur->c1 != NULL) &&
4769 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4770 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4771 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4772 found = 1;
4773 }
4774 if ((cur->c2 != NULL) &&
4775 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4776 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4777 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4778 found = 1;
4779 }
4780 cur = cur->c2;
4781 }
4782 if (found)
4783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 }
4787 return(ret);
4788}
4789
4790/**
4791 * xmlParseElementContentDecl:
4792 * @ctxt: an XML parser context
4793 * @name: the name of the element being defined.
4794 * @result: the Element Content pointer will be stored here if any
4795 *
4796 * parse the declaration for an Element content either Mixed or Children,
4797 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4798 *
4799 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4800 *
4801 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4802 */
4803
4804int
4805xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4806 xmlElementContentPtr *result) {
4807
4808 xmlElementContentPtr tree = NULL;
4809 xmlParserInputPtr input = ctxt->input;
4810 int res;
4811
4812 *result = NULL;
4813
4814 if (RAW != '(') {
4815 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004818 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(-1);
4822 }
4823 NEXT;
4824 GROW;
4825 SKIP_BLANKS;
4826 if ((RAW == '#') && (NXT(1) == 'P') &&
4827 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4828 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4829 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004830 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_MIXED;
4832 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004833 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004834 res = XML_ELEMENT_TYPE_ELEMENT;
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP_BLANKS;
4837 *result = tree;
4838 return(res);
4839}
4840
4841/**
4842 * xmlParseElementDecl:
4843 * @ctxt: an XML parser context
4844 *
4845 * parse an Element declaration.
4846 *
4847 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4848 *
4849 * [ VC: Unique Element Type Declaration ]
4850 * No element type may be declared more than once
4851 *
4852 * Returns the type of the element, or -1 in case of error
4853 */
4854int
4855xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4856 xmlChar *name;
4857 int ret = -1;
4858 xmlElementContentPtr content = NULL;
4859
4860 GROW;
4861 if ((RAW == '<') && (NXT(1) == '!') &&
4862 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4863 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4864 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4865 (NXT(8) == 'T')) {
4866 xmlParserInputPtr input = ctxt->input;
4867
4868 SKIP(9);
4869 if (!IS_BLANK(CUR)) {
4870 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "Space required after 'ELEMENT'\n");
4874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004878 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (name == NULL) {
4880 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4882 ctxt->sax->error(ctxt->userData,
4883 "xmlParseElementDecl: no name for Element\n");
4884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(-1);
4887 }
4888 while ((RAW == 0) && (ctxt->inputNr > 1))
4889 xmlPopInput(ctxt);
4890 if (!IS_BLANK(CUR)) {
4891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4893 ctxt->sax->error(ctxt->userData,
4894 "Space required after the element name\n");
4895 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 }
4898 SKIP_BLANKS;
4899 if ((RAW == 'E') && (NXT(1) == 'M') &&
4900 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4901 (NXT(4) == 'Y')) {
4902 SKIP(5);
4903 /*
4904 * Element must always be empty.
4905 */
4906 ret = XML_ELEMENT_TYPE_EMPTY;
4907 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4908 (NXT(2) == 'Y')) {
4909 SKIP(3);
4910 /*
4911 * Element is a generic container.
4912 */
4913 ret = XML_ELEMENT_TYPE_ANY;
4914 } else if (RAW == '(') {
4915 ret = xmlParseElementContentDecl(ctxt, name, &content);
4916 } else {
4917 /*
4918 * [ WFC: PEs in Internal Subset ] error handling.
4919 */
4920 if ((RAW == '%') && (ctxt->external == 0) &&
4921 (ctxt->inputNr == 1)) {
4922 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4924 ctxt->sax->error(ctxt->userData,
4925 "PEReference: forbidden within markup decl in internal subset\n");
4926 } else {
4927 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4931 }
4932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name != NULL) xmlFree(name);
4935 return(-1);
4936 }
4937
4938 SKIP_BLANKS;
4939 /*
4940 * Pop-up of finished entities.
4941 */
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 SKIP_BLANKS;
4945
4946 if (RAW != '>') {
4947 ctxt->errNo = XML_ERR_GT_REQUIRED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: expected '>' at the end\n");
4951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004953 } else {
4954 if (input != ctxt->input) {
4955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4957 ctxt->sax->error(ctxt->userData,
4958"Element declaration doesn't start and stop in the same entity\n");
4959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962
4963 NEXT;
4964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4965 (ctxt->sax->elementDecl != NULL))
4966 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4967 content);
4968 }
4969 if (content != NULL) {
4970 xmlFreeElementContent(content);
4971 }
4972 if (name != NULL) {
4973 xmlFree(name);
4974 }
4975 }
4976 return(ret);
4977}
4978
4979/**
Owen Taylor3473f882001-02-23 17:55:21 +00004980 * xmlParseConditionalSections
4981 * @ctxt: an XML parser context
4982 *
4983 * [61] conditionalSect ::= includeSect | ignoreSect
4984 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4985 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4986 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4987 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4988 */
4989
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004990static void
Owen Taylor3473f882001-02-23 17:55:21 +00004991xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4992 SKIP(3);
4993 SKIP_BLANKS;
4994 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4995 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4996 (NXT(6) == 'E')) {
4997 SKIP(7);
4998 SKIP_BLANKS;
4999 if (RAW != '[') {
5000 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5002 ctxt->sax->error(ctxt->userData,
5003 "XML conditional section '[' expected\n");
5004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 } else {
5007 NEXT;
5008 }
5009 if (xmlParserDebugEntities) {
5010 if ((ctxt->input != NULL) && (ctxt->input->filename))
5011 xmlGenericError(xmlGenericErrorContext,
5012 "%s(%d): ", ctxt->input->filename,
5013 ctxt->input->line);
5014 xmlGenericError(xmlGenericErrorContext,
5015 "Entering INCLUDE Conditional Section\n");
5016 }
5017
5018 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5019 (NXT(2) != '>'))) {
5020 const xmlChar *check = CUR_PTR;
5021 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5024 xmlParseConditionalSections(ctxt);
5025 } else if (IS_BLANK(CUR)) {
5026 NEXT;
5027 } else if (RAW == '%') {
5028 xmlParsePEReference(ctxt);
5029 } else
5030 xmlParseMarkupDecl(ctxt);
5031
5032 /*
5033 * Pop-up of finished entities.
5034 */
5035 while ((RAW == 0) && (ctxt->inputNr > 1))
5036 xmlPopInput(ctxt);
5037
Daniel Veillardfdc91562002-07-01 21:52:03 +00005038 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "Content error in the external subset\n");
5043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 break;
5046 }
5047 }
5048 if (xmlParserDebugEntities) {
5049 if ((ctxt->input != NULL) && (ctxt->input->filename))
5050 xmlGenericError(xmlGenericErrorContext,
5051 "%s(%d): ", ctxt->input->filename,
5052 ctxt->input->line);
5053 xmlGenericError(xmlGenericErrorContext,
5054 "Leaving INCLUDE Conditional Section\n");
5055 }
5056
5057 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5058 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5059 int state;
5060 int instate;
5061 int depth = 0;
5062
5063 SKIP(6);
5064 SKIP_BLANKS;
5065 if (RAW != '[') {
5066 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5068 ctxt->sax->error(ctxt->userData,
5069 "XML conditional section '[' expected\n");
5070 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005071 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 } else {
5073 NEXT;
5074 }
5075 if (xmlParserDebugEntities) {
5076 if ((ctxt->input != NULL) && (ctxt->input->filename))
5077 xmlGenericError(xmlGenericErrorContext,
5078 "%s(%d): ", ctxt->input->filename,
5079 ctxt->input->line);
5080 xmlGenericError(xmlGenericErrorContext,
5081 "Entering IGNORE Conditional Section\n");
5082 }
5083
5084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005085 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * But disable SAX event generating DTD building in the meantime
5087 */
5088 state = ctxt->disableSAX;
5089 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005090 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 ctxt->instate = XML_PARSER_IGNORE;
5092
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005093 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5095 depth++;
5096 SKIP(3);
5097 continue;
5098 }
5099 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5100 if (--depth >= 0) SKIP(3);
5101 continue;
5102 }
5103 NEXT;
5104 continue;
5105 }
5106
5107 ctxt->disableSAX = state;
5108 ctxt->instate = instate;
5109
5110 if (xmlParserDebugEntities) {
5111 if ((ctxt->input != NULL) && (ctxt->input->filename))
5112 xmlGenericError(xmlGenericErrorContext,
5113 "%s(%d): ", ctxt->input->filename,
5114 ctxt->input->line);
5115 xmlGenericError(xmlGenericErrorContext,
5116 "Leaving IGNORE Conditional Section\n");
5117 }
5118
5119 } else {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 }
5127
5128 if (RAW == 0)
5129 SHRINK;
5130
5131 if (RAW == 0) {
5132 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5134 ctxt->sax->error(ctxt->userData,
5135 "XML conditional section not closed\n");
5136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 } else {
5139 SKIP(3);
5140 }
5141}
5142
5143/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005144 * xmlParseMarkupDecl:
5145 * @ctxt: an XML parser context
5146 *
5147 * parse Markup declarations
5148 *
5149 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5150 * NotationDecl | PI | Comment
5151 *
5152 * [ VC: Proper Declaration/PE Nesting ]
5153 * Parameter-entity replacement text must be properly nested with
5154 * markup declarations. That is to say, if either the first character
5155 * or the last character of a markup declaration (markupdecl above) is
5156 * contained in the replacement text for a parameter-entity reference,
5157 * both must be contained in the same replacement text.
5158 *
5159 * [ WFC: PEs in Internal Subset ]
5160 * In the internal DTD subset, parameter-entity references can occur
5161 * only where markup declarations can occur, not within markup declarations.
5162 * (This does not apply to references that occur in external parameter
5163 * entities or to the external subset.)
5164 */
5165void
5166xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5167 GROW;
5168 xmlParseElementDecl(ctxt);
5169 xmlParseAttributeListDecl(ctxt);
5170 xmlParseEntityDecl(ctxt);
5171 xmlParseNotationDecl(ctxt);
5172 xmlParsePI(ctxt);
5173 xmlParseComment(ctxt);
5174 /*
5175 * This is only for internal subset. On external entities,
5176 * the replacement is done before parsing stage
5177 */
5178 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5179 xmlParsePEReference(ctxt);
5180
5181 /*
5182 * Conditional sections are allowed from entities included
5183 * by PE References in the internal subset.
5184 */
5185 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5186 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5187 xmlParseConditionalSections(ctxt);
5188 }
5189 }
5190
5191 ctxt->instate = XML_PARSER_DTD;
5192}
5193
5194/**
5195 * xmlParseTextDecl:
5196 * @ctxt: an XML parser context
5197 *
5198 * parse an XML declaration header for external entities
5199 *
5200 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5201 *
5202 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5203 */
5204
5205void
5206xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5207 xmlChar *version;
5208
5209 /*
5210 * We know that '<?xml' is here.
5211 */
5212 if ((RAW == '<') && (NXT(1) == '?') &&
5213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5215 SKIP(5);
5216 } else {
5217 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5219 ctxt->sax->error(ctxt->userData,
5220 "Text declaration '<?xml' required\n");
5221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005223
5224 return;
5225 }
5226
5227 if (!IS_BLANK(CUR)) {
5228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Space needed after '<?xml'\n");
5232 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 }
5235 SKIP_BLANKS;
5236
5237 /*
5238 * We may have the VersionInfo here.
5239 */
5240 version = xmlParseVersionInfo(ctxt);
5241 if (version == NULL)
5242 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005243 else {
5244 if (!IS_BLANK(CUR)) {
5245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005250 }
5251 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 ctxt->input->version = version;
5253
5254 /*
5255 * We must have the encoding declaration
5256 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005257 xmlParseEncodingDecl(ctxt);
5258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5259 /*
5260 * The XML REC instructs us to stop parsing right here
5261 */
5262 return;
5263 }
5264
5265 SKIP_BLANKS;
5266 if ((RAW == '?') && (NXT(1) == '>')) {
5267 SKIP(2);
5268 } else if (RAW == '>') {
5269 /* Deprecated old WD ... */
5270 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "XML declaration must end-up with '?>'\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 NEXT;
5277 } else {
5278 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "parsing XML declaration: '?>' expected\n");
5282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005284 MOVETO_ENDTAG(CUR_PTR);
5285 NEXT;
5286 }
5287}
5288
5289/**
Owen Taylor3473f882001-02-23 17:55:21 +00005290 * xmlParseExternalSubset:
5291 * @ctxt: an XML parser context
5292 * @ExternalID: the external identifier
5293 * @SystemID: the system identifier (or URL)
5294 *
5295 * parse Markup declarations from an external subset
5296 *
5297 * [30] extSubset ::= textDecl? extSubsetDecl
5298 *
5299 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5300 */
5301void
5302xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5303 const xmlChar *SystemID) {
5304 GROW;
5305 if ((RAW == '<') && (NXT(1) == '?') &&
5306 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5307 (NXT(4) == 'l')) {
5308 xmlParseTextDecl(ctxt);
5309 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5310 /*
5311 * The XML REC instructs us to stop parsing right here
5312 */
5313 ctxt->instate = XML_PARSER_EOF;
5314 return;
5315 }
5316 }
5317 if (ctxt->myDoc == NULL) {
5318 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5319 }
5320 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5321 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5322
5323 ctxt->instate = XML_PARSER_DTD;
5324 ctxt->external = 1;
5325 while (((RAW == '<') && (NXT(1) == '?')) ||
5326 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005327 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005328 const xmlChar *check = CUR_PTR;
5329 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005330
5331 GROW;
5332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5333 xmlParseConditionalSections(ctxt);
5334 } else if (IS_BLANK(CUR)) {
5335 NEXT;
5336 } else if (RAW == '%') {
5337 xmlParsePEReference(ctxt);
5338 } else
5339 xmlParseMarkupDecl(ctxt);
5340
5341 /*
5342 * Pop-up of finished entities.
5343 */
5344 while ((RAW == 0) && (ctxt->inputNr > 1))
5345 xmlPopInput(ctxt);
5346
Daniel Veillardfdc91562002-07-01 21:52:03 +00005347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "Content error in the external subset\n");
5352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 break;
5355 }
5356 }
5357
5358 if (RAW != 0) {
5359 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "Extra content at the end of the document\n");
5363 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005364 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005365 }
5366
5367}
5368
5369/**
5370 * xmlParseReference:
5371 * @ctxt: an XML parser context
5372 *
5373 * parse and handle entity references in content, depending on the SAX
5374 * interface, this may end-up in a call to character() if this is a
5375 * CharRef, a predefined entity, if there is no reference() callback.
5376 * or if the parser was asked to switch to that mode.
5377 *
5378 * [67] Reference ::= EntityRef | CharRef
5379 */
5380void
5381xmlParseReference(xmlParserCtxtPtr ctxt) {
5382 xmlEntityPtr ent;
5383 xmlChar *val;
5384 if (RAW != '&') return;
5385
5386 if (NXT(1) == '#') {
5387 int i = 0;
5388 xmlChar out[10];
5389 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005390 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5393 /*
5394 * So we are using non-UTF-8 buffers
5395 * Check that the char fit on 8bits, if not
5396 * generate a CharRef.
5397 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005398 if (value <= 0xFF) {
5399 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 out[1] = 0;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5402 (!ctxt->disableSAX))
5403 ctxt->sax->characters(ctxt->userData, out, 1);
5404 } else {
5405 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005406 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005408 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5410 (!ctxt->disableSAX))
5411 ctxt->sax->reference(ctxt->userData, out);
5412 }
5413 } else {
5414 /*
5415 * Just encode the value in UTF-8
5416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005417 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 out[i] = 0;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, out, i);
5422 }
5423 } else {
5424 ent = xmlParseEntityRef(ctxt);
5425 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005426 if (!ctxt->wellFormed)
5427 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if ((ent->name != NULL) &&
5429 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5430 xmlNodePtr list = NULL;
5431 int ret;
5432
5433
5434 /*
5435 * The first reference to the entity trigger a parsing phase
5436 * where the ent->children is filled with the result from
5437 * the parsing.
5438 */
5439 if (ent->children == NULL) {
5440 xmlChar *value;
5441 value = ent->content;
5442
5443 /*
5444 * Check that this entity is well formed
5445 */
5446 if ((value != NULL) &&
5447 (value[1] == 0) && (value[0] == '<') &&
5448 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5449 /*
5450 * DONE: get definite answer on this !!!
5451 * Lots of entity decls are used to declare a single
5452 * char
5453 * <!ENTITY lt "<">
5454 * Which seems to be valid since
5455 * 2.4: The ampersand character (&) and the left angle
5456 * bracket (<) may appear in their literal form only
5457 * when used ... They are also legal within the literal
5458 * entity value of an internal entity declaration;i
5459 * see "4.3.2 Well-Formed Parsed Entities".
5460 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5461 * Looking at the OASIS test suite and James Clark
5462 * tests, this is broken. However the XML REC uses
5463 * it. Is the XML REC not well-formed ????
5464 * This is a hack to avoid this problem
5465 *
5466 * ANSWER: since lt gt amp .. are already defined,
5467 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005468 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005469 * is lousy but acceptable.
5470 */
5471 list = xmlNewDocText(ctxt->myDoc, value);
5472 if (list != NULL) {
5473 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5474 (ent->children == NULL)) {
5475 ent->children = list;
5476 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005477 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005478 list->parent = (xmlNodePtr) ent;
5479 } else {
5480 xmlFreeNodeList(list);
5481 }
5482 } else if (list != NULL) {
5483 xmlFreeNodeList(list);
5484 }
5485 } else {
5486 /*
5487 * 4.3.2: An internal general parsed entity is well-formed
5488 * if its replacement text matches the production labeled
5489 * content.
5490 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005491
5492 void *user_data;
5493 /*
5494 * This is a bit hackish but this seems the best
5495 * way to make sure both SAX and DOM entity support
5496 * behaves okay.
5497 */
5498 if (ctxt->userData == ctxt)
5499 user_data = NULL;
5500 else
5501 user_data = ctxt->userData;
5502
Owen Taylor3473f882001-02-23 17:55:21 +00005503 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5504 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005505 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5506 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005507 ctxt->depth--;
5508 } else if (ent->etype ==
5509 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5510 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005511 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005512 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005513 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005514 ctxt->depth--;
5515 } else {
5516 ret = -1;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "Internal: invalid entity type\n");
5520 }
5521 if (ret == XML_ERR_ENTITY_LOOP) {
5522 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5524 ctxt->sax->error(ctxt->userData,
5525 "Detected entity reference loop\n");
5526 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005527 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005528 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005529 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005530 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5531 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005532 (ent->children == NULL)) {
5533 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005534 if (ctxt->replaceEntities) {
5535 /*
5536 * Prune it directly in the generated document
5537 * except for single text nodes.
5538 */
5539 if ((list->type == XML_TEXT_NODE) &&
5540 (list->next == NULL)) {
5541 list->parent = (xmlNodePtr) ent;
5542 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005543 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005544 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005545 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 while (list != NULL) {
5547 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005548 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005549 if (list->next == NULL)
5550 ent->last = list;
5551 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005552 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005553 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005554 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5555 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005556 }
5557 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005558 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005559 while (list != NULL) {
5560 list->parent = (xmlNodePtr) ent;
5561 if (list->next == NULL)
5562 ent->last = list;
5563 list = list->next;
5564 }
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 } else {
5567 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005568 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005569 }
5570 } else if (ret > 0) {
5571 ctxt->errNo = ret;
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "Entity value required\n");
5575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005577 } else if (list != NULL) {
5578 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005580 }
5581 }
5582 }
5583 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5584 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5585 /*
5586 * Create a node.
5587 */
5588 ctxt->sax->reference(ctxt->userData, ent->name);
5589 return;
5590 } else if (ctxt->replaceEntities) {
5591 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5592 /*
5593 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005594 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005595 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005596 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005597 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005598 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 cur = ent->children;
5600 while (cur != NULL) {
5601 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005602 if (firstChild == NULL){
5603 firstChild = new;
5604 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005605 xmlAddChild(ctxt->node, new);
5606 if (cur == ent->last)
5607 break;
5608 cur = cur->next;
5609 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005610 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5611 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005612 } else {
5613 /*
5614 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005615 * node with a possible previous text one which
5616 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005617 */
5618 if (ent->children->type == XML_TEXT_NODE)
5619 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5620 if ((ent->last != ent->children) &&
5621 (ent->last->type == XML_TEXT_NODE))
5622 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5623 xmlAddChildList(ctxt->node, ent->children);
5624 }
5625
Owen Taylor3473f882001-02-23 17:55:21 +00005626 /*
5627 * This is to avoid a nasty side effect, see
5628 * characters() in SAX.c
5629 */
5630 ctxt->nodemem = 0;
5631 ctxt->nodelen = 0;
5632 return;
5633 } else {
5634 /*
5635 * Probably running in SAX mode
5636 */
5637 xmlParserInputPtr input;
5638
5639 input = xmlNewEntityInputStream(ctxt, ent);
5640 xmlPushInput(ctxt, input);
5641 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5642 (RAW == '<') && (NXT(1) == '?') &&
5643 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5644 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5645 xmlParseTextDecl(ctxt);
5646 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5647 /*
5648 * The XML REC instructs us to stop parsing right here
5649 */
5650 ctxt->instate = XML_PARSER_EOF;
5651 return;
5652 }
5653 if (input->standalone == 1) {
5654 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5656 ctxt->sax->error(ctxt->userData,
5657 "external parsed entities cannot be standalone\n");
5658 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005659 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005660 }
5661 }
5662 return;
5663 }
5664 }
5665 } else {
5666 val = ent->content;
5667 if (val == NULL) return;
5668 /*
5669 * inline the entity.
5670 */
5671 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5672 (!ctxt->disableSAX))
5673 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5674 }
5675 }
5676}
5677
5678/**
5679 * xmlParseEntityRef:
5680 * @ctxt: an XML parser context
5681 *
5682 * parse ENTITY references declarations
5683 *
5684 * [68] EntityRef ::= '&' Name ';'
5685 *
5686 * [ WFC: Entity Declared ]
5687 * In a document without any DTD, a document with only an internal DTD
5688 * subset which contains no parameter entity references, or a document
5689 * with "standalone='yes'", the Name given in the entity reference
5690 * must match that in an entity declaration, except that well-formed
5691 * documents need not declare any of the following entities: amp, lt,
5692 * gt, apos, quot. The declaration of a parameter entity must precede
5693 * any reference to it. Similarly, the declaration of a general entity
5694 * must precede any reference to it which appears in a default value in an
5695 * attribute-list declaration. Note that if entities are declared in the
5696 * external subset or in external parameter entities, a non-validating
5697 * processor is not obligated to read and process their declarations;
5698 * for such documents, the rule that an entity must be declared is a
5699 * well-formedness constraint only if standalone='yes'.
5700 *
5701 * [ WFC: Parsed Entity ]
5702 * An entity reference must not contain the name of an unparsed entity
5703 *
5704 * Returns the xmlEntityPtr if found, or NULL otherwise.
5705 */
5706xmlEntityPtr
5707xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5708 xmlChar *name;
5709 xmlEntityPtr ent = NULL;
5710
5711 GROW;
5712
5713 if (RAW == '&') {
5714 NEXT;
5715 name = xmlParseName(ctxt);
5716 if (name == NULL) {
5717 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5719 ctxt->sax->error(ctxt->userData,
5720 "xmlParseEntityRef: no name\n");
5721 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005722 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005723 } else {
5724 if (RAW == ';') {
5725 NEXT;
5726 /*
5727 * Ask first SAX for entity resolution, otherwise try the
5728 * predefined set.
5729 */
5730 if (ctxt->sax != NULL) {
5731 if (ctxt->sax->getEntity != NULL)
5732 ent = ctxt->sax->getEntity(ctxt->userData, name);
5733 if (ent == NULL)
5734 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005735 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5736 ent = getEntity(ctxt, name);
5737 }
Owen Taylor3473f882001-02-23 17:55:21 +00005738 }
5739 /*
5740 * [ WFC: Entity Declared ]
5741 * In a document without any DTD, a document with only an
5742 * internal DTD subset which contains no parameter entity
5743 * references, or a document with "standalone='yes'", the
5744 * Name given in the entity reference must match that in an
5745 * entity declaration, except that well-formed documents
5746 * need not declare any of the following entities: amp, lt,
5747 * gt, apos, quot.
5748 * The declaration of a parameter entity must precede any
5749 * reference to it.
5750 * Similarly, the declaration of a general entity must
5751 * precede any reference to it which appears in a default
5752 * value in an attribute-list declaration. Note that if
5753 * entities are declared in the external subset or in
5754 * external parameter entities, a non-validating processor
5755 * is not obligated to read and process their declarations;
5756 * for such documents, the rule that an entity must be
5757 * declared is a well-formedness constraint only if
5758 * standalone='yes'.
5759 */
5760 if (ent == NULL) {
5761 if ((ctxt->standalone == 1) ||
5762 ((ctxt->hasExternalSubset == 0) &&
5763 (ctxt->hasPErefs == 0))) {
5764 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5766 ctxt->sax->error(ctxt->userData,
5767 "Entity '%s' not defined\n", name);
5768 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005769 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005770 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005771 } else {
5772 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005774 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005775 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005776 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005777 }
5778 }
5779
5780 /*
5781 * [ WFC: Parsed Entity ]
5782 * An entity reference must not contain the name of an
5783 * unparsed entity
5784 */
5785 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5786 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5788 ctxt->sax->error(ctxt->userData,
5789 "Entity reference to unparsed entity %s\n", name);
5790 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005791 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005792 }
5793
5794 /*
5795 * [ WFC: No External Entity References ]
5796 * Attribute values cannot contain direct or indirect
5797 * entity references to external entities.
5798 */
5799 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5800 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5801 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5803 ctxt->sax->error(ctxt->userData,
5804 "Attribute references external entity '%s'\n", name);
5805 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005806 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005807 }
5808 /*
5809 * [ WFC: No < in Attribute Values ]
5810 * The replacement text of any entity referred to directly or
5811 * indirectly in an attribute value (other than "&lt;") must
5812 * not contain a <.
5813 */
5814 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5815 (ent != NULL) &&
5816 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5817 (ent->content != NULL) &&
5818 (xmlStrchr(ent->content, '<'))) {
5819 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5821 ctxt->sax->error(ctxt->userData,
5822 "'<' in entity '%s' is not allowed in attributes values\n", name);
5823 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005825 }
5826
5827 /*
5828 * Internal check, no parameter entities here ...
5829 */
5830 else {
5831 switch (ent->etype) {
5832 case XML_INTERNAL_PARAMETER_ENTITY:
5833 case XML_EXTERNAL_PARAMETER_ENTITY:
5834 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5836 ctxt->sax->error(ctxt->userData,
5837 "Attempt to reference the parameter entity '%s'\n", name);
5838 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005839 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005840 break;
5841 default:
5842 break;
5843 }
5844 }
5845
5846 /*
5847 * [ WFC: No Recursion ]
5848 * A parsed entity must not contain a recursive reference
5849 * to itself, either directly or indirectly.
5850 * Done somewhere else
5851 */
5852
5853 } else {
5854 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5856 ctxt->sax->error(ctxt->userData,
5857 "xmlParseEntityRef: expecting ';'\n");
5858 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005859 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005860 }
5861 xmlFree(name);
5862 }
5863 }
5864 return(ent);
5865}
5866
5867/**
5868 * xmlParseStringEntityRef:
5869 * @ctxt: an XML parser context
5870 * @str: a pointer to an index in the string
5871 *
5872 * parse ENTITY references declarations, but this version parses it from
5873 * a string value.
5874 *
5875 * [68] EntityRef ::= '&' Name ';'
5876 *
5877 * [ WFC: Entity Declared ]
5878 * In a document without any DTD, a document with only an internal DTD
5879 * subset which contains no parameter entity references, or a document
5880 * with "standalone='yes'", the Name given in the entity reference
5881 * must match that in an entity declaration, except that well-formed
5882 * documents need not declare any of the following entities: amp, lt,
5883 * gt, apos, quot. The declaration of a parameter entity must precede
5884 * any reference to it. Similarly, the declaration of a general entity
5885 * must precede any reference to it which appears in a default value in an
5886 * attribute-list declaration. Note that if entities are declared in the
5887 * external subset or in external parameter entities, a non-validating
5888 * processor is not obligated to read and process their declarations;
5889 * for such documents, the rule that an entity must be declared is a
5890 * well-formedness constraint only if standalone='yes'.
5891 *
5892 * [ WFC: Parsed Entity ]
5893 * An entity reference must not contain the name of an unparsed entity
5894 *
5895 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5896 * is updated to the current location in the string.
5897 */
5898xmlEntityPtr
5899xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5900 xmlChar *name;
5901 const xmlChar *ptr;
5902 xmlChar cur;
5903 xmlEntityPtr ent = NULL;
5904
5905 if ((str == NULL) || (*str == NULL))
5906 return(NULL);
5907 ptr = *str;
5908 cur = *ptr;
5909 if (cur == '&') {
5910 ptr++;
5911 cur = *ptr;
5912 name = xmlParseStringName(ctxt, &ptr);
5913 if (name == NULL) {
5914 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5916 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005917 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005918 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005919 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005920 } else {
5921 if (*ptr == ';') {
5922 ptr++;
5923 /*
5924 * Ask first SAX for entity resolution, otherwise try the
5925 * predefined set.
5926 */
5927 if (ctxt->sax != NULL) {
5928 if (ctxt->sax->getEntity != NULL)
5929 ent = ctxt->sax->getEntity(ctxt->userData, name);
5930 if (ent == NULL)
5931 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005932 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5933 ent = getEntity(ctxt, name);
5934 }
Owen Taylor3473f882001-02-23 17:55:21 +00005935 }
5936 /*
5937 * [ WFC: Entity Declared ]
5938 * In a document without any DTD, a document with only an
5939 * internal DTD subset which contains no parameter entity
5940 * references, or a document with "standalone='yes'", the
5941 * Name given in the entity reference must match that in an
5942 * entity declaration, except that well-formed documents
5943 * need not declare any of the following entities: amp, lt,
5944 * gt, apos, quot.
5945 * The declaration of a parameter entity must precede any
5946 * reference to it.
5947 * Similarly, the declaration of a general entity must
5948 * precede any reference to it which appears in a default
5949 * value in an attribute-list declaration. Note that if
5950 * entities are declared in the external subset or in
5951 * external parameter entities, a non-validating processor
5952 * is not obligated to read and process their declarations;
5953 * for such documents, the rule that an entity must be
5954 * declared is a well-formedness constraint only if
5955 * standalone='yes'.
5956 */
5957 if (ent == NULL) {
5958 if ((ctxt->standalone == 1) ||
5959 ((ctxt->hasExternalSubset == 0) &&
5960 (ctxt->hasPErefs == 0))) {
5961 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5963 ctxt->sax->error(ctxt->userData,
5964 "Entity '%s' not defined\n", name);
5965 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005966 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005967 } else {
5968 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5969 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5970 ctxt->sax->warning(ctxt->userData,
5971 "Entity '%s' not defined\n", name);
5972 }
5973 }
5974
5975 /*
5976 * [ WFC: Parsed Entity ]
5977 * An entity reference must not contain the name of an
5978 * unparsed entity
5979 */
5980 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5981 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5983 ctxt->sax->error(ctxt->userData,
5984 "Entity reference to unparsed entity %s\n", name);
5985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005987 }
5988
5989 /*
5990 * [ WFC: No External Entity References ]
5991 * Attribute values cannot contain direct or indirect
5992 * entity references to external entities.
5993 */
5994 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5995 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5996 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5998 ctxt->sax->error(ctxt->userData,
5999 "Attribute references external entity '%s'\n", name);
6000 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006001 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006002 }
6003 /*
6004 * [ WFC: No < in Attribute Values ]
6005 * The replacement text of any entity referred to directly or
6006 * indirectly in an attribute value (other than "&lt;") must
6007 * not contain a <.
6008 */
6009 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6010 (ent != NULL) &&
6011 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6012 (ent->content != NULL) &&
6013 (xmlStrchr(ent->content, '<'))) {
6014 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6016 ctxt->sax->error(ctxt->userData,
6017 "'<' in entity '%s' is not allowed in attributes values\n", name);
6018 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006019 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006020 }
6021
6022 /*
6023 * Internal check, no parameter entities here ...
6024 */
6025 else {
6026 switch (ent->etype) {
6027 case XML_INTERNAL_PARAMETER_ENTITY:
6028 case XML_EXTERNAL_PARAMETER_ENTITY:
6029 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6031 ctxt->sax->error(ctxt->userData,
6032 "Attempt to reference the parameter entity '%s'\n", name);
6033 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006034 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006035 break;
6036 default:
6037 break;
6038 }
6039 }
6040
6041 /*
6042 * [ WFC: No Recursion ]
6043 * A parsed entity must not contain a recursive reference
6044 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006045 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006046 */
6047
6048 } else {
6049 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6051 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006052 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006053 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006054 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006055 }
6056 xmlFree(name);
6057 }
6058 }
6059 *str = ptr;
6060 return(ent);
6061}
6062
6063/**
6064 * xmlParsePEReference:
6065 * @ctxt: an XML parser context
6066 *
6067 * parse PEReference declarations
6068 * The entity content is handled directly by pushing it's content as
6069 * a new input stream.
6070 *
6071 * [69] PEReference ::= '%' Name ';'
6072 *
6073 * [ WFC: No Recursion ]
6074 * A parsed entity must not contain a recursive
6075 * reference to itself, either directly or indirectly.
6076 *
6077 * [ WFC: Entity Declared ]
6078 * In a document without any DTD, a document with only an internal DTD
6079 * subset which contains no parameter entity references, or a document
6080 * with "standalone='yes'", ... ... The declaration of a parameter
6081 * entity must precede any reference to it...
6082 *
6083 * [ VC: Entity Declared ]
6084 * In a document with an external subset or external parameter entities
6085 * with "standalone='no'", ... ... The declaration of a parameter entity
6086 * must precede any reference to it...
6087 *
6088 * [ WFC: In DTD ]
6089 * Parameter-entity references may only appear in the DTD.
6090 * NOTE: misleading but this is handled.
6091 */
6092void
6093xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6094 xmlChar *name;
6095 xmlEntityPtr entity = NULL;
6096 xmlParserInputPtr input;
6097
6098 if (RAW == '%') {
6099 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006100 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 if (name == NULL) {
6102 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6104 ctxt->sax->error(ctxt->userData,
6105 "xmlParsePEReference: no name\n");
6106 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006107 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006108 } else {
6109 if (RAW == ';') {
6110 NEXT;
6111 if ((ctxt->sax != NULL) &&
6112 (ctxt->sax->getParameterEntity != NULL))
6113 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6114 name);
6115 if (entity == NULL) {
6116 /*
6117 * [ WFC: Entity Declared ]
6118 * In a document without any DTD, a document with only an
6119 * internal DTD subset which contains no parameter entity
6120 * references, or a document with "standalone='yes'", ...
6121 * ... The declaration of a parameter entity must precede
6122 * any reference to it...
6123 */
6124 if ((ctxt->standalone == 1) ||
6125 ((ctxt->hasExternalSubset == 0) &&
6126 (ctxt->hasPErefs == 0))) {
6127 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6128 if ((!ctxt->disableSAX) &&
6129 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6130 ctxt->sax->error(ctxt->userData,
6131 "PEReference: %%%s; not found\n", name);
6132 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006133 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006134 } else {
6135 /*
6136 * [ VC: Entity Declared ]
6137 * In a document with an external subset or external
6138 * parameter entities with "standalone='no'", ...
6139 * ... The declaration of a parameter entity must precede
6140 * any reference to it...
6141 */
6142 if ((!ctxt->disableSAX) &&
6143 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6144 ctxt->sax->warning(ctxt->userData,
6145 "PEReference: %%%s; not found\n", name);
6146 ctxt->valid = 0;
6147 }
6148 } else {
6149 /*
6150 * Internal checking in case the entity quest barfed
6151 */
6152 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6153 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6154 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6155 ctxt->sax->warning(ctxt->userData,
6156 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006157 } else if (ctxt->input->free != deallocblankswrapper) {
6158 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6159 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006160 } else {
6161 /*
6162 * TODO !!!
6163 * handle the extra spaces added before and after
6164 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6165 */
6166 input = xmlNewEntityInputStream(ctxt, entity);
6167 xmlPushInput(ctxt, input);
6168 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6169 (RAW == '<') && (NXT(1) == '?') &&
6170 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6171 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6172 xmlParseTextDecl(ctxt);
6173 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6174 /*
6175 * The XML REC instructs us to stop parsing
6176 * right here
6177 */
6178 ctxt->instate = XML_PARSER_EOF;
6179 xmlFree(name);
6180 return;
6181 }
6182 }
Owen Taylor3473f882001-02-23 17:55:21 +00006183 }
6184 }
6185 ctxt->hasPErefs = 1;
6186 } else {
6187 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6189 ctxt->sax->error(ctxt->userData,
6190 "xmlParsePEReference: expecting ';'\n");
6191 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006192 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006193 }
6194 xmlFree(name);
6195 }
6196 }
6197}
6198
6199/**
6200 * xmlParseStringPEReference:
6201 * @ctxt: an XML parser context
6202 * @str: a pointer to an index in the string
6203 *
6204 * parse PEReference declarations
6205 *
6206 * [69] PEReference ::= '%' Name ';'
6207 *
6208 * [ WFC: No Recursion ]
6209 * A parsed entity must not contain a recursive
6210 * reference to itself, either directly or indirectly.
6211 *
6212 * [ WFC: Entity Declared ]
6213 * In a document without any DTD, a document with only an internal DTD
6214 * subset which contains no parameter entity references, or a document
6215 * with "standalone='yes'", ... ... The declaration of a parameter
6216 * entity must precede any reference to it...
6217 *
6218 * [ VC: Entity Declared ]
6219 * In a document with an external subset or external parameter entities
6220 * with "standalone='no'", ... ... The declaration of a parameter entity
6221 * must precede any reference to it...
6222 *
6223 * [ WFC: In DTD ]
6224 * Parameter-entity references may only appear in the DTD.
6225 * NOTE: misleading but this is handled.
6226 *
6227 * Returns the string of the entity content.
6228 * str is updated to the current value of the index
6229 */
6230xmlEntityPtr
6231xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6232 const xmlChar *ptr;
6233 xmlChar cur;
6234 xmlChar *name;
6235 xmlEntityPtr entity = NULL;
6236
6237 if ((str == NULL) || (*str == NULL)) return(NULL);
6238 ptr = *str;
6239 cur = *ptr;
6240 if (cur == '%') {
6241 ptr++;
6242 cur = *ptr;
6243 name = xmlParseStringName(ctxt, &ptr);
6244 if (name == NULL) {
6245 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6247 ctxt->sax->error(ctxt->userData,
6248 "xmlParseStringPEReference: no name\n");
6249 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006250 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006251 } else {
6252 cur = *ptr;
6253 if (cur == ';') {
6254 ptr++;
6255 cur = *ptr;
6256 if ((ctxt->sax != NULL) &&
6257 (ctxt->sax->getParameterEntity != NULL))
6258 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6259 name);
6260 if (entity == NULL) {
6261 /*
6262 * [ WFC: Entity Declared ]
6263 * In a document without any DTD, a document with only an
6264 * internal DTD subset which contains no parameter entity
6265 * references, or a document with "standalone='yes'", ...
6266 * ... The declaration of a parameter entity must precede
6267 * any reference to it...
6268 */
6269 if ((ctxt->standalone == 1) ||
6270 ((ctxt->hasExternalSubset == 0) &&
6271 (ctxt->hasPErefs == 0))) {
6272 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6274 ctxt->sax->error(ctxt->userData,
6275 "PEReference: %%%s; not found\n", name);
6276 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006277 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006278 } else {
6279 /*
6280 * [ VC: Entity Declared ]
6281 * In a document with an external subset or external
6282 * parameter entities with "standalone='no'", ...
6283 * ... The declaration of a parameter entity must
6284 * precede any reference to it...
6285 */
6286 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6287 ctxt->sax->warning(ctxt->userData,
6288 "PEReference: %%%s; not found\n", name);
6289 ctxt->valid = 0;
6290 }
6291 } else {
6292 /*
6293 * Internal checking in case the entity quest barfed
6294 */
6295 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6296 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6297 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6298 ctxt->sax->warning(ctxt->userData,
6299 "Internal: %%%s; is not a parameter entity\n", name);
6300 }
6301 }
6302 ctxt->hasPErefs = 1;
6303 } else {
6304 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6306 ctxt->sax->error(ctxt->userData,
6307 "xmlParseStringPEReference: expecting ';'\n");
6308 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006309 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006310 }
6311 xmlFree(name);
6312 }
6313 }
6314 *str = ptr;
6315 return(entity);
6316}
6317
6318/**
6319 * xmlParseDocTypeDecl:
6320 * @ctxt: an XML parser context
6321 *
6322 * parse a DOCTYPE declaration
6323 *
6324 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6325 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6326 *
6327 * [ VC: Root Element Type ]
6328 * The Name in the document type declaration must match the element
6329 * type of the root element.
6330 */
6331
6332void
6333xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6334 xmlChar *name = NULL;
6335 xmlChar *ExternalID = NULL;
6336 xmlChar *URI = NULL;
6337
6338 /*
6339 * We know that '<!DOCTYPE' has been detected.
6340 */
6341 SKIP(9);
6342
6343 SKIP_BLANKS;
6344
6345 /*
6346 * Parse the DOCTYPE name.
6347 */
6348 name = xmlParseName(ctxt);
6349 if (name == NULL) {
6350 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6352 ctxt->sax->error(ctxt->userData,
6353 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006356 }
6357 ctxt->intSubName = name;
6358
6359 SKIP_BLANKS;
6360
6361 /*
6362 * Check for SystemID and ExternalID
6363 */
6364 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6365
6366 if ((URI != NULL) || (ExternalID != NULL)) {
6367 ctxt->hasExternalSubset = 1;
6368 }
6369 ctxt->extSubURI = URI;
6370 ctxt->extSubSystem = ExternalID;
6371
6372 SKIP_BLANKS;
6373
6374 /*
6375 * Create and update the internal subset.
6376 */
6377 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6378 (!ctxt->disableSAX))
6379 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6380
6381 /*
6382 * Is there any internal subset declarations ?
6383 * they are handled separately in xmlParseInternalSubset()
6384 */
6385 if (RAW == '[')
6386 return;
6387
6388 /*
6389 * We should be at the end of the DOCTYPE declaration.
6390 */
6391 if (RAW != '>') {
6392 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006394 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006395 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006396 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006397 }
6398 NEXT;
6399}
6400
6401/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006402 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006403 * @ctxt: an XML parser context
6404 *
6405 * parse the internal subset declaration
6406 *
6407 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6408 */
6409
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006410static void
Owen Taylor3473f882001-02-23 17:55:21 +00006411xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6412 /*
6413 * Is there any DTD definition ?
6414 */
6415 if (RAW == '[') {
6416 ctxt->instate = XML_PARSER_DTD;
6417 NEXT;
6418 /*
6419 * Parse the succession of Markup declarations and
6420 * PEReferences.
6421 * Subsequence (markupdecl | PEReference | S)*
6422 */
6423 while (RAW != ']') {
6424 const xmlChar *check = CUR_PTR;
6425 int cons = ctxt->input->consumed;
6426
6427 SKIP_BLANKS;
6428 xmlParseMarkupDecl(ctxt);
6429 xmlParsePEReference(ctxt);
6430
6431 /*
6432 * Pop-up of finished entities.
6433 */
6434 while ((RAW == 0) && (ctxt->inputNr > 1))
6435 xmlPopInput(ctxt);
6436
6437 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6438 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6440 ctxt->sax->error(ctxt->userData,
6441 "xmlParseInternalSubset: error detected in Markup declaration\n");
6442 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006443 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006444 break;
6445 }
6446 }
6447 if (RAW == ']') {
6448 NEXT;
6449 SKIP_BLANKS;
6450 }
6451 }
6452
6453 /*
6454 * We should be at the end of the DOCTYPE declaration.
6455 */
6456 if (RAW != '>') {
6457 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006459 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006460 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006461 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006462 }
6463 NEXT;
6464}
6465
6466/**
6467 * xmlParseAttribute:
6468 * @ctxt: an XML parser context
6469 * @value: a xmlChar ** used to store the value of the attribute
6470 *
6471 * parse an attribute
6472 *
6473 * [41] Attribute ::= Name Eq AttValue
6474 *
6475 * [ WFC: No External Entity References ]
6476 * Attribute values cannot contain direct or indirect entity references
6477 * to external entities.
6478 *
6479 * [ WFC: No < in Attribute Values ]
6480 * The replacement text of any entity referred to directly or indirectly in
6481 * an attribute value (other than "&lt;") must not contain a <.
6482 *
6483 * [ VC: Attribute Value Type ]
6484 * The attribute must have been declared; the value must be of the type
6485 * declared for it.
6486 *
6487 * [25] Eq ::= S? '=' S?
6488 *
6489 * With namespace:
6490 *
6491 * [NS 11] Attribute ::= QName Eq AttValue
6492 *
6493 * Also the case QName == xmlns:??? is handled independently as a namespace
6494 * definition.
6495 *
6496 * Returns the attribute name, and the value in *value.
6497 */
6498
6499xmlChar *
6500xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6501 xmlChar *name, *val;
6502
6503 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006504 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006505 name = xmlParseName(ctxt);
6506 if (name == NULL) {
6507 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6509 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6510 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006512 return(NULL);
6513 }
6514
6515 /*
6516 * read the value
6517 */
6518 SKIP_BLANKS;
6519 if (RAW == '=') {
6520 NEXT;
6521 SKIP_BLANKS;
6522 val = xmlParseAttValue(ctxt);
6523 ctxt->instate = XML_PARSER_CONTENT;
6524 } else {
6525 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6527 ctxt->sax->error(ctxt->userData,
6528 "Specification mandate value for attribute %s\n", name);
6529 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006531 xmlFree(name);
6532 return(NULL);
6533 }
6534
6535 /*
6536 * Check that xml:lang conforms to the specification
6537 * No more registered as an error, just generate a warning now
6538 * since this was deprecated in XML second edition
6539 */
6540 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6541 if (!xmlCheckLanguageID(val)) {
6542 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6543 ctxt->sax->warning(ctxt->userData,
6544 "Malformed value for xml:lang : %s\n", val);
6545 }
6546 }
6547
6548 /*
6549 * Check that xml:space conforms to the specification
6550 */
6551 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6552 if (xmlStrEqual(val, BAD_CAST "default"))
6553 *(ctxt->space) = 0;
6554 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6555 *(ctxt->space) = 1;
6556 else {
6557 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6561 val);
6562 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006563 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006564 }
6565 }
6566
6567 *value = val;
6568 return(name);
6569}
6570
6571/**
6572 * xmlParseStartTag:
6573 * @ctxt: an XML parser context
6574 *
6575 * parse a start of tag either for rule element or
6576 * EmptyElement. In both case we don't parse the tag closing chars.
6577 *
6578 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6579 *
6580 * [ WFC: Unique Att Spec ]
6581 * No attribute name may appear more than once in the same start-tag or
6582 * empty-element tag.
6583 *
6584 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6585 *
6586 * [ WFC: Unique Att Spec ]
6587 * No attribute name may appear more than once in the same start-tag or
6588 * empty-element tag.
6589 *
6590 * With namespace:
6591 *
6592 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6593 *
6594 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6595 *
6596 * Returns the element name parsed
6597 */
6598
6599xmlChar *
6600xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6601 xmlChar *name;
6602 xmlChar *attname;
6603 xmlChar *attvalue;
6604 const xmlChar **atts = NULL;
6605 int nbatts = 0;
6606 int maxatts = 0;
6607 int i;
6608
6609 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006610 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006611
6612 name = xmlParseName(ctxt);
6613 if (name == NULL) {
6614 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6616 ctxt->sax->error(ctxt->userData,
6617 "xmlParseStartTag: invalid element name\n");
6618 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006619 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006620 return(NULL);
6621 }
6622
6623 /*
6624 * Now parse the attributes, it ends up with the ending
6625 *
6626 * (S Attribute)* S?
6627 */
6628 SKIP_BLANKS;
6629 GROW;
6630
Daniel Veillard21a0f912001-02-25 19:54:14 +00006631 while ((RAW != '>') &&
6632 ((RAW != '/') || (NXT(1) != '>')) &&
6633 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006634 const xmlChar *q = CUR_PTR;
6635 int cons = ctxt->input->consumed;
6636
6637 attname = xmlParseAttribute(ctxt, &attvalue);
6638 if ((attname != NULL) && (attvalue != NULL)) {
6639 /*
6640 * [ WFC: Unique Att Spec ]
6641 * No attribute name may appear more than once in the same
6642 * start-tag or empty-element tag.
6643 */
6644 for (i = 0; i < nbatts;i += 2) {
6645 if (xmlStrEqual(atts[i], attname)) {
6646 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6648 ctxt->sax->error(ctxt->userData,
6649 "Attribute %s redefined\n",
6650 attname);
6651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006653 xmlFree(attname);
6654 xmlFree(attvalue);
6655 goto failed;
6656 }
6657 }
6658
6659 /*
6660 * Add the pair to atts
6661 */
6662 if (atts == NULL) {
6663 maxatts = 10;
6664 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6665 if (atts == NULL) {
6666 xmlGenericError(xmlGenericErrorContext,
6667 "malloc of %ld byte failed\n",
6668 maxatts * (long)sizeof(xmlChar *));
6669 return(NULL);
6670 }
6671 } else if (nbatts + 4 > maxatts) {
6672 maxatts *= 2;
6673 atts = (const xmlChar **) xmlRealloc((void *) atts,
6674 maxatts * sizeof(xmlChar *));
6675 if (atts == NULL) {
6676 xmlGenericError(xmlGenericErrorContext,
6677 "realloc of %ld byte failed\n",
6678 maxatts * (long)sizeof(xmlChar *));
6679 return(NULL);
6680 }
6681 }
6682 atts[nbatts++] = attname;
6683 atts[nbatts++] = attvalue;
6684 atts[nbatts] = NULL;
6685 atts[nbatts + 1] = NULL;
6686 } else {
6687 if (attname != NULL)
6688 xmlFree(attname);
6689 if (attvalue != NULL)
6690 xmlFree(attvalue);
6691 }
6692
6693failed:
6694
Daniel Veillard3772de32002-12-17 10:31:45 +00006695 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006696 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6697 break;
6698 if (!IS_BLANK(RAW)) {
6699 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6701 ctxt->sax->error(ctxt->userData,
6702 "attributes construct error\n");
6703 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006705 }
6706 SKIP_BLANKS;
6707 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6708 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6710 ctxt->sax->error(ctxt->userData,
6711 "xmlParseStartTag: problem parsing attributes\n");
6712 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006713 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006714 break;
6715 }
6716 GROW;
6717 }
6718
6719 /*
6720 * SAX: Start of Element !
6721 */
6722 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6723 (!ctxt->disableSAX))
6724 ctxt->sax->startElement(ctxt->userData, name, atts);
6725
6726 if (atts != NULL) {
6727 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6728 xmlFree((void *) atts);
6729 }
6730 return(name);
6731}
6732
6733/**
6734 * xmlParseEndTag:
6735 * @ctxt: an XML parser context
6736 *
6737 * parse an end of tag
6738 *
6739 * [42] ETag ::= '</' Name S? '>'
6740 *
6741 * With namespace
6742 *
6743 * [NS 9] ETag ::= '</' QName S? '>'
6744 */
6745
6746void
6747xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6748 xmlChar *name;
6749 xmlChar *oldname;
6750
6751 GROW;
6752 if ((RAW != '<') || (NXT(1) != '/')) {
6753 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6755 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6756 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006757 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006758 return;
6759 }
6760 SKIP(2);
6761
Daniel Veillard46de64e2002-05-29 08:21:33 +00006762 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006763
6764 /*
6765 * We should definitely be at the ending "S? '>'" part
6766 */
6767 GROW;
6768 SKIP_BLANKS;
6769 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6770 ctxt->errNo = XML_ERR_GT_REQUIRED;
6771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6772 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6773 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006774 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006775 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006776 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006777
6778 /*
6779 * [ WFC: Element Type Match ]
6780 * The Name in an element's end-tag must match the element type in the
6781 * start-tag.
6782 *
6783 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006784 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006785 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006787 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006788 ctxt->sax->error(ctxt->userData,
6789 "Opening and ending tag mismatch: %s and %s\n",
6790 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006791 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006792 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006793 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006794 }
6795
6796 }
6797 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006798 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6799#if 0
6800 else {
6801 /*
6802 * Recover in case of one missing close
6803 */
6804 if ((ctxt->nameNr > 2) &&
6805 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6806 namePop(ctxt);
6807 spacePop(ctxt);
6808 }
6809 }
6810#endif
6811 if (name != NULL)
6812 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006813 }
6814
6815 /*
6816 * SAX: End of Tag
6817 */
6818 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6819 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006820 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006821
Owen Taylor3473f882001-02-23 17:55:21 +00006822 oldname = namePop(ctxt);
6823 spacePop(ctxt);
6824 if (oldname != NULL) {
6825#ifdef DEBUG_STACK
6826 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6827#endif
6828 xmlFree(oldname);
6829 }
6830 return;
6831}
6832
6833/**
6834 * xmlParseCDSect:
6835 * @ctxt: an XML parser context
6836 *
6837 * Parse escaped pure raw content.
6838 *
6839 * [18] CDSect ::= CDStart CData CDEnd
6840 *
6841 * [19] CDStart ::= '<![CDATA['
6842 *
6843 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6844 *
6845 * [21] CDEnd ::= ']]>'
6846 */
6847void
6848xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6849 xmlChar *buf = NULL;
6850 int len = 0;
6851 int size = XML_PARSER_BUFFER_SIZE;
6852 int r, rl;
6853 int s, sl;
6854 int cur, l;
6855 int count = 0;
6856
6857 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6858 (NXT(2) == '[') && (NXT(3) == 'C') &&
6859 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6860 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6861 (NXT(8) == '[')) {
6862 SKIP(9);
6863 } else
6864 return;
6865
6866 ctxt->instate = XML_PARSER_CDATA_SECTION;
6867 r = CUR_CHAR(rl);
6868 if (!IS_CHAR(r)) {
6869 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6871 ctxt->sax->error(ctxt->userData,
6872 "CData section not finished\n");
6873 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006874 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006875 ctxt->instate = XML_PARSER_CONTENT;
6876 return;
6877 }
6878 NEXTL(rl);
6879 s = CUR_CHAR(sl);
6880 if (!IS_CHAR(s)) {
6881 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6883 ctxt->sax->error(ctxt->userData,
6884 "CData section not finished\n");
6885 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006886 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006887 ctxt->instate = XML_PARSER_CONTENT;
6888 return;
6889 }
6890 NEXTL(sl);
6891 cur = CUR_CHAR(l);
6892 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6893 if (buf == NULL) {
6894 xmlGenericError(xmlGenericErrorContext,
6895 "malloc of %d byte failed\n", size);
6896 return;
6897 }
6898 while (IS_CHAR(cur) &&
6899 ((r != ']') || (s != ']') || (cur != '>'))) {
6900 if (len + 5 >= size) {
6901 size *= 2;
6902 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6903 if (buf == NULL) {
6904 xmlGenericError(xmlGenericErrorContext,
6905 "realloc of %d byte failed\n", size);
6906 return;
6907 }
6908 }
6909 COPY_BUF(rl,buf,len,r);
6910 r = s;
6911 rl = sl;
6912 s = cur;
6913 sl = l;
6914 count++;
6915 if (count > 50) {
6916 GROW;
6917 count = 0;
6918 }
6919 NEXTL(l);
6920 cur = CUR_CHAR(l);
6921 }
6922 buf[len] = 0;
6923 ctxt->instate = XML_PARSER_CONTENT;
6924 if (cur != '>') {
6925 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6927 ctxt->sax->error(ctxt->userData,
6928 "CData section not finished\n%.50s\n", buf);
6929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006931 xmlFree(buf);
6932 return;
6933 }
6934 NEXTL(l);
6935
6936 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006937 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006938 */
6939 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6940 if (ctxt->sax->cdataBlock != NULL)
6941 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006942 else if (ctxt->sax->characters != NULL)
6943 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006944 }
6945 xmlFree(buf);
6946}
6947
6948/**
6949 * xmlParseContent:
6950 * @ctxt: an XML parser context
6951 *
6952 * Parse a content:
6953 *
6954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6955 */
6956
6957void
6958xmlParseContent(xmlParserCtxtPtr ctxt) {
6959 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006960 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006961 ((RAW != '<') || (NXT(1) != '/'))) {
6962 const xmlChar *test = CUR_PTR;
6963 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006964 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006965
6966 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006967 * First case : a Processing Instruction.
6968 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006969 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006970 xmlParsePI(ctxt);
6971 }
6972
6973 /*
6974 * Second case : a CDSection
6975 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006976 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006977 (NXT(2) == '[') && (NXT(3) == 'C') &&
6978 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6979 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6980 (NXT(8) == '[')) {
6981 xmlParseCDSect(ctxt);
6982 }
6983
6984 /*
6985 * Third case : a comment
6986 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006987 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006988 (NXT(2) == '-') && (NXT(3) == '-')) {
6989 xmlParseComment(ctxt);
6990 ctxt->instate = XML_PARSER_CONTENT;
6991 }
6992
6993 /*
6994 * Fourth case : a sub-element.
6995 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006996 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006997 xmlParseElement(ctxt);
6998 }
6999
7000 /*
7001 * Fifth case : a reference. If if has not been resolved,
7002 * parsing returns it's Name, create the node
7003 */
7004
Daniel Veillard21a0f912001-02-25 19:54:14 +00007005 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007006 xmlParseReference(ctxt);
7007 }
7008
7009 /*
7010 * Last case, text. Note that References are handled directly.
7011 */
7012 else {
7013 xmlParseCharData(ctxt, 0);
7014 }
7015
7016 GROW;
7017 /*
7018 * Pop-up of finished entities.
7019 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007020 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007021 xmlPopInput(ctxt);
7022 SHRINK;
7023
Daniel Veillardfdc91562002-07-01 21:52:03 +00007024 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007025 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7027 ctxt->sax->error(ctxt->userData,
7028 "detected an error in element content\n");
7029 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007030 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007031 ctxt->instate = XML_PARSER_EOF;
7032 break;
7033 }
7034 }
7035}
7036
7037/**
7038 * xmlParseElement:
7039 * @ctxt: an XML parser context
7040 *
7041 * parse an XML element, this is highly recursive
7042 *
7043 * [39] element ::= EmptyElemTag | STag content ETag
7044 *
7045 * [ WFC: Element Type Match ]
7046 * The Name in an element's end-tag must match the element type in the
7047 * start-tag.
7048 *
7049 * [ VC: Element Valid ]
7050 * An element is valid if there is a declaration matching elementdecl
7051 * where the Name matches the element type and one of the following holds:
7052 * - The declaration matches EMPTY and the element has no content.
7053 * - The declaration matches children and the sequence of child elements
7054 * belongs to the language generated by the regular expression in the
7055 * content model, with optional white space (characters matching the
7056 * nonterminal S) between each pair of child elements.
7057 * - The declaration matches Mixed and the content consists of character
7058 * data and child elements whose types match names in the content model.
7059 * - The declaration matches ANY, and the types of any child elements have
7060 * been declared.
7061 */
7062
7063void
7064xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007065 xmlChar *name;
7066 xmlChar *oldname;
7067 xmlParserNodeInfo node_info;
7068 xmlNodePtr ret;
7069
7070 /* Capture start position */
7071 if (ctxt->record_info) {
7072 node_info.begin_pos = ctxt->input->consumed +
7073 (CUR_PTR - ctxt->input->base);
7074 node_info.begin_line = ctxt->input->line;
7075 }
7076
7077 if (ctxt->spaceNr == 0)
7078 spacePush(ctxt, -1);
7079 else
7080 spacePush(ctxt, *ctxt->space);
7081
7082 name = xmlParseStartTag(ctxt);
7083 if (name == NULL) {
7084 spacePop(ctxt);
7085 return;
7086 }
7087 namePush(ctxt, name);
7088 ret = ctxt->node;
7089
7090 /*
7091 * [ VC: Root Element Type ]
7092 * The Name in the document type declaration must match the element
7093 * type of the root element.
7094 */
7095 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7096 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7097 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7098
7099 /*
7100 * Check for an Empty Element.
7101 */
7102 if ((RAW == '/') && (NXT(1) == '>')) {
7103 SKIP(2);
7104 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7105 (!ctxt->disableSAX))
7106 ctxt->sax->endElement(ctxt->userData, name);
7107 oldname = namePop(ctxt);
7108 spacePop(ctxt);
7109 if (oldname != NULL) {
7110#ifdef DEBUG_STACK
7111 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7112#endif
7113 xmlFree(oldname);
7114 }
7115 if ( ret != NULL && ctxt->record_info ) {
7116 node_info.end_pos = ctxt->input->consumed +
7117 (CUR_PTR - ctxt->input->base);
7118 node_info.end_line = ctxt->input->line;
7119 node_info.node = ret;
7120 xmlParserAddNodeInfo(ctxt, &node_info);
7121 }
7122 return;
7123 }
7124 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007125 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007126 } else {
7127 ctxt->errNo = XML_ERR_GT_REQUIRED;
7128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7129 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007130 "Couldn't find end of Start Tag %s\n",
7131 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007132 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007133 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007134
7135 /*
7136 * end of parsing of this node.
7137 */
7138 nodePop(ctxt);
7139 oldname = namePop(ctxt);
7140 spacePop(ctxt);
7141 if (oldname != NULL) {
7142#ifdef DEBUG_STACK
7143 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7144#endif
7145 xmlFree(oldname);
7146 }
7147
7148 /*
7149 * Capture end position and add node
7150 */
7151 if ( ret != NULL && ctxt->record_info ) {
7152 node_info.end_pos = ctxt->input->consumed +
7153 (CUR_PTR - ctxt->input->base);
7154 node_info.end_line = ctxt->input->line;
7155 node_info.node = ret;
7156 xmlParserAddNodeInfo(ctxt, &node_info);
7157 }
7158 return;
7159 }
7160
7161 /*
7162 * Parse the content of the element:
7163 */
7164 xmlParseContent(ctxt);
7165 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007166 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7168 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007169 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007170 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007171 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007172
7173 /*
7174 * end of parsing of this node.
7175 */
7176 nodePop(ctxt);
7177 oldname = namePop(ctxt);
7178 spacePop(ctxt);
7179 if (oldname != NULL) {
7180#ifdef DEBUG_STACK
7181 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7182#endif
7183 xmlFree(oldname);
7184 }
7185 return;
7186 }
7187
7188 /*
7189 * parse the end of tag: '</' should be here.
7190 */
7191 xmlParseEndTag(ctxt);
7192
7193 /*
7194 * Capture end position and add node
7195 */
7196 if ( ret != NULL && ctxt->record_info ) {
7197 node_info.end_pos = ctxt->input->consumed +
7198 (CUR_PTR - ctxt->input->base);
7199 node_info.end_line = ctxt->input->line;
7200 node_info.node = ret;
7201 xmlParserAddNodeInfo(ctxt, &node_info);
7202 }
7203}
7204
7205/**
7206 * xmlParseVersionNum:
7207 * @ctxt: an XML parser context
7208 *
7209 * parse the XML version value.
7210 *
7211 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7212 *
7213 * Returns the string giving the XML version number, or NULL
7214 */
7215xmlChar *
7216xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7217 xmlChar *buf = NULL;
7218 int len = 0;
7219 int size = 10;
7220 xmlChar cur;
7221
7222 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7223 if (buf == NULL) {
7224 xmlGenericError(xmlGenericErrorContext,
7225 "malloc of %d byte failed\n", size);
7226 return(NULL);
7227 }
7228 cur = CUR;
7229 while (((cur >= 'a') && (cur <= 'z')) ||
7230 ((cur >= 'A') && (cur <= 'Z')) ||
7231 ((cur >= '0') && (cur <= '9')) ||
7232 (cur == '_') || (cur == '.') ||
7233 (cur == ':') || (cur == '-')) {
7234 if (len + 1 >= size) {
7235 size *= 2;
7236 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7237 if (buf == NULL) {
7238 xmlGenericError(xmlGenericErrorContext,
7239 "realloc of %d byte failed\n", size);
7240 return(NULL);
7241 }
7242 }
7243 buf[len++] = cur;
7244 NEXT;
7245 cur=CUR;
7246 }
7247 buf[len] = 0;
7248 return(buf);
7249}
7250
7251/**
7252 * xmlParseVersionInfo:
7253 * @ctxt: an XML parser context
7254 *
7255 * parse the XML version.
7256 *
7257 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7258 *
7259 * [25] Eq ::= S? '=' S?
7260 *
7261 * Returns the version string, e.g. "1.0"
7262 */
7263
7264xmlChar *
7265xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7266 xmlChar *version = NULL;
7267 const xmlChar *q;
7268
7269 if ((RAW == 'v') && (NXT(1) == 'e') &&
7270 (NXT(2) == 'r') && (NXT(3) == 's') &&
7271 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7272 (NXT(6) == 'n')) {
7273 SKIP(7);
7274 SKIP_BLANKS;
7275 if (RAW != '=') {
7276 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7278 ctxt->sax->error(ctxt->userData,
7279 "xmlParseVersionInfo : expected '='\n");
7280 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007281 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007282 return(NULL);
7283 }
7284 NEXT;
7285 SKIP_BLANKS;
7286 if (RAW == '"') {
7287 NEXT;
7288 q = CUR_PTR;
7289 version = xmlParseVersionNum(ctxt);
7290 if (RAW != '"') {
7291 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7293 ctxt->sax->error(ctxt->userData,
7294 "String not closed\n%.50s\n", q);
7295 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007296 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007297 } else
7298 NEXT;
7299 } else if (RAW == '\''){
7300 NEXT;
7301 q = CUR_PTR;
7302 version = xmlParseVersionNum(ctxt);
7303 if (RAW != '\'') {
7304 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7306 ctxt->sax->error(ctxt->userData,
7307 "String not closed\n%.50s\n", q);
7308 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007309 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007310 } else
7311 NEXT;
7312 } else {
7313 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7315 ctxt->sax->error(ctxt->userData,
7316 "xmlParseVersionInfo : expected ' or \"\n");
7317 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007318 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007319 }
7320 }
7321 return(version);
7322}
7323
7324/**
7325 * xmlParseEncName:
7326 * @ctxt: an XML parser context
7327 *
7328 * parse the XML encoding name
7329 *
7330 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7331 *
7332 * Returns the encoding name value or NULL
7333 */
7334xmlChar *
7335xmlParseEncName(xmlParserCtxtPtr ctxt) {
7336 xmlChar *buf = NULL;
7337 int len = 0;
7338 int size = 10;
7339 xmlChar cur;
7340
7341 cur = CUR;
7342 if (((cur >= 'a') && (cur <= 'z')) ||
7343 ((cur >= 'A') && (cur <= 'Z'))) {
7344 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7345 if (buf == NULL) {
7346 xmlGenericError(xmlGenericErrorContext,
7347 "malloc of %d byte failed\n", size);
7348 return(NULL);
7349 }
7350
7351 buf[len++] = cur;
7352 NEXT;
7353 cur = CUR;
7354 while (((cur >= 'a') && (cur <= 'z')) ||
7355 ((cur >= 'A') && (cur <= 'Z')) ||
7356 ((cur >= '0') && (cur <= '9')) ||
7357 (cur == '.') || (cur == '_') ||
7358 (cur == '-')) {
7359 if (len + 1 >= size) {
7360 size *= 2;
7361 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7362 if (buf == NULL) {
7363 xmlGenericError(xmlGenericErrorContext,
7364 "realloc of %d byte failed\n", size);
7365 return(NULL);
7366 }
7367 }
7368 buf[len++] = cur;
7369 NEXT;
7370 cur = CUR;
7371 if (cur == 0) {
7372 SHRINK;
7373 GROW;
7374 cur = CUR;
7375 }
7376 }
7377 buf[len] = 0;
7378 } else {
7379 ctxt->errNo = XML_ERR_ENCODING_NAME;
7380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7382 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007383 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007384 }
7385 return(buf);
7386}
7387
7388/**
7389 * xmlParseEncodingDecl:
7390 * @ctxt: an XML parser context
7391 *
7392 * parse the XML encoding declaration
7393 *
7394 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7395 *
7396 * this setups the conversion filters.
7397 *
7398 * Returns the encoding value or NULL
7399 */
7400
7401xmlChar *
7402xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7403 xmlChar *encoding = NULL;
7404 const xmlChar *q;
7405
7406 SKIP_BLANKS;
7407 if ((RAW == 'e') && (NXT(1) == 'n') &&
7408 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7409 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7410 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7411 SKIP(8);
7412 SKIP_BLANKS;
7413 if (RAW != '=') {
7414 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7416 ctxt->sax->error(ctxt->userData,
7417 "xmlParseEncodingDecl : expected '='\n");
7418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007420 return(NULL);
7421 }
7422 NEXT;
7423 SKIP_BLANKS;
7424 if (RAW == '"') {
7425 NEXT;
7426 q = CUR_PTR;
7427 encoding = xmlParseEncName(ctxt);
7428 if (RAW != '"') {
7429 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7431 ctxt->sax->error(ctxt->userData,
7432 "String not closed\n%.50s\n", q);
7433 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007434 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007435 } else
7436 NEXT;
7437 } else if (RAW == '\''){
7438 NEXT;
7439 q = CUR_PTR;
7440 encoding = xmlParseEncName(ctxt);
7441 if (RAW != '\'') {
7442 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7444 ctxt->sax->error(ctxt->userData,
7445 "String not closed\n%.50s\n", q);
7446 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007447 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007448 } else
7449 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007450 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007451 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7453 ctxt->sax->error(ctxt->userData,
7454 "xmlParseEncodingDecl : expected ' or \"\n");
7455 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007456 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007457 }
7458 if (encoding != NULL) {
7459 xmlCharEncoding enc;
7460 xmlCharEncodingHandlerPtr handler;
7461
7462 if (ctxt->input->encoding != NULL)
7463 xmlFree((xmlChar *) ctxt->input->encoding);
7464 ctxt->input->encoding = encoding;
7465
7466 enc = xmlParseCharEncoding((const char *) encoding);
7467 /*
7468 * registered set of known encodings
7469 */
7470 if (enc != XML_CHAR_ENCODING_ERROR) {
7471 xmlSwitchEncoding(ctxt, enc);
7472 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007473 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007474 xmlFree(encoding);
7475 return(NULL);
7476 }
7477 } else {
7478 /*
7479 * fallback for unknown encodings
7480 */
7481 handler = xmlFindCharEncodingHandler((const char *) encoding);
7482 if (handler != NULL) {
7483 xmlSwitchToEncoding(ctxt, handler);
7484 } else {
7485 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7487 ctxt->sax->error(ctxt->userData,
7488 "Unsupported encoding %s\n", encoding);
7489 return(NULL);
7490 }
7491 }
7492 }
7493 }
7494 return(encoding);
7495}
7496
7497/**
7498 * xmlParseSDDecl:
7499 * @ctxt: an XML parser context
7500 *
7501 * parse the XML standalone declaration
7502 *
7503 * [32] SDDecl ::= S 'standalone' Eq
7504 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7505 *
7506 * [ VC: Standalone Document Declaration ]
7507 * TODO The standalone document declaration must have the value "no"
7508 * if any external markup declarations contain declarations of:
7509 * - attributes with default values, if elements to which these
7510 * attributes apply appear in the document without specifications
7511 * of values for these attributes, or
7512 * - entities (other than amp, lt, gt, apos, quot), if references
7513 * to those entities appear in the document, or
7514 * - attributes with values subject to normalization, where the
7515 * attribute appears in the document with a value which will change
7516 * as a result of normalization, or
7517 * - element types with element content, if white space occurs directly
7518 * within any instance of those types.
7519 *
7520 * Returns 1 if standalone, 0 otherwise
7521 */
7522
7523int
7524xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7525 int standalone = -1;
7526
7527 SKIP_BLANKS;
7528 if ((RAW == 's') && (NXT(1) == 't') &&
7529 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7530 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7531 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7532 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7533 SKIP(10);
7534 SKIP_BLANKS;
7535 if (RAW != '=') {
7536 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7538 ctxt->sax->error(ctxt->userData,
7539 "XML standalone declaration : expected '='\n");
7540 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007541 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007542 return(standalone);
7543 }
7544 NEXT;
7545 SKIP_BLANKS;
7546 if (RAW == '\''){
7547 NEXT;
7548 if ((RAW == 'n') && (NXT(1) == 'o')) {
7549 standalone = 0;
7550 SKIP(2);
7551 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7552 (NXT(2) == 's')) {
7553 standalone = 1;
7554 SKIP(3);
7555 } else {
7556 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7558 ctxt->sax->error(ctxt->userData,
7559 "standalone accepts only 'yes' or 'no'\n");
7560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007562 }
7563 if (RAW != '\'') {
7564 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7566 ctxt->sax->error(ctxt->userData, "String not closed\n");
7567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007569 } else
7570 NEXT;
7571 } else if (RAW == '"'){
7572 NEXT;
7573 if ((RAW == 'n') && (NXT(1) == 'o')) {
7574 standalone = 0;
7575 SKIP(2);
7576 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7577 (NXT(2) == 's')) {
7578 standalone = 1;
7579 SKIP(3);
7580 } else {
7581 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7583 ctxt->sax->error(ctxt->userData,
7584 "standalone accepts only 'yes' or 'no'\n");
7585 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007587 }
7588 if (RAW != '"') {
7589 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7591 ctxt->sax->error(ctxt->userData, "String not closed\n");
7592 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007593 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007594 } else
7595 NEXT;
7596 } else {
7597 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7599 ctxt->sax->error(ctxt->userData,
7600 "Standalone value not found\n");
7601 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007602 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007603 }
7604 }
7605 return(standalone);
7606}
7607
7608/**
7609 * xmlParseXMLDecl:
7610 * @ctxt: an XML parser context
7611 *
7612 * parse an XML declaration header
7613 *
7614 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7615 */
7616
7617void
7618xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7619 xmlChar *version;
7620
7621 /*
7622 * We know that '<?xml' is here.
7623 */
7624 SKIP(5);
7625
7626 if (!IS_BLANK(RAW)) {
7627 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7629 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7630 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007631 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007632 }
7633 SKIP_BLANKS;
7634
7635 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007636 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007637 */
7638 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007639 if (version == NULL) {
7640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7641 ctxt->sax->error(ctxt->userData,
7642 "Malformed declaration expecting version\n");
7643 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007644 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007645 } else {
7646 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7647 /*
7648 * TODO: Blueberry should be detected here
7649 */
7650 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7651 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7652 version);
7653 }
7654 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007655 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007656 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007657 }
Owen Taylor3473f882001-02-23 17:55:21 +00007658
7659 /*
7660 * We may have the encoding declaration
7661 */
7662 if (!IS_BLANK(RAW)) {
7663 if ((RAW == '?') && (NXT(1) == '>')) {
7664 SKIP(2);
7665 return;
7666 }
7667 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7669 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7670 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007671 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007672 }
7673 xmlParseEncodingDecl(ctxt);
7674 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7675 /*
7676 * The XML REC instructs us to stop parsing right here
7677 */
7678 return;
7679 }
7680
7681 /*
7682 * We may have the standalone status.
7683 */
7684 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7685 if ((RAW == '?') && (NXT(1) == '>')) {
7686 SKIP(2);
7687 return;
7688 }
7689 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7691 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7692 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007693 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007694 }
7695 SKIP_BLANKS;
7696 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7697
7698 SKIP_BLANKS;
7699 if ((RAW == '?') && (NXT(1) == '>')) {
7700 SKIP(2);
7701 } else if (RAW == '>') {
7702 /* Deprecated old WD ... */
7703 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData,
7706 "XML declaration must end-up with '?>'\n");
7707 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007708 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007709 NEXT;
7710 } else {
7711 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7713 ctxt->sax->error(ctxt->userData,
7714 "parsing XML declaration: '?>' expected\n");
7715 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007717 MOVETO_ENDTAG(CUR_PTR);
7718 NEXT;
7719 }
7720}
7721
7722/**
7723 * xmlParseMisc:
7724 * @ctxt: an XML parser context
7725 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007726 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007727 *
7728 * [27] Misc ::= Comment | PI | S
7729 */
7730
7731void
7732xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007733 while (((RAW == '<') && (NXT(1) == '?')) ||
7734 ((RAW == '<') && (NXT(1) == '!') &&
7735 (NXT(2) == '-') && (NXT(3) == '-')) ||
7736 IS_BLANK(CUR)) {
7737 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007738 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007739 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007740 NEXT;
7741 } else
7742 xmlParseComment(ctxt);
7743 }
7744}
7745
7746/**
7747 * xmlParseDocument:
7748 * @ctxt: an XML parser context
7749 *
7750 * parse an XML document (and build a tree if using the standard SAX
7751 * interface).
7752 *
7753 * [1] document ::= prolog element Misc*
7754 *
7755 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7756 *
7757 * Returns 0, -1 in case of error. the parser context is augmented
7758 * as a result of the parsing.
7759 */
7760
7761int
7762xmlParseDocument(xmlParserCtxtPtr ctxt) {
7763 xmlChar start[4];
7764 xmlCharEncoding enc;
7765
7766 xmlInitParser();
7767
7768 GROW;
7769
7770 /*
7771 * SAX: beginning of the document processing.
7772 */
7773 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7774 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7775
Daniel Veillard50f34372001-08-03 12:06:36 +00007776 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007777 /*
7778 * Get the 4 first bytes and decode the charset
7779 * if enc != XML_CHAR_ENCODING_NONE
7780 * plug some encoding conversion routines.
7781 */
7782 start[0] = RAW;
7783 start[1] = NXT(1);
7784 start[2] = NXT(2);
7785 start[3] = NXT(3);
7786 enc = xmlDetectCharEncoding(start, 4);
7787 if (enc != XML_CHAR_ENCODING_NONE) {
7788 xmlSwitchEncoding(ctxt, enc);
7789 }
Owen Taylor3473f882001-02-23 17:55:21 +00007790 }
7791
7792
7793 if (CUR == 0) {
7794 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7796 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7797 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007798 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007799 }
7800
7801 /*
7802 * Check for the XMLDecl in the Prolog.
7803 */
7804 GROW;
7805 if ((RAW == '<') && (NXT(1) == '?') &&
7806 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7807 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7808
7809 /*
7810 * Note that we will switch encoding on the fly.
7811 */
7812 xmlParseXMLDecl(ctxt);
7813 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7814 /*
7815 * The XML REC instructs us to stop parsing right here
7816 */
7817 return(-1);
7818 }
7819 ctxt->standalone = ctxt->input->standalone;
7820 SKIP_BLANKS;
7821 } else {
7822 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7823 }
7824 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7825 ctxt->sax->startDocument(ctxt->userData);
7826
7827 /*
7828 * The Misc part of the Prolog
7829 */
7830 GROW;
7831 xmlParseMisc(ctxt);
7832
7833 /*
7834 * Then possibly doc type declaration(s) and more Misc
7835 * (doctypedecl Misc*)?
7836 */
7837 GROW;
7838 if ((RAW == '<') && (NXT(1) == '!') &&
7839 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7840 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7841 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7842 (NXT(8) == 'E')) {
7843
7844 ctxt->inSubset = 1;
7845 xmlParseDocTypeDecl(ctxt);
7846 if (RAW == '[') {
7847 ctxt->instate = XML_PARSER_DTD;
7848 xmlParseInternalSubset(ctxt);
7849 }
7850
7851 /*
7852 * Create and update the external subset.
7853 */
7854 ctxt->inSubset = 2;
7855 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7856 (!ctxt->disableSAX))
7857 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7858 ctxt->extSubSystem, ctxt->extSubURI);
7859 ctxt->inSubset = 0;
7860
7861
7862 ctxt->instate = XML_PARSER_PROLOG;
7863 xmlParseMisc(ctxt);
7864 }
7865
7866 /*
7867 * Time to start parsing the tree itself
7868 */
7869 GROW;
7870 if (RAW != '<') {
7871 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7873 ctxt->sax->error(ctxt->userData,
7874 "Start tag expected, '<' not found\n");
7875 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007876 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007877 ctxt->instate = XML_PARSER_EOF;
7878 } else {
7879 ctxt->instate = XML_PARSER_CONTENT;
7880 xmlParseElement(ctxt);
7881 ctxt->instate = XML_PARSER_EPILOG;
7882
7883
7884 /*
7885 * The Misc part at the end
7886 */
7887 xmlParseMisc(ctxt);
7888
Daniel Veillard561b7f82002-03-20 21:55:57 +00007889 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007890 ctxt->errNo = XML_ERR_DOCUMENT_END;
7891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7892 ctxt->sax->error(ctxt->userData,
7893 "Extra content at the end of the document\n");
7894 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007895 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007896 }
7897 ctxt->instate = XML_PARSER_EOF;
7898 }
7899
7900 /*
7901 * SAX: end of the document processing.
7902 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007903 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007904 ctxt->sax->endDocument(ctxt->userData);
7905
Daniel Veillard5997aca2002-03-18 18:36:20 +00007906 /*
7907 * Remove locally kept entity definitions if the tree was not built
7908 */
7909 if ((ctxt->myDoc != NULL) &&
7910 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7911 xmlFreeDoc(ctxt->myDoc);
7912 ctxt->myDoc = NULL;
7913 }
7914
Daniel Veillardc7612992002-02-17 22:47:37 +00007915 if (! ctxt->wellFormed) {
7916 ctxt->valid = 0;
7917 return(-1);
7918 }
Owen Taylor3473f882001-02-23 17:55:21 +00007919 return(0);
7920}
7921
7922/**
7923 * xmlParseExtParsedEnt:
7924 * @ctxt: an XML parser context
7925 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007926 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007927 * An external general parsed entity is well-formed if it matches the
7928 * production labeled extParsedEnt.
7929 *
7930 * [78] extParsedEnt ::= TextDecl? content
7931 *
7932 * Returns 0, -1 in case of error. the parser context is augmented
7933 * as a result of the parsing.
7934 */
7935
7936int
7937xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7938 xmlChar start[4];
7939 xmlCharEncoding enc;
7940
7941 xmlDefaultSAXHandlerInit();
7942
7943 GROW;
7944
7945 /*
7946 * SAX: beginning of the document processing.
7947 */
7948 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7949 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7950
7951 /*
7952 * Get the 4 first bytes and decode the charset
7953 * if enc != XML_CHAR_ENCODING_NONE
7954 * plug some encoding conversion routines.
7955 */
7956 start[0] = RAW;
7957 start[1] = NXT(1);
7958 start[2] = NXT(2);
7959 start[3] = NXT(3);
7960 enc = xmlDetectCharEncoding(start, 4);
7961 if (enc != XML_CHAR_ENCODING_NONE) {
7962 xmlSwitchEncoding(ctxt, enc);
7963 }
7964
7965
7966 if (CUR == 0) {
7967 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7969 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7970 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007971 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007972 }
7973
7974 /*
7975 * Check for the XMLDecl in the Prolog.
7976 */
7977 GROW;
7978 if ((RAW == '<') && (NXT(1) == '?') &&
7979 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7980 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7981
7982 /*
7983 * Note that we will switch encoding on the fly.
7984 */
7985 xmlParseXMLDecl(ctxt);
7986 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7987 /*
7988 * The XML REC instructs us to stop parsing right here
7989 */
7990 return(-1);
7991 }
7992 SKIP_BLANKS;
7993 } else {
7994 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7995 }
7996 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7997 ctxt->sax->startDocument(ctxt->userData);
7998
7999 /*
8000 * Doing validity checking on chunk doesn't make sense
8001 */
8002 ctxt->instate = XML_PARSER_CONTENT;
8003 ctxt->validate = 0;
8004 ctxt->loadsubset = 0;
8005 ctxt->depth = 0;
8006
8007 xmlParseContent(ctxt);
8008
8009 if ((RAW == '<') && (NXT(1) == '/')) {
8010 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8012 ctxt->sax->error(ctxt->userData,
8013 "chunk is not well balanced\n");
8014 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008016 } else if (RAW != 0) {
8017 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8019 ctxt->sax->error(ctxt->userData,
8020 "extra content at the end of well balanced chunk\n");
8021 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008022 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008023 }
8024
8025 /*
8026 * SAX: end of the document processing.
8027 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008028 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008029 ctxt->sax->endDocument(ctxt->userData);
8030
8031 if (! ctxt->wellFormed) return(-1);
8032 return(0);
8033}
8034
8035/************************************************************************
8036 * *
8037 * Progressive parsing interfaces *
8038 * *
8039 ************************************************************************/
8040
8041/**
8042 * xmlParseLookupSequence:
8043 * @ctxt: an XML parser context
8044 * @first: the first char to lookup
8045 * @next: the next char to lookup or zero
8046 * @third: the next char to lookup or zero
8047 *
8048 * Try to find if a sequence (first, next, third) or just (first next) or
8049 * (first) is available in the input stream.
8050 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8051 * to avoid rescanning sequences of bytes, it DOES change the state of the
8052 * parser, do not use liberally.
8053 *
8054 * Returns the index to the current parsing point if the full sequence
8055 * is available, -1 otherwise.
8056 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008057static int
Owen Taylor3473f882001-02-23 17:55:21 +00008058xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8059 xmlChar next, xmlChar third) {
8060 int base, len;
8061 xmlParserInputPtr in;
8062 const xmlChar *buf;
8063
8064 in = ctxt->input;
8065 if (in == NULL) return(-1);
8066 base = in->cur - in->base;
8067 if (base < 0) return(-1);
8068 if (ctxt->checkIndex > base)
8069 base = ctxt->checkIndex;
8070 if (in->buf == NULL) {
8071 buf = in->base;
8072 len = in->length;
8073 } else {
8074 buf = in->buf->buffer->content;
8075 len = in->buf->buffer->use;
8076 }
8077 /* take into account the sequence length */
8078 if (third) len -= 2;
8079 else if (next) len --;
8080 for (;base < len;base++) {
8081 if (buf[base] == first) {
8082 if (third != 0) {
8083 if ((buf[base + 1] != next) ||
8084 (buf[base + 2] != third)) continue;
8085 } else if (next != 0) {
8086 if (buf[base + 1] != next) continue;
8087 }
8088 ctxt->checkIndex = 0;
8089#ifdef DEBUG_PUSH
8090 if (next == 0)
8091 xmlGenericError(xmlGenericErrorContext,
8092 "PP: lookup '%c' found at %d\n",
8093 first, base);
8094 else if (third == 0)
8095 xmlGenericError(xmlGenericErrorContext,
8096 "PP: lookup '%c%c' found at %d\n",
8097 first, next, base);
8098 else
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: lookup '%c%c%c' found at %d\n",
8101 first, next, third, base);
8102#endif
8103 return(base - (in->cur - in->base));
8104 }
8105 }
8106 ctxt->checkIndex = base;
8107#ifdef DEBUG_PUSH
8108 if (next == 0)
8109 xmlGenericError(xmlGenericErrorContext,
8110 "PP: lookup '%c' failed\n", first);
8111 else if (third == 0)
8112 xmlGenericError(xmlGenericErrorContext,
8113 "PP: lookup '%c%c' failed\n", first, next);
8114 else
8115 xmlGenericError(xmlGenericErrorContext,
8116 "PP: lookup '%c%c%c' failed\n", first, next, third);
8117#endif
8118 return(-1);
8119}
8120
8121/**
8122 * xmlParseTryOrFinish:
8123 * @ctxt: an XML parser context
8124 * @terminate: last chunk indicator
8125 *
8126 * Try to progress on parsing
8127 *
8128 * Returns zero if no parsing was possible
8129 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008130static int
Owen Taylor3473f882001-02-23 17:55:21 +00008131xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8132 int ret = 0;
8133 int avail;
8134 xmlChar cur, next;
8135
8136#ifdef DEBUG_PUSH
8137 switch (ctxt->instate) {
8138 case XML_PARSER_EOF:
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: try EOF\n"); break;
8141 case XML_PARSER_START:
8142 xmlGenericError(xmlGenericErrorContext,
8143 "PP: try START\n"); break;
8144 case XML_PARSER_MISC:
8145 xmlGenericError(xmlGenericErrorContext,
8146 "PP: try MISC\n");break;
8147 case XML_PARSER_COMMENT:
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: try COMMENT\n");break;
8150 case XML_PARSER_PROLOG:
8151 xmlGenericError(xmlGenericErrorContext,
8152 "PP: try PROLOG\n");break;
8153 case XML_PARSER_START_TAG:
8154 xmlGenericError(xmlGenericErrorContext,
8155 "PP: try START_TAG\n");break;
8156 case XML_PARSER_CONTENT:
8157 xmlGenericError(xmlGenericErrorContext,
8158 "PP: try CONTENT\n");break;
8159 case XML_PARSER_CDATA_SECTION:
8160 xmlGenericError(xmlGenericErrorContext,
8161 "PP: try CDATA_SECTION\n");break;
8162 case XML_PARSER_END_TAG:
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: try END_TAG\n");break;
8165 case XML_PARSER_ENTITY_DECL:
8166 xmlGenericError(xmlGenericErrorContext,
8167 "PP: try ENTITY_DECL\n");break;
8168 case XML_PARSER_ENTITY_VALUE:
8169 xmlGenericError(xmlGenericErrorContext,
8170 "PP: try ENTITY_VALUE\n");break;
8171 case XML_PARSER_ATTRIBUTE_VALUE:
8172 xmlGenericError(xmlGenericErrorContext,
8173 "PP: try ATTRIBUTE_VALUE\n");break;
8174 case XML_PARSER_DTD:
8175 xmlGenericError(xmlGenericErrorContext,
8176 "PP: try DTD\n");break;
8177 case XML_PARSER_EPILOG:
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: try EPILOG\n");break;
8180 case XML_PARSER_PI:
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: try PI\n");break;
8183 case XML_PARSER_IGNORE:
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: try IGNORE\n");break;
8186 }
8187#endif
8188
8189 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008190 SHRINK;
8191
Owen Taylor3473f882001-02-23 17:55:21 +00008192 /*
8193 * Pop-up of finished entities.
8194 */
8195 while ((RAW == 0) && (ctxt->inputNr > 1))
8196 xmlPopInput(ctxt);
8197
8198 if (ctxt->input ==NULL) break;
8199 if (ctxt->input->buf == NULL)
8200 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008201 else {
8202 /*
8203 * If we are operating on converted input, try to flush
8204 * remaining chars to avoid them stalling in the non-converted
8205 * buffer.
8206 */
8207 if ((ctxt->input->buf->raw != NULL) &&
8208 (ctxt->input->buf->raw->use > 0)) {
8209 int base = ctxt->input->base -
8210 ctxt->input->buf->buffer->content;
8211 int current = ctxt->input->cur - ctxt->input->base;
8212
8213 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8214 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8215 ctxt->input->cur = ctxt->input->base + current;
8216 ctxt->input->end =
8217 &ctxt->input->buf->buffer->content[
8218 ctxt->input->buf->buffer->use];
8219 }
8220 avail = ctxt->input->buf->buffer->use -
8221 (ctxt->input->cur - ctxt->input->base);
8222 }
Owen Taylor3473f882001-02-23 17:55:21 +00008223 if (avail < 1)
8224 goto done;
8225 switch (ctxt->instate) {
8226 case XML_PARSER_EOF:
8227 /*
8228 * Document parsing is done !
8229 */
8230 goto done;
8231 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008232 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8233 xmlChar start[4];
8234 xmlCharEncoding enc;
8235
8236 /*
8237 * Very first chars read from the document flow.
8238 */
8239 if (avail < 4)
8240 goto done;
8241
8242 /*
8243 * Get the 4 first bytes and decode the charset
8244 * if enc != XML_CHAR_ENCODING_NONE
8245 * plug some encoding conversion routines.
8246 */
8247 start[0] = RAW;
8248 start[1] = NXT(1);
8249 start[2] = NXT(2);
8250 start[3] = NXT(3);
8251 enc = xmlDetectCharEncoding(start, 4);
8252 if (enc != XML_CHAR_ENCODING_NONE) {
8253 xmlSwitchEncoding(ctxt, enc);
8254 }
8255 break;
8256 }
Owen Taylor3473f882001-02-23 17:55:21 +00008257
8258 cur = ctxt->input->cur[0];
8259 next = ctxt->input->cur[1];
8260 if (cur == 0) {
8261 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8262 ctxt->sax->setDocumentLocator(ctxt->userData,
8263 &xmlDefaultSAXLocator);
8264 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8266 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8267 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008268 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008269 ctxt->instate = XML_PARSER_EOF;
8270#ifdef DEBUG_PUSH
8271 xmlGenericError(xmlGenericErrorContext,
8272 "PP: entering EOF\n");
8273#endif
8274 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8275 ctxt->sax->endDocument(ctxt->userData);
8276 goto done;
8277 }
8278 if ((cur == '<') && (next == '?')) {
8279 /* PI or XML decl */
8280 if (avail < 5) return(ret);
8281 if ((!terminate) &&
8282 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8283 return(ret);
8284 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8285 ctxt->sax->setDocumentLocator(ctxt->userData,
8286 &xmlDefaultSAXLocator);
8287 if ((ctxt->input->cur[2] == 'x') &&
8288 (ctxt->input->cur[3] == 'm') &&
8289 (ctxt->input->cur[4] == 'l') &&
8290 (IS_BLANK(ctxt->input->cur[5]))) {
8291 ret += 5;
8292#ifdef DEBUG_PUSH
8293 xmlGenericError(xmlGenericErrorContext,
8294 "PP: Parsing XML Decl\n");
8295#endif
8296 xmlParseXMLDecl(ctxt);
8297 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8298 /*
8299 * The XML REC instructs us to stop parsing right
8300 * here
8301 */
8302 ctxt->instate = XML_PARSER_EOF;
8303 return(0);
8304 }
8305 ctxt->standalone = ctxt->input->standalone;
8306 if ((ctxt->encoding == NULL) &&
8307 (ctxt->input->encoding != NULL))
8308 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8309 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8310 (!ctxt->disableSAX))
8311 ctxt->sax->startDocument(ctxt->userData);
8312 ctxt->instate = XML_PARSER_MISC;
8313#ifdef DEBUG_PUSH
8314 xmlGenericError(xmlGenericErrorContext,
8315 "PP: entering MISC\n");
8316#endif
8317 } else {
8318 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8319 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8320 (!ctxt->disableSAX))
8321 ctxt->sax->startDocument(ctxt->userData);
8322 ctxt->instate = XML_PARSER_MISC;
8323#ifdef DEBUG_PUSH
8324 xmlGenericError(xmlGenericErrorContext,
8325 "PP: entering MISC\n");
8326#endif
8327 }
8328 } else {
8329 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8330 ctxt->sax->setDocumentLocator(ctxt->userData,
8331 &xmlDefaultSAXLocator);
8332 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8333 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8334 (!ctxt->disableSAX))
8335 ctxt->sax->startDocument(ctxt->userData);
8336 ctxt->instate = XML_PARSER_MISC;
8337#ifdef DEBUG_PUSH
8338 xmlGenericError(xmlGenericErrorContext,
8339 "PP: entering MISC\n");
8340#endif
8341 }
8342 break;
8343 case XML_PARSER_MISC:
8344 SKIP_BLANKS;
8345 if (ctxt->input->buf == NULL)
8346 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8347 else
8348 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8349 if (avail < 2)
8350 goto done;
8351 cur = ctxt->input->cur[0];
8352 next = ctxt->input->cur[1];
8353 if ((cur == '<') && (next == '?')) {
8354 if ((!terminate) &&
8355 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8356 goto done;
8357#ifdef DEBUG_PUSH
8358 xmlGenericError(xmlGenericErrorContext,
8359 "PP: Parsing PI\n");
8360#endif
8361 xmlParsePI(ctxt);
8362 } else if ((cur == '<') && (next == '!') &&
8363 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8364 if ((!terminate) &&
8365 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8366 goto done;
8367#ifdef DEBUG_PUSH
8368 xmlGenericError(xmlGenericErrorContext,
8369 "PP: Parsing Comment\n");
8370#endif
8371 xmlParseComment(ctxt);
8372 ctxt->instate = XML_PARSER_MISC;
8373 } else if ((cur == '<') && (next == '!') &&
8374 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8375 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8376 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8377 (ctxt->input->cur[8] == 'E')) {
8378 if ((!terminate) &&
8379 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8380 goto done;
8381#ifdef DEBUG_PUSH
8382 xmlGenericError(xmlGenericErrorContext,
8383 "PP: Parsing internal subset\n");
8384#endif
8385 ctxt->inSubset = 1;
8386 xmlParseDocTypeDecl(ctxt);
8387 if (RAW == '[') {
8388 ctxt->instate = XML_PARSER_DTD;
8389#ifdef DEBUG_PUSH
8390 xmlGenericError(xmlGenericErrorContext,
8391 "PP: entering DTD\n");
8392#endif
8393 } else {
8394 /*
8395 * Create and update the external subset.
8396 */
8397 ctxt->inSubset = 2;
8398 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8399 (ctxt->sax->externalSubset != NULL))
8400 ctxt->sax->externalSubset(ctxt->userData,
8401 ctxt->intSubName, ctxt->extSubSystem,
8402 ctxt->extSubURI);
8403 ctxt->inSubset = 0;
8404 ctxt->instate = XML_PARSER_PROLOG;
8405#ifdef DEBUG_PUSH
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: entering PROLOG\n");
8408#endif
8409 }
8410 } else if ((cur == '<') && (next == '!') &&
8411 (avail < 9)) {
8412 goto done;
8413 } else {
8414 ctxt->instate = XML_PARSER_START_TAG;
8415#ifdef DEBUG_PUSH
8416 xmlGenericError(xmlGenericErrorContext,
8417 "PP: entering START_TAG\n");
8418#endif
8419 }
8420 break;
8421 case XML_PARSER_IGNORE:
8422 xmlGenericError(xmlGenericErrorContext,
8423 "PP: internal error, state == IGNORE");
8424 ctxt->instate = XML_PARSER_DTD;
8425#ifdef DEBUG_PUSH
8426 xmlGenericError(xmlGenericErrorContext,
8427 "PP: entering DTD\n");
8428#endif
8429 break;
8430 case XML_PARSER_PROLOG:
8431 SKIP_BLANKS;
8432 if (ctxt->input->buf == NULL)
8433 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8434 else
8435 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8436 if (avail < 2)
8437 goto done;
8438 cur = ctxt->input->cur[0];
8439 next = ctxt->input->cur[1];
8440 if ((cur == '<') && (next == '?')) {
8441 if ((!terminate) &&
8442 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8443 goto done;
8444#ifdef DEBUG_PUSH
8445 xmlGenericError(xmlGenericErrorContext,
8446 "PP: Parsing PI\n");
8447#endif
8448 xmlParsePI(ctxt);
8449 } else if ((cur == '<') && (next == '!') &&
8450 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8451 if ((!terminate) &&
8452 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8453 goto done;
8454#ifdef DEBUG_PUSH
8455 xmlGenericError(xmlGenericErrorContext,
8456 "PP: Parsing Comment\n");
8457#endif
8458 xmlParseComment(ctxt);
8459 ctxt->instate = XML_PARSER_PROLOG;
8460 } else if ((cur == '<') && (next == '!') &&
8461 (avail < 4)) {
8462 goto done;
8463 } else {
8464 ctxt->instate = XML_PARSER_START_TAG;
8465#ifdef DEBUG_PUSH
8466 xmlGenericError(xmlGenericErrorContext,
8467 "PP: entering START_TAG\n");
8468#endif
8469 }
8470 break;
8471 case XML_PARSER_EPILOG:
8472 SKIP_BLANKS;
8473 if (ctxt->input->buf == NULL)
8474 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8475 else
8476 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8477 if (avail < 2)
8478 goto done;
8479 cur = ctxt->input->cur[0];
8480 next = ctxt->input->cur[1];
8481 if ((cur == '<') && (next == '?')) {
8482 if ((!terminate) &&
8483 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8484 goto done;
8485#ifdef DEBUG_PUSH
8486 xmlGenericError(xmlGenericErrorContext,
8487 "PP: Parsing PI\n");
8488#endif
8489 xmlParsePI(ctxt);
8490 ctxt->instate = XML_PARSER_EPILOG;
8491 } else if ((cur == '<') && (next == '!') &&
8492 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8493 if ((!terminate) &&
8494 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8495 goto done;
8496#ifdef DEBUG_PUSH
8497 xmlGenericError(xmlGenericErrorContext,
8498 "PP: Parsing Comment\n");
8499#endif
8500 xmlParseComment(ctxt);
8501 ctxt->instate = XML_PARSER_EPILOG;
8502 } else if ((cur == '<') && (next == '!') &&
8503 (avail < 4)) {
8504 goto done;
8505 } else {
8506 ctxt->errNo = XML_ERR_DOCUMENT_END;
8507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8508 ctxt->sax->error(ctxt->userData,
8509 "Extra content at the end of the document\n");
8510 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008511 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008512 ctxt->instate = XML_PARSER_EOF;
8513#ifdef DEBUG_PUSH
8514 xmlGenericError(xmlGenericErrorContext,
8515 "PP: entering EOF\n");
8516#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008517 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008518 ctxt->sax->endDocument(ctxt->userData);
8519 goto done;
8520 }
8521 break;
8522 case XML_PARSER_START_TAG: {
8523 xmlChar *name, *oldname;
8524
8525 if ((avail < 2) && (ctxt->inputNr == 1))
8526 goto done;
8527 cur = ctxt->input->cur[0];
8528 if (cur != '<') {
8529 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8531 ctxt->sax->error(ctxt->userData,
8532 "Start tag expect, '<' not found\n");
8533 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008534 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008535 ctxt->instate = XML_PARSER_EOF;
8536#ifdef DEBUG_PUSH
8537 xmlGenericError(xmlGenericErrorContext,
8538 "PP: entering EOF\n");
8539#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008540 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008541 ctxt->sax->endDocument(ctxt->userData);
8542 goto done;
8543 }
8544 if ((!terminate) &&
8545 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8546 goto done;
8547 if (ctxt->spaceNr == 0)
8548 spacePush(ctxt, -1);
8549 else
8550 spacePush(ctxt, *ctxt->space);
8551 name = xmlParseStartTag(ctxt);
8552 if (name == NULL) {
8553 spacePop(ctxt);
8554 ctxt->instate = XML_PARSER_EOF;
8555#ifdef DEBUG_PUSH
8556 xmlGenericError(xmlGenericErrorContext,
8557 "PP: entering EOF\n");
8558#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008559 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008560 ctxt->sax->endDocument(ctxt->userData);
8561 goto done;
8562 }
8563 namePush(ctxt, xmlStrdup(name));
8564
8565 /*
8566 * [ VC: Root Element Type ]
8567 * The Name in the document type declaration must match
8568 * the element type of the root element.
8569 */
8570 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8571 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8572 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8573
8574 /*
8575 * Check for an Empty Element.
8576 */
8577 if ((RAW == '/') && (NXT(1) == '>')) {
8578 SKIP(2);
8579 if ((ctxt->sax != NULL) &&
8580 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8581 ctxt->sax->endElement(ctxt->userData, name);
8582 xmlFree(name);
8583 oldname = namePop(ctxt);
8584 spacePop(ctxt);
8585 if (oldname != NULL) {
8586#ifdef DEBUG_STACK
8587 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8588#endif
8589 xmlFree(oldname);
8590 }
8591 if (ctxt->name == NULL) {
8592 ctxt->instate = XML_PARSER_EPILOG;
8593#ifdef DEBUG_PUSH
8594 xmlGenericError(xmlGenericErrorContext,
8595 "PP: entering EPILOG\n");
8596#endif
8597 } else {
8598 ctxt->instate = XML_PARSER_CONTENT;
8599#ifdef DEBUG_PUSH
8600 xmlGenericError(xmlGenericErrorContext,
8601 "PP: entering CONTENT\n");
8602#endif
8603 }
8604 break;
8605 }
8606 if (RAW == '>') {
8607 NEXT;
8608 } else {
8609 ctxt->errNo = XML_ERR_GT_REQUIRED;
8610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8611 ctxt->sax->error(ctxt->userData,
8612 "Couldn't find end of Start Tag %s\n",
8613 name);
8614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008616
8617 /*
8618 * end of parsing of this node.
8619 */
8620 nodePop(ctxt);
8621 oldname = namePop(ctxt);
8622 spacePop(ctxt);
8623 if (oldname != NULL) {
8624#ifdef DEBUG_STACK
8625 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8626#endif
8627 xmlFree(oldname);
8628 }
8629 }
8630 xmlFree(name);
8631 ctxt->instate = XML_PARSER_CONTENT;
8632#ifdef DEBUG_PUSH
8633 xmlGenericError(xmlGenericErrorContext,
8634 "PP: entering CONTENT\n");
8635#endif
8636 break;
8637 }
8638 case XML_PARSER_CONTENT: {
8639 const xmlChar *test;
8640 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008641 if ((avail < 2) && (ctxt->inputNr == 1))
8642 goto done;
8643 cur = ctxt->input->cur[0];
8644 next = ctxt->input->cur[1];
8645
8646 test = CUR_PTR;
8647 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008648 if ((cur == '<') && (next == '?')) {
8649 if ((!terminate) &&
8650 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8651 goto done;
8652#ifdef DEBUG_PUSH
8653 xmlGenericError(xmlGenericErrorContext,
8654 "PP: Parsing PI\n");
8655#endif
8656 xmlParsePI(ctxt);
8657 } else if ((cur == '<') && (next == '!') &&
8658 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8659 if ((!terminate) &&
8660 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8661 goto done;
8662#ifdef DEBUG_PUSH
8663 xmlGenericError(xmlGenericErrorContext,
8664 "PP: Parsing Comment\n");
8665#endif
8666 xmlParseComment(ctxt);
8667 ctxt->instate = XML_PARSER_CONTENT;
8668 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8669 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8670 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8671 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8672 (ctxt->input->cur[8] == '[')) {
8673 SKIP(9);
8674 ctxt->instate = XML_PARSER_CDATA_SECTION;
8675#ifdef DEBUG_PUSH
8676 xmlGenericError(xmlGenericErrorContext,
8677 "PP: entering CDATA_SECTION\n");
8678#endif
8679 break;
8680 } else if ((cur == '<') && (next == '!') &&
8681 (avail < 9)) {
8682 goto done;
8683 } else if ((cur == '<') && (next == '/')) {
8684 ctxt->instate = XML_PARSER_END_TAG;
8685#ifdef DEBUG_PUSH
8686 xmlGenericError(xmlGenericErrorContext,
8687 "PP: entering END_TAG\n");
8688#endif
8689 break;
8690 } else if (cur == '<') {
8691 ctxt->instate = XML_PARSER_START_TAG;
8692#ifdef DEBUG_PUSH
8693 xmlGenericError(xmlGenericErrorContext,
8694 "PP: entering START_TAG\n");
8695#endif
8696 break;
8697 } else if (cur == '&') {
8698 if ((!terminate) &&
8699 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8700 goto done;
8701#ifdef DEBUG_PUSH
8702 xmlGenericError(xmlGenericErrorContext,
8703 "PP: Parsing Reference\n");
8704#endif
8705 xmlParseReference(ctxt);
8706 } else {
8707 /* TODO Avoid the extra copy, handle directly !!! */
8708 /*
8709 * Goal of the following test is:
8710 * - minimize calls to the SAX 'character' callback
8711 * when they are mergeable
8712 * - handle an problem for isBlank when we only parse
8713 * a sequence of blank chars and the next one is
8714 * not available to check against '<' presence.
8715 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008716 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008717 * of the parser.
8718 */
8719 if ((ctxt->inputNr == 1) &&
8720 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8721 if ((!terminate) &&
8722 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8723 goto done;
8724 }
8725 ctxt->checkIndex = 0;
8726#ifdef DEBUG_PUSH
8727 xmlGenericError(xmlGenericErrorContext,
8728 "PP: Parsing char data\n");
8729#endif
8730 xmlParseCharData(ctxt, 0);
8731 }
8732 /*
8733 * Pop-up of finished entities.
8734 */
8735 while ((RAW == 0) && (ctxt->inputNr > 1))
8736 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008737 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008738 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8740 ctxt->sax->error(ctxt->userData,
8741 "detected an error in element content\n");
8742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008744 ctxt->instate = XML_PARSER_EOF;
8745 break;
8746 }
8747 break;
8748 }
8749 case XML_PARSER_CDATA_SECTION: {
8750 /*
8751 * The Push mode need to have the SAX callback for
8752 * cdataBlock merge back contiguous callbacks.
8753 */
8754 int base;
8755
8756 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8757 if (base < 0) {
8758 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8759 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8760 if (ctxt->sax->cdataBlock != NULL)
8761 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8762 XML_PARSER_BIG_BUFFER_SIZE);
8763 }
8764 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8765 ctxt->checkIndex = 0;
8766 }
8767 goto done;
8768 } else {
8769 if ((ctxt->sax != NULL) && (base > 0) &&
8770 (!ctxt->disableSAX)) {
8771 if (ctxt->sax->cdataBlock != NULL)
8772 ctxt->sax->cdataBlock(ctxt->userData,
8773 ctxt->input->cur, base);
8774 }
8775 SKIP(base + 3);
8776 ctxt->checkIndex = 0;
8777 ctxt->instate = XML_PARSER_CONTENT;
8778#ifdef DEBUG_PUSH
8779 xmlGenericError(xmlGenericErrorContext,
8780 "PP: entering CONTENT\n");
8781#endif
8782 }
8783 break;
8784 }
8785 case XML_PARSER_END_TAG:
8786 if (avail < 2)
8787 goto done;
8788 if ((!terminate) &&
8789 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8790 goto done;
8791 xmlParseEndTag(ctxt);
8792 if (ctxt->name == NULL) {
8793 ctxt->instate = XML_PARSER_EPILOG;
8794#ifdef DEBUG_PUSH
8795 xmlGenericError(xmlGenericErrorContext,
8796 "PP: entering EPILOG\n");
8797#endif
8798 } else {
8799 ctxt->instate = XML_PARSER_CONTENT;
8800#ifdef DEBUG_PUSH
8801 xmlGenericError(xmlGenericErrorContext,
8802 "PP: entering CONTENT\n");
8803#endif
8804 }
8805 break;
8806 case XML_PARSER_DTD: {
8807 /*
8808 * Sorry but progressive parsing of the internal subset
8809 * is not expected to be supported. We first check that
8810 * the full content of the internal subset is available and
8811 * the parsing is launched only at that point.
8812 * Internal subset ends up with "']' S? '>'" in an unescaped
8813 * section and not in a ']]>' sequence which are conditional
8814 * sections (whoever argued to keep that crap in XML deserve
8815 * a place in hell !).
8816 */
8817 int base, i;
8818 xmlChar *buf;
8819 xmlChar quote = 0;
8820
8821 base = ctxt->input->cur - ctxt->input->base;
8822 if (base < 0) return(0);
8823 if (ctxt->checkIndex > base)
8824 base = ctxt->checkIndex;
8825 buf = ctxt->input->buf->buffer->content;
8826 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8827 base++) {
8828 if (quote != 0) {
8829 if (buf[base] == quote)
8830 quote = 0;
8831 continue;
8832 }
8833 if (buf[base] == '"') {
8834 quote = '"';
8835 continue;
8836 }
8837 if (buf[base] == '\'') {
8838 quote = '\'';
8839 continue;
8840 }
8841 if (buf[base] == ']') {
8842 if ((unsigned int) base +1 >=
8843 ctxt->input->buf->buffer->use)
8844 break;
8845 if (buf[base + 1] == ']') {
8846 /* conditional crap, skip both ']' ! */
8847 base++;
8848 continue;
8849 }
8850 for (i = 0;
8851 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8852 i++) {
8853 if (buf[base + i] == '>')
8854 goto found_end_int_subset;
8855 }
8856 break;
8857 }
8858 }
8859 /*
8860 * We didn't found the end of the Internal subset
8861 */
8862 if (quote == 0)
8863 ctxt->checkIndex = base;
8864#ifdef DEBUG_PUSH
8865 if (next == 0)
8866 xmlGenericError(xmlGenericErrorContext,
8867 "PP: lookup of int subset end filed\n");
8868#endif
8869 goto done;
8870
8871found_end_int_subset:
8872 xmlParseInternalSubset(ctxt);
8873 ctxt->inSubset = 2;
8874 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8875 (ctxt->sax->externalSubset != NULL))
8876 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8877 ctxt->extSubSystem, ctxt->extSubURI);
8878 ctxt->inSubset = 0;
8879 ctxt->instate = XML_PARSER_PROLOG;
8880 ctxt->checkIndex = 0;
8881#ifdef DEBUG_PUSH
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: entering PROLOG\n");
8884#endif
8885 break;
8886 }
8887 case XML_PARSER_COMMENT:
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: internal error, state == COMMENT\n");
8890 ctxt->instate = XML_PARSER_CONTENT;
8891#ifdef DEBUG_PUSH
8892 xmlGenericError(xmlGenericErrorContext,
8893 "PP: entering CONTENT\n");
8894#endif
8895 break;
8896 case XML_PARSER_PI:
8897 xmlGenericError(xmlGenericErrorContext,
8898 "PP: internal error, state == PI\n");
8899 ctxt->instate = XML_PARSER_CONTENT;
8900#ifdef DEBUG_PUSH
8901 xmlGenericError(xmlGenericErrorContext,
8902 "PP: entering CONTENT\n");
8903#endif
8904 break;
8905 case XML_PARSER_ENTITY_DECL:
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: internal error, state == ENTITY_DECL\n");
8908 ctxt->instate = XML_PARSER_DTD;
8909#ifdef DEBUG_PUSH
8910 xmlGenericError(xmlGenericErrorContext,
8911 "PP: entering DTD\n");
8912#endif
8913 break;
8914 case XML_PARSER_ENTITY_VALUE:
8915 xmlGenericError(xmlGenericErrorContext,
8916 "PP: internal error, state == ENTITY_VALUE\n");
8917 ctxt->instate = XML_PARSER_CONTENT;
8918#ifdef DEBUG_PUSH
8919 xmlGenericError(xmlGenericErrorContext,
8920 "PP: entering DTD\n");
8921#endif
8922 break;
8923 case XML_PARSER_ATTRIBUTE_VALUE:
8924 xmlGenericError(xmlGenericErrorContext,
8925 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8926 ctxt->instate = XML_PARSER_START_TAG;
8927#ifdef DEBUG_PUSH
8928 xmlGenericError(xmlGenericErrorContext,
8929 "PP: entering START_TAG\n");
8930#endif
8931 break;
8932 case XML_PARSER_SYSTEM_LITERAL:
8933 xmlGenericError(xmlGenericErrorContext,
8934 "PP: internal error, state == SYSTEM_LITERAL\n");
8935 ctxt->instate = XML_PARSER_START_TAG;
8936#ifdef DEBUG_PUSH
8937 xmlGenericError(xmlGenericErrorContext,
8938 "PP: entering START_TAG\n");
8939#endif
8940 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008941 case XML_PARSER_PUBLIC_LITERAL:
8942 xmlGenericError(xmlGenericErrorContext,
8943 "PP: internal error, state == PUBLIC_LITERAL\n");
8944 ctxt->instate = XML_PARSER_START_TAG;
8945#ifdef DEBUG_PUSH
8946 xmlGenericError(xmlGenericErrorContext,
8947 "PP: entering START_TAG\n");
8948#endif
8949 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008950 }
8951 }
8952done:
8953#ifdef DEBUG_PUSH
8954 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8955#endif
8956 return(ret);
8957}
8958
8959/**
Owen Taylor3473f882001-02-23 17:55:21 +00008960 * xmlParseChunk:
8961 * @ctxt: an XML parser context
8962 * @chunk: an char array
8963 * @size: the size in byte of the chunk
8964 * @terminate: last chunk indicator
8965 *
8966 * Parse a Chunk of memory
8967 *
8968 * Returns zero if no error, the xmlParserErrors otherwise.
8969 */
8970int
8971xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8972 int terminate) {
8973 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8974 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8975 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8976 int cur = ctxt->input->cur - ctxt->input->base;
8977
8978 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8979 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8980 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008981 ctxt->input->end =
8982 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008983#ifdef DEBUG_PUSH
8984 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8985#endif
8986
8987 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8988 xmlParseTryOrFinish(ctxt, terminate);
8989 } else if (ctxt->instate != XML_PARSER_EOF) {
8990 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8991 xmlParserInputBufferPtr in = ctxt->input->buf;
8992 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8993 (in->raw != NULL)) {
8994 int nbchars;
8995
8996 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8997 if (nbchars < 0) {
8998 xmlGenericError(xmlGenericErrorContext,
8999 "xmlParseChunk: encoder error\n");
9000 return(XML_ERR_INVALID_ENCODING);
9001 }
9002 }
9003 }
9004 }
9005 xmlParseTryOrFinish(ctxt, terminate);
9006 if (terminate) {
9007 /*
9008 * Check for termination
9009 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009010 int avail = 0;
9011 if (ctxt->input->buf == NULL)
9012 avail = ctxt->input->length -
9013 (ctxt->input->cur - ctxt->input->base);
9014 else
9015 avail = ctxt->input->buf->buffer->use -
9016 (ctxt->input->cur - ctxt->input->base);
9017
Owen Taylor3473f882001-02-23 17:55:21 +00009018 if ((ctxt->instate != XML_PARSER_EOF) &&
9019 (ctxt->instate != XML_PARSER_EPILOG)) {
9020 ctxt->errNo = XML_ERR_DOCUMENT_END;
9021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9022 ctxt->sax->error(ctxt->userData,
9023 "Extra content at the end of the document\n");
9024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009026 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009027 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9028 ctxt->errNo = XML_ERR_DOCUMENT_END;
9029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9030 ctxt->sax->error(ctxt->userData,
9031 "Extra content at the end of the document\n");
9032 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009033 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009034
9035 }
Owen Taylor3473f882001-02-23 17:55:21 +00009036 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009037 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009038 ctxt->sax->endDocument(ctxt->userData);
9039 }
9040 ctxt->instate = XML_PARSER_EOF;
9041 }
9042 return((xmlParserErrors) ctxt->errNo);
9043}
9044
9045/************************************************************************
9046 * *
9047 * I/O front end functions to the parser *
9048 * *
9049 ************************************************************************/
9050
9051/**
9052 * xmlStopParser:
9053 * @ctxt: an XML parser context
9054 *
9055 * Blocks further parser processing
9056 */
9057void
9058xmlStopParser(xmlParserCtxtPtr ctxt) {
9059 ctxt->instate = XML_PARSER_EOF;
9060 if (ctxt->input != NULL)
9061 ctxt->input->cur = BAD_CAST"";
9062}
9063
9064/**
9065 * xmlCreatePushParserCtxt:
9066 * @sax: a SAX handler
9067 * @user_data: The user data returned on SAX callbacks
9068 * @chunk: a pointer to an array of chars
9069 * @size: number of chars in the array
9070 * @filename: an optional file name or URI
9071 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009072 * Create a parser context for using the XML parser in push mode.
9073 * If @buffer and @size are non-NULL, the data is used to detect
9074 * the encoding. The remaining characters will be parsed so they
9075 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009076 * To allow content encoding detection, @size should be >= 4
9077 * The value of @filename is used for fetching external entities
9078 * and error/warning reports.
9079 *
9080 * Returns the new parser context or NULL
9081 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009082
Owen Taylor3473f882001-02-23 17:55:21 +00009083xmlParserCtxtPtr
9084xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9085 const char *chunk, int size, const char *filename) {
9086 xmlParserCtxtPtr ctxt;
9087 xmlParserInputPtr inputStream;
9088 xmlParserInputBufferPtr buf;
9089 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9090
9091 /*
9092 * plug some encoding conversion routines
9093 */
9094 if ((chunk != NULL) && (size >= 4))
9095 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9096
9097 buf = xmlAllocParserInputBuffer(enc);
9098 if (buf == NULL) return(NULL);
9099
9100 ctxt = xmlNewParserCtxt();
9101 if (ctxt == NULL) {
9102 xmlFree(buf);
9103 return(NULL);
9104 }
9105 if (sax != NULL) {
9106 if (ctxt->sax != &xmlDefaultSAXHandler)
9107 xmlFree(ctxt->sax);
9108 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9109 if (ctxt->sax == NULL) {
9110 xmlFree(buf);
9111 xmlFree(ctxt);
9112 return(NULL);
9113 }
9114 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9115 if (user_data != NULL)
9116 ctxt->userData = user_data;
9117 }
9118 if (filename == NULL) {
9119 ctxt->directory = NULL;
9120 } else {
9121 ctxt->directory = xmlParserGetDirectory(filename);
9122 }
9123
9124 inputStream = xmlNewInputStream(ctxt);
9125 if (inputStream == NULL) {
9126 xmlFreeParserCtxt(ctxt);
9127 return(NULL);
9128 }
9129
9130 if (filename == NULL)
9131 inputStream->filename = NULL;
9132 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009133 inputStream->filename = (char *)
9134 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009135 inputStream->buf = buf;
9136 inputStream->base = inputStream->buf->buffer->content;
9137 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009138 inputStream->end =
9139 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009140
9141 inputPush(ctxt, inputStream);
9142
9143 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9144 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009145 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9146 int cur = ctxt->input->cur - ctxt->input->base;
9147
Owen Taylor3473f882001-02-23 17:55:21 +00009148 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009149
9150 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9151 ctxt->input->cur = ctxt->input->base + cur;
9152 ctxt->input->end =
9153 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009154#ifdef DEBUG_PUSH
9155 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9156#endif
9157 }
9158
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009159 if (enc != XML_CHAR_ENCODING_NONE) {
9160 xmlSwitchEncoding(ctxt, enc);
9161 }
9162
Owen Taylor3473f882001-02-23 17:55:21 +00009163 return(ctxt);
9164}
9165
9166/**
9167 * xmlCreateIOParserCtxt:
9168 * @sax: a SAX handler
9169 * @user_data: The user data returned on SAX callbacks
9170 * @ioread: an I/O read function
9171 * @ioclose: an I/O close function
9172 * @ioctx: an I/O handler
9173 * @enc: the charset encoding if known
9174 *
9175 * Create a parser context for using the XML parser with an existing
9176 * I/O stream
9177 *
9178 * Returns the new parser context or NULL
9179 */
9180xmlParserCtxtPtr
9181xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9182 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9183 void *ioctx, xmlCharEncoding enc) {
9184 xmlParserCtxtPtr ctxt;
9185 xmlParserInputPtr inputStream;
9186 xmlParserInputBufferPtr buf;
9187
9188 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9189 if (buf == NULL) return(NULL);
9190
9191 ctxt = xmlNewParserCtxt();
9192 if (ctxt == NULL) {
9193 xmlFree(buf);
9194 return(NULL);
9195 }
9196 if (sax != NULL) {
9197 if (ctxt->sax != &xmlDefaultSAXHandler)
9198 xmlFree(ctxt->sax);
9199 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9200 if (ctxt->sax == NULL) {
9201 xmlFree(buf);
9202 xmlFree(ctxt);
9203 return(NULL);
9204 }
9205 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9206 if (user_data != NULL)
9207 ctxt->userData = user_data;
9208 }
9209
9210 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9211 if (inputStream == NULL) {
9212 xmlFreeParserCtxt(ctxt);
9213 return(NULL);
9214 }
9215 inputPush(ctxt, inputStream);
9216
9217 return(ctxt);
9218}
9219
9220/************************************************************************
9221 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009222 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009223 * *
9224 ************************************************************************/
9225
9226/**
9227 * xmlIOParseDTD:
9228 * @sax: the SAX handler block or NULL
9229 * @input: an Input Buffer
9230 * @enc: the charset encoding if known
9231 *
9232 * Load and parse a DTD
9233 *
9234 * Returns the resulting xmlDtdPtr or NULL in case of error.
9235 * @input will be freed at parsing end.
9236 */
9237
9238xmlDtdPtr
9239xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9240 xmlCharEncoding enc) {
9241 xmlDtdPtr ret = NULL;
9242 xmlParserCtxtPtr ctxt;
9243 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009244 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009245
9246 if (input == NULL)
9247 return(NULL);
9248
9249 ctxt = xmlNewParserCtxt();
9250 if (ctxt == NULL) {
9251 return(NULL);
9252 }
9253
9254 /*
9255 * Set-up the SAX context
9256 */
9257 if (sax != NULL) {
9258 if (ctxt->sax != NULL)
9259 xmlFree(ctxt->sax);
9260 ctxt->sax = sax;
9261 ctxt->userData = NULL;
9262 }
9263
9264 /*
9265 * generate a parser input from the I/O handler
9266 */
9267
9268 pinput = xmlNewIOInputStream(ctxt, input, enc);
9269 if (pinput == NULL) {
9270 if (sax != NULL) ctxt->sax = NULL;
9271 xmlFreeParserCtxt(ctxt);
9272 return(NULL);
9273 }
9274
9275 /*
9276 * plug some encoding conversion routines here.
9277 */
9278 xmlPushInput(ctxt, pinput);
9279
9280 pinput->filename = NULL;
9281 pinput->line = 1;
9282 pinput->col = 1;
9283 pinput->base = ctxt->input->cur;
9284 pinput->cur = ctxt->input->cur;
9285 pinput->free = NULL;
9286
9287 /*
9288 * let's parse that entity knowing it's an external subset.
9289 */
9290 ctxt->inSubset = 2;
9291 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9292 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9293 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009294
9295 if (enc == XML_CHAR_ENCODING_NONE) {
9296 /*
9297 * Get the 4 first bytes and decode the charset
9298 * if enc != XML_CHAR_ENCODING_NONE
9299 * plug some encoding conversion routines.
9300 */
9301 start[0] = RAW;
9302 start[1] = NXT(1);
9303 start[2] = NXT(2);
9304 start[3] = NXT(3);
9305 enc = xmlDetectCharEncoding(start, 4);
9306 if (enc != XML_CHAR_ENCODING_NONE) {
9307 xmlSwitchEncoding(ctxt, enc);
9308 }
9309 }
9310
Owen Taylor3473f882001-02-23 17:55:21 +00009311 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9312
9313 if (ctxt->myDoc != NULL) {
9314 if (ctxt->wellFormed) {
9315 ret = ctxt->myDoc->extSubset;
9316 ctxt->myDoc->extSubset = NULL;
9317 } else {
9318 ret = NULL;
9319 }
9320 xmlFreeDoc(ctxt->myDoc);
9321 ctxt->myDoc = NULL;
9322 }
9323 if (sax != NULL) ctxt->sax = NULL;
9324 xmlFreeParserCtxt(ctxt);
9325
9326 return(ret);
9327}
9328
9329/**
9330 * xmlSAXParseDTD:
9331 * @sax: the SAX handler block
9332 * @ExternalID: a NAME* containing the External ID of the DTD
9333 * @SystemID: a NAME* containing the URL to the DTD
9334 *
9335 * Load and parse an external subset.
9336 *
9337 * Returns the resulting xmlDtdPtr or NULL in case of error.
9338 */
9339
9340xmlDtdPtr
9341xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9342 const xmlChar *SystemID) {
9343 xmlDtdPtr ret = NULL;
9344 xmlParserCtxtPtr ctxt;
9345 xmlParserInputPtr input = NULL;
9346 xmlCharEncoding enc;
9347
9348 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9349
9350 ctxt = xmlNewParserCtxt();
9351 if (ctxt == NULL) {
9352 return(NULL);
9353 }
9354
9355 /*
9356 * Set-up the SAX context
9357 */
9358 if (sax != NULL) {
9359 if (ctxt->sax != NULL)
9360 xmlFree(ctxt->sax);
9361 ctxt->sax = sax;
9362 ctxt->userData = NULL;
9363 }
9364
9365 /*
9366 * Ask the Entity resolver to load the damn thing
9367 */
9368
9369 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9370 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9371 if (input == NULL) {
9372 if (sax != NULL) ctxt->sax = NULL;
9373 xmlFreeParserCtxt(ctxt);
9374 return(NULL);
9375 }
9376
9377 /*
9378 * plug some encoding conversion routines here.
9379 */
9380 xmlPushInput(ctxt, input);
9381 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9382 xmlSwitchEncoding(ctxt, enc);
9383
9384 if (input->filename == NULL)
9385 input->filename = (char *) xmlStrdup(SystemID);
9386 input->line = 1;
9387 input->col = 1;
9388 input->base = ctxt->input->cur;
9389 input->cur = ctxt->input->cur;
9390 input->free = NULL;
9391
9392 /*
9393 * let's parse that entity knowing it's an external subset.
9394 */
9395 ctxt->inSubset = 2;
9396 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9397 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9398 ExternalID, SystemID);
9399 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9400
9401 if (ctxt->myDoc != NULL) {
9402 if (ctxt->wellFormed) {
9403 ret = ctxt->myDoc->extSubset;
9404 ctxt->myDoc->extSubset = NULL;
9405 } else {
9406 ret = NULL;
9407 }
9408 xmlFreeDoc(ctxt->myDoc);
9409 ctxt->myDoc = NULL;
9410 }
9411 if (sax != NULL) ctxt->sax = NULL;
9412 xmlFreeParserCtxt(ctxt);
9413
9414 return(ret);
9415}
9416
9417/**
9418 * xmlParseDTD:
9419 * @ExternalID: a NAME* containing the External ID of the DTD
9420 * @SystemID: a NAME* containing the URL to the DTD
9421 *
9422 * Load and parse an external subset.
9423 *
9424 * Returns the resulting xmlDtdPtr or NULL in case of error.
9425 */
9426
9427xmlDtdPtr
9428xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9429 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9430}
9431
9432/************************************************************************
9433 * *
9434 * Front ends when parsing an Entity *
9435 * *
9436 ************************************************************************/
9437
9438/**
Owen Taylor3473f882001-02-23 17:55:21 +00009439 * xmlParseCtxtExternalEntity:
9440 * @ctx: the existing parsing context
9441 * @URL: the URL for the entity to load
9442 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009443 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009444 *
9445 * Parse an external general entity within an existing parsing context
9446 * An external general parsed entity is well-formed if it matches the
9447 * production labeled extParsedEnt.
9448 *
9449 * [78] extParsedEnt ::= TextDecl? content
9450 *
9451 * Returns 0 if the entity is well formed, -1 in case of args problem and
9452 * the parser error code otherwise
9453 */
9454
9455int
9456xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009457 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009458 xmlParserCtxtPtr ctxt;
9459 xmlDocPtr newDoc;
9460 xmlSAXHandlerPtr oldsax = NULL;
9461 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009462 xmlChar start[4];
9463 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009464
9465 if (ctx->depth > 40) {
9466 return(XML_ERR_ENTITY_LOOP);
9467 }
9468
Daniel Veillardcda96922001-08-21 10:56:31 +00009469 if (lst != NULL)
9470 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009471 if ((URL == NULL) && (ID == NULL))
9472 return(-1);
9473 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9474 return(-1);
9475
9476
9477 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9478 if (ctxt == NULL) return(-1);
9479 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009480 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009481 oldsax = ctxt->sax;
9482 ctxt->sax = ctx->sax;
9483 newDoc = xmlNewDoc(BAD_CAST "1.0");
9484 if (newDoc == NULL) {
9485 xmlFreeParserCtxt(ctxt);
9486 return(-1);
9487 }
9488 if (ctx->myDoc != NULL) {
9489 newDoc->intSubset = ctx->myDoc->intSubset;
9490 newDoc->extSubset = ctx->myDoc->extSubset;
9491 }
9492 if (ctx->myDoc->URL != NULL) {
9493 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9494 }
9495 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9496 if (newDoc->children == NULL) {
9497 ctxt->sax = oldsax;
9498 xmlFreeParserCtxt(ctxt);
9499 newDoc->intSubset = NULL;
9500 newDoc->extSubset = NULL;
9501 xmlFreeDoc(newDoc);
9502 return(-1);
9503 }
9504 nodePush(ctxt, newDoc->children);
9505 if (ctx->myDoc == NULL) {
9506 ctxt->myDoc = newDoc;
9507 } else {
9508 ctxt->myDoc = ctx->myDoc;
9509 newDoc->children->doc = ctx->myDoc;
9510 }
9511
Daniel Veillard87a764e2001-06-20 17:41:10 +00009512 /*
9513 * Get the 4 first bytes and decode the charset
9514 * if enc != XML_CHAR_ENCODING_NONE
9515 * plug some encoding conversion routines.
9516 */
9517 GROW
9518 start[0] = RAW;
9519 start[1] = NXT(1);
9520 start[2] = NXT(2);
9521 start[3] = NXT(3);
9522 enc = xmlDetectCharEncoding(start, 4);
9523 if (enc != XML_CHAR_ENCODING_NONE) {
9524 xmlSwitchEncoding(ctxt, enc);
9525 }
9526
Owen Taylor3473f882001-02-23 17:55:21 +00009527 /*
9528 * Parse a possible text declaration first
9529 */
Owen Taylor3473f882001-02-23 17:55:21 +00009530 if ((RAW == '<') && (NXT(1) == '?') &&
9531 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9532 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9533 xmlParseTextDecl(ctxt);
9534 }
9535
9536 /*
9537 * Doing validity checking on chunk doesn't make sense
9538 */
9539 ctxt->instate = XML_PARSER_CONTENT;
9540 ctxt->validate = ctx->validate;
9541 ctxt->loadsubset = ctx->loadsubset;
9542 ctxt->depth = ctx->depth + 1;
9543 ctxt->replaceEntities = ctx->replaceEntities;
9544 if (ctxt->validate) {
9545 ctxt->vctxt.error = ctx->vctxt.error;
9546 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009547 } else {
9548 ctxt->vctxt.error = NULL;
9549 ctxt->vctxt.warning = NULL;
9550 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009551 ctxt->vctxt.nodeTab = NULL;
9552 ctxt->vctxt.nodeNr = 0;
9553 ctxt->vctxt.nodeMax = 0;
9554 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009555
9556 xmlParseContent(ctxt);
9557
9558 if ((RAW == '<') && (NXT(1) == '/')) {
9559 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9561 ctxt->sax->error(ctxt->userData,
9562 "chunk is not well balanced\n");
9563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009565 } else if (RAW != 0) {
9566 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9568 ctxt->sax->error(ctxt->userData,
9569 "extra content at the end of well balanced chunk\n");
9570 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009571 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009572 }
9573 if (ctxt->node != newDoc->children) {
9574 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9576 ctxt->sax->error(ctxt->userData,
9577 "chunk is not well balanced\n");
9578 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009579 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009580 }
9581
9582 if (!ctxt->wellFormed) {
9583 if (ctxt->errNo == 0)
9584 ret = 1;
9585 else
9586 ret = ctxt->errNo;
9587 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009588 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009589 xmlNodePtr cur;
9590
9591 /*
9592 * Return the newly created nodeset after unlinking it from
9593 * they pseudo parent.
9594 */
9595 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009596 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009597 while (cur != NULL) {
9598 cur->parent = NULL;
9599 cur = cur->next;
9600 }
9601 newDoc->children->children = NULL;
9602 }
9603 ret = 0;
9604 }
9605 ctxt->sax = oldsax;
9606 xmlFreeParserCtxt(ctxt);
9607 newDoc->intSubset = NULL;
9608 newDoc->extSubset = NULL;
9609 xmlFreeDoc(newDoc);
9610
9611 return(ret);
9612}
9613
9614/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009615 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009616 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009617 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009618 * @sax: the SAX handler bloc (possibly NULL)
9619 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9620 * @depth: Used for loop detection, use 0
9621 * @URL: the URL for the entity to load
9622 * @ID: the System ID for the entity to load
9623 * @list: the return value for the set of parsed nodes
9624 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009625 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009626 *
9627 * Returns 0 if the entity is well formed, -1 in case of args problem and
9628 * the parser error code otherwise
9629 */
9630
Daniel Veillard257d9102001-05-08 10:41:44 +00009631static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009632xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9633 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009634 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009635 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009636 xmlParserCtxtPtr ctxt;
9637 xmlDocPtr newDoc;
9638 xmlSAXHandlerPtr oldsax = NULL;
9639 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009640 xmlChar start[4];
9641 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009642
9643 if (depth > 40) {
9644 return(XML_ERR_ENTITY_LOOP);
9645 }
9646
9647
9648
9649 if (list != NULL)
9650 *list = NULL;
9651 if ((URL == NULL) && (ID == NULL))
9652 return(-1);
9653 if (doc == NULL) /* @@ relax but check for dereferences */
9654 return(-1);
9655
9656
9657 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9658 if (ctxt == NULL) return(-1);
9659 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009660 if (oldctxt != NULL) {
9661 ctxt->_private = oldctxt->_private;
9662 ctxt->loadsubset = oldctxt->loadsubset;
9663 ctxt->validate = oldctxt->validate;
9664 ctxt->external = oldctxt->external;
9665 } else {
9666 /*
9667 * Doing validity checking on chunk without context
9668 * doesn't make sense
9669 */
9670 ctxt->_private = NULL;
9671 ctxt->validate = 0;
9672 ctxt->external = 2;
9673 ctxt->loadsubset = 0;
9674 }
Owen Taylor3473f882001-02-23 17:55:21 +00009675 if (sax != NULL) {
9676 oldsax = ctxt->sax;
9677 ctxt->sax = sax;
9678 if (user_data != NULL)
9679 ctxt->userData = user_data;
9680 }
9681 newDoc = xmlNewDoc(BAD_CAST "1.0");
9682 if (newDoc == NULL) {
9683 xmlFreeParserCtxt(ctxt);
9684 return(-1);
9685 }
9686 if (doc != NULL) {
9687 newDoc->intSubset = doc->intSubset;
9688 newDoc->extSubset = doc->extSubset;
9689 }
9690 if (doc->URL != NULL) {
9691 newDoc->URL = xmlStrdup(doc->URL);
9692 }
9693 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9694 if (newDoc->children == NULL) {
9695 if (sax != NULL)
9696 ctxt->sax = oldsax;
9697 xmlFreeParserCtxt(ctxt);
9698 newDoc->intSubset = NULL;
9699 newDoc->extSubset = NULL;
9700 xmlFreeDoc(newDoc);
9701 return(-1);
9702 }
9703 nodePush(ctxt, newDoc->children);
9704 if (doc == NULL) {
9705 ctxt->myDoc = newDoc;
9706 } else {
9707 ctxt->myDoc = doc;
9708 newDoc->children->doc = doc;
9709 }
9710
Daniel Veillard87a764e2001-06-20 17:41:10 +00009711 /*
9712 * Get the 4 first bytes and decode the charset
9713 * if enc != XML_CHAR_ENCODING_NONE
9714 * plug some encoding conversion routines.
9715 */
9716 GROW;
9717 start[0] = RAW;
9718 start[1] = NXT(1);
9719 start[2] = NXT(2);
9720 start[3] = NXT(3);
9721 enc = xmlDetectCharEncoding(start, 4);
9722 if (enc != XML_CHAR_ENCODING_NONE) {
9723 xmlSwitchEncoding(ctxt, enc);
9724 }
9725
Owen Taylor3473f882001-02-23 17:55:21 +00009726 /*
9727 * Parse a possible text declaration first
9728 */
Owen Taylor3473f882001-02-23 17:55:21 +00009729 if ((RAW == '<') && (NXT(1) == '?') &&
9730 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9731 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9732 xmlParseTextDecl(ctxt);
9733 }
9734
Owen Taylor3473f882001-02-23 17:55:21 +00009735 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009736 ctxt->depth = depth;
9737
9738 xmlParseContent(ctxt);
9739
Daniel Veillard561b7f82002-03-20 21:55:57 +00009740 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009741 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9743 ctxt->sax->error(ctxt->userData,
9744 "chunk is not well balanced\n");
9745 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009746 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009747 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009748 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9750 ctxt->sax->error(ctxt->userData,
9751 "extra content at the end of well balanced chunk\n");
9752 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009753 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009754 }
9755 if (ctxt->node != newDoc->children) {
9756 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9758 ctxt->sax->error(ctxt->userData,
9759 "chunk is not well balanced\n");
9760 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 }
9763
9764 if (!ctxt->wellFormed) {
9765 if (ctxt->errNo == 0)
9766 ret = 1;
9767 else
9768 ret = ctxt->errNo;
9769 } else {
9770 if (list != NULL) {
9771 xmlNodePtr cur;
9772
9773 /*
9774 * Return the newly created nodeset after unlinking it from
9775 * they pseudo parent.
9776 */
9777 cur = newDoc->children->children;
9778 *list = cur;
9779 while (cur != NULL) {
9780 cur->parent = NULL;
9781 cur = cur->next;
9782 }
9783 newDoc->children->children = NULL;
9784 }
9785 ret = 0;
9786 }
9787 if (sax != NULL)
9788 ctxt->sax = oldsax;
9789 xmlFreeParserCtxt(ctxt);
9790 newDoc->intSubset = NULL;
9791 newDoc->extSubset = NULL;
9792 xmlFreeDoc(newDoc);
9793
9794 return(ret);
9795}
9796
9797/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009798 * xmlParseExternalEntity:
9799 * @doc: the document the chunk pertains to
9800 * @sax: the SAX handler bloc (possibly NULL)
9801 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9802 * @depth: Used for loop detection, use 0
9803 * @URL: the URL for the entity to load
9804 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009805 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009806 *
9807 * Parse an external general entity
9808 * An external general parsed entity is well-formed if it matches the
9809 * production labeled extParsedEnt.
9810 *
9811 * [78] extParsedEnt ::= TextDecl? content
9812 *
9813 * Returns 0 if the entity is well formed, -1 in case of args problem and
9814 * the parser error code otherwise
9815 */
9816
9817int
9818xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009819 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009820 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009821 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009822}
9823
9824/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009825 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009826 * @doc: the document the chunk pertains to
9827 * @sax: the SAX handler bloc (possibly NULL)
9828 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9829 * @depth: Used for loop detection, use 0
9830 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009831 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009832 *
9833 * Parse a well-balanced chunk of an XML document
9834 * called by the parser
9835 * The allowed sequence for the Well Balanced Chunk is the one defined by
9836 * the content production in the XML grammar:
9837 *
9838 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9839 *
9840 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9841 * the parser error code otherwise
9842 */
9843
9844int
9845xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009846 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009847 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9848 depth, string, lst, 0 );
9849}
9850
9851/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009852 * xmlParseBalancedChunkMemoryInternal:
9853 * @oldctxt: the existing parsing context
9854 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9855 * @user_data: the user data field for the parser context
9856 * @lst: the return value for the set of parsed nodes
9857 *
9858 *
9859 * Parse a well-balanced chunk of an XML document
9860 * called by the parser
9861 * The allowed sequence for the Well Balanced Chunk is the one defined by
9862 * the content production in the XML grammar:
9863 *
9864 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9865 *
9866 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9867 * the parser error code otherwise
9868 *
9869 * In case recover is set to 1, the nodelist will not be empty even if
9870 * the parsed chunk is not well balanced.
9871 */
9872static int
9873xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9874 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9875 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009876 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009877 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009878 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009879 int size;
9880 int ret = 0;
9881
9882 if (oldctxt->depth > 40) {
9883 return(XML_ERR_ENTITY_LOOP);
9884 }
9885
9886
9887 if (lst != NULL)
9888 *lst = NULL;
9889 if (string == NULL)
9890 return(-1);
9891
9892 size = xmlStrlen(string);
9893
9894 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9895 if (ctxt == NULL) return(-1);
9896 if (user_data != NULL)
9897 ctxt->userData = user_data;
9898 else
9899 ctxt->userData = ctxt;
9900
9901 oldsax = ctxt->sax;
9902 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009903 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009904 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009905 newDoc = xmlNewDoc(BAD_CAST "1.0");
9906 if (newDoc == NULL) {
9907 ctxt->sax = oldsax;
9908 xmlFreeParserCtxt(ctxt);
9909 return(-1);
9910 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009911 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009912 } else {
9913 ctxt->myDoc = oldctxt->myDoc;
9914 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009915 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009916 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009917 BAD_CAST "pseudoroot", NULL);
9918 if (ctxt->myDoc->children == NULL) {
9919 ctxt->sax = oldsax;
9920 xmlFreeParserCtxt(ctxt);
9921 if (newDoc != NULL)
9922 xmlFreeDoc(newDoc);
9923 return(-1);
9924 }
9925 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009926 ctxt->instate = XML_PARSER_CONTENT;
9927 ctxt->depth = oldctxt->depth + 1;
9928
9929 /*
9930 * Doing validity checking on chunk doesn't make sense
9931 */
9932 ctxt->validate = 0;
9933 ctxt->loadsubset = oldctxt->loadsubset;
9934
Daniel Veillard68e9e742002-11-16 15:35:11 +00009935 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009936 if ((RAW == '<') && (NXT(1) == '/')) {
9937 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9939 ctxt->sax->error(ctxt->userData,
9940 "chunk is not well balanced\n");
9941 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009943 } else if (RAW != 0) {
9944 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9946 ctxt->sax->error(ctxt->userData,
9947 "extra content at the end of well balanced chunk\n");
9948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009950 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009951 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009952 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9954 ctxt->sax->error(ctxt->userData,
9955 "chunk is not well balanced\n");
9956 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009957 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009958 }
9959
9960 if (!ctxt->wellFormed) {
9961 if (ctxt->errNo == 0)
9962 ret = 1;
9963 else
9964 ret = ctxt->errNo;
9965 } else {
9966 ret = 0;
9967 }
9968
9969 if ((lst != NULL) && (ret == 0)) {
9970 xmlNodePtr cur;
9971
9972 /*
9973 * Return the newly created nodeset after unlinking it from
9974 * they pseudo parent.
9975 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009976 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009977 *lst = cur;
9978 while (cur != NULL) {
9979 cur->parent = NULL;
9980 cur = cur->next;
9981 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009982 ctxt->myDoc->children->children = NULL;
9983 }
9984 if (ctxt->myDoc != NULL) {
9985 xmlFreeNode(ctxt->myDoc->children);
9986 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009987 }
9988
9989 ctxt->sax = oldsax;
9990 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009991 if (newDoc != NULL)
9992 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009993
9994 return(ret);
9995}
9996
9997/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009998 * xmlParseBalancedChunkMemoryRecover:
9999 * @doc: the document the chunk pertains to
10000 * @sax: the SAX handler bloc (possibly NULL)
10001 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10002 * @depth: Used for loop detection, use 0
10003 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10004 * @lst: the return value for the set of parsed nodes
10005 * @recover: return nodes even if the data is broken (use 0)
10006 *
10007 *
10008 * Parse a well-balanced chunk of an XML document
10009 * called by the parser
10010 * The allowed sequence for the Well Balanced Chunk is the one defined by
10011 * the content production in the XML grammar:
10012 *
10013 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10014 *
10015 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10016 * the parser error code otherwise
10017 *
10018 * In case recover is set to 1, the nodelist will not be empty even if
10019 * the parsed chunk is not well balanced.
10020 */
10021int
10022xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10023 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10024 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010025 xmlParserCtxtPtr ctxt;
10026 xmlDocPtr newDoc;
10027 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010028 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010029 int size;
10030 int ret = 0;
10031
10032 if (depth > 40) {
10033 return(XML_ERR_ENTITY_LOOP);
10034 }
10035
10036
Daniel Veillardcda96922001-08-21 10:56:31 +000010037 if (lst != NULL)
10038 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010039 if (string == NULL)
10040 return(-1);
10041
10042 size = xmlStrlen(string);
10043
10044 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10045 if (ctxt == NULL) return(-1);
10046 ctxt->userData = ctxt;
10047 if (sax != NULL) {
10048 oldsax = ctxt->sax;
10049 ctxt->sax = sax;
10050 if (user_data != NULL)
10051 ctxt->userData = user_data;
10052 }
10053 newDoc = xmlNewDoc(BAD_CAST "1.0");
10054 if (newDoc == NULL) {
10055 xmlFreeParserCtxt(ctxt);
10056 return(-1);
10057 }
10058 if (doc != NULL) {
10059 newDoc->intSubset = doc->intSubset;
10060 newDoc->extSubset = doc->extSubset;
10061 }
10062 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10063 if (newDoc->children == NULL) {
10064 if (sax != NULL)
10065 ctxt->sax = oldsax;
10066 xmlFreeParserCtxt(ctxt);
10067 newDoc->intSubset = NULL;
10068 newDoc->extSubset = NULL;
10069 xmlFreeDoc(newDoc);
10070 return(-1);
10071 }
10072 nodePush(ctxt, newDoc->children);
10073 if (doc == NULL) {
10074 ctxt->myDoc = newDoc;
10075 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010076 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010077 newDoc->children->doc = doc;
10078 }
10079 ctxt->instate = XML_PARSER_CONTENT;
10080 ctxt->depth = depth;
10081
10082 /*
10083 * Doing validity checking on chunk doesn't make sense
10084 */
10085 ctxt->validate = 0;
10086 ctxt->loadsubset = 0;
10087
Daniel Veillardb39bc392002-10-26 19:29:51 +000010088 if ( doc != NULL ){
10089 content = doc->children;
10090 doc->children = NULL;
10091 xmlParseContent(ctxt);
10092 doc->children = content;
10093 }
10094 else {
10095 xmlParseContent(ctxt);
10096 }
Owen Taylor3473f882001-02-23 17:55:21 +000010097 if ((RAW == '<') && (NXT(1) == '/')) {
10098 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10100 ctxt->sax->error(ctxt->userData,
10101 "chunk is not well balanced\n");
10102 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010104 } else if (RAW != 0) {
10105 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10107 ctxt->sax->error(ctxt->userData,
10108 "extra content at the end of well balanced chunk\n");
10109 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010110 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010111 }
10112 if (ctxt->node != newDoc->children) {
10113 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10115 ctxt->sax->error(ctxt->userData,
10116 "chunk is not well balanced\n");
10117 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010118 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010119 }
10120
10121 if (!ctxt->wellFormed) {
10122 if (ctxt->errNo == 0)
10123 ret = 1;
10124 else
10125 ret = ctxt->errNo;
10126 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010127 ret = 0;
10128 }
10129
10130 if (lst != NULL && (ret == 0 || recover == 1)) {
10131 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010132
10133 /*
10134 * Return the newly created nodeset after unlinking it from
10135 * they pseudo parent.
10136 */
10137 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010138 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010139 while (cur != NULL) {
10140 cur->parent = NULL;
10141 cur = cur->next;
10142 }
10143 newDoc->children->children = NULL;
10144 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010145
Owen Taylor3473f882001-02-23 17:55:21 +000010146 if (sax != NULL)
10147 ctxt->sax = oldsax;
10148 xmlFreeParserCtxt(ctxt);
10149 newDoc->intSubset = NULL;
10150 newDoc->extSubset = NULL;
10151 xmlFreeDoc(newDoc);
10152
10153 return(ret);
10154}
10155
10156/**
10157 * xmlSAXParseEntity:
10158 * @sax: the SAX handler block
10159 * @filename: the filename
10160 *
10161 * parse an XML external entity out of context and build a tree.
10162 * It use the given SAX function block to handle the parsing callback.
10163 * If sax is NULL, fallback to the default DOM tree building routines.
10164 *
10165 * [78] extParsedEnt ::= TextDecl? content
10166 *
10167 * This correspond to a "Well Balanced" chunk
10168 *
10169 * Returns the resulting document tree
10170 */
10171
10172xmlDocPtr
10173xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10174 xmlDocPtr ret;
10175 xmlParserCtxtPtr ctxt;
10176 char *directory = NULL;
10177
10178 ctxt = xmlCreateFileParserCtxt(filename);
10179 if (ctxt == NULL) {
10180 return(NULL);
10181 }
10182 if (sax != NULL) {
10183 if (ctxt->sax != NULL)
10184 xmlFree(ctxt->sax);
10185 ctxt->sax = sax;
10186 ctxt->userData = NULL;
10187 }
10188
10189 if ((ctxt->directory == NULL) && (directory == NULL))
10190 directory = xmlParserGetDirectory(filename);
10191
10192 xmlParseExtParsedEnt(ctxt);
10193
10194 if (ctxt->wellFormed)
10195 ret = ctxt->myDoc;
10196 else {
10197 ret = NULL;
10198 xmlFreeDoc(ctxt->myDoc);
10199 ctxt->myDoc = NULL;
10200 }
10201 if (sax != NULL)
10202 ctxt->sax = NULL;
10203 xmlFreeParserCtxt(ctxt);
10204
10205 return(ret);
10206}
10207
10208/**
10209 * xmlParseEntity:
10210 * @filename: the filename
10211 *
10212 * parse an XML external entity out of context and build a tree.
10213 *
10214 * [78] extParsedEnt ::= TextDecl? content
10215 *
10216 * This correspond to a "Well Balanced" chunk
10217 *
10218 * Returns the resulting document tree
10219 */
10220
10221xmlDocPtr
10222xmlParseEntity(const char *filename) {
10223 return(xmlSAXParseEntity(NULL, filename));
10224}
10225
10226/**
10227 * xmlCreateEntityParserCtxt:
10228 * @URL: the entity URL
10229 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010230 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010231 *
10232 * Create a parser context for an external entity
10233 * Automatic support for ZLIB/Compress compressed document is provided
10234 * by default if found at compile-time.
10235 *
10236 * Returns the new parser context or NULL
10237 */
10238xmlParserCtxtPtr
10239xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10240 const xmlChar *base) {
10241 xmlParserCtxtPtr ctxt;
10242 xmlParserInputPtr inputStream;
10243 char *directory = NULL;
10244 xmlChar *uri;
10245
10246 ctxt = xmlNewParserCtxt();
10247 if (ctxt == NULL) {
10248 return(NULL);
10249 }
10250
10251 uri = xmlBuildURI(URL, base);
10252
10253 if (uri == NULL) {
10254 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10255 if (inputStream == NULL) {
10256 xmlFreeParserCtxt(ctxt);
10257 return(NULL);
10258 }
10259
10260 inputPush(ctxt, inputStream);
10261
10262 if ((ctxt->directory == NULL) && (directory == NULL))
10263 directory = xmlParserGetDirectory((char *)URL);
10264 if ((ctxt->directory == NULL) && (directory != NULL))
10265 ctxt->directory = directory;
10266 } else {
10267 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10268 if (inputStream == NULL) {
10269 xmlFree(uri);
10270 xmlFreeParserCtxt(ctxt);
10271 return(NULL);
10272 }
10273
10274 inputPush(ctxt, inputStream);
10275
10276 if ((ctxt->directory == NULL) && (directory == NULL))
10277 directory = xmlParserGetDirectory((char *)uri);
10278 if ((ctxt->directory == NULL) && (directory != NULL))
10279 ctxt->directory = directory;
10280 xmlFree(uri);
10281 }
10282
10283 return(ctxt);
10284}
10285
10286/************************************************************************
10287 * *
10288 * Front ends when parsing from a file *
10289 * *
10290 ************************************************************************/
10291
10292/**
10293 * xmlCreateFileParserCtxt:
10294 * @filename: the filename
10295 *
10296 * Create a parser context for a file content.
10297 * Automatic support for ZLIB/Compress compressed document is provided
10298 * by default if found at compile-time.
10299 *
10300 * Returns the new parser context or NULL
10301 */
10302xmlParserCtxtPtr
10303xmlCreateFileParserCtxt(const char *filename)
10304{
10305 xmlParserCtxtPtr ctxt;
10306 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010307 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010308 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010309
Owen Taylor3473f882001-02-23 17:55:21 +000010310 ctxt = xmlNewParserCtxt();
10311 if (ctxt == NULL) {
10312 if (xmlDefaultSAXHandler.error != NULL) {
10313 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10314 }
10315 return(NULL);
10316 }
10317
Daniel Veillardf4862f02002-09-10 11:13:43 +000010318 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10319 if (normalized == NULL) {
10320 xmlFreeParserCtxt(ctxt);
10321 return(NULL);
10322 }
10323 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010324 if (inputStream == NULL) {
10325 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010326 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010327 return(NULL);
10328 }
10329
Owen Taylor3473f882001-02-23 17:55:21 +000010330 inputPush(ctxt, inputStream);
10331 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010332 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010333 if ((ctxt->directory == NULL) && (directory != NULL))
10334 ctxt->directory = directory;
10335
Daniel Veillardf4862f02002-09-10 11:13:43 +000010336 xmlFree(normalized);
10337
Owen Taylor3473f882001-02-23 17:55:21 +000010338 return(ctxt);
10339}
10340
10341/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010342 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010343 * @sax: the SAX handler block
10344 * @filename: the filename
10345 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10346 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010347 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010348 *
10349 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10350 * compressed document is provided by default if found at compile-time.
10351 * It use the given SAX function block to handle the parsing callback.
10352 * If sax is NULL, fallback to the default DOM tree building routines.
10353 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010354 * User data (void *) is stored within the parser context in the
10355 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010356 *
Owen Taylor3473f882001-02-23 17:55:21 +000010357 * Returns the resulting document tree
10358 */
10359
10360xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010361xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10362 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010363 xmlDocPtr ret;
10364 xmlParserCtxtPtr ctxt;
10365 char *directory = NULL;
10366
Daniel Veillard635ef722001-10-29 11:48:19 +000010367 xmlInitParser();
10368
Owen Taylor3473f882001-02-23 17:55:21 +000010369 ctxt = xmlCreateFileParserCtxt(filename);
10370 if (ctxt == NULL) {
10371 return(NULL);
10372 }
10373 if (sax != NULL) {
10374 if (ctxt->sax != NULL)
10375 xmlFree(ctxt->sax);
10376 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010378 if (data!=NULL) {
10379 ctxt->_private=data;
10380 }
Owen Taylor3473f882001-02-23 17:55:21 +000010381
10382 if ((ctxt->directory == NULL) && (directory == NULL))
10383 directory = xmlParserGetDirectory(filename);
10384 if ((ctxt->directory == NULL) && (directory != NULL))
10385 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10386
Daniel Veillarddad3f682002-11-17 16:47:27 +000010387 ctxt->recovery = recovery;
10388
Owen Taylor3473f882001-02-23 17:55:21 +000010389 xmlParseDocument(ctxt);
10390
10391 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10392 else {
10393 ret = NULL;
10394 xmlFreeDoc(ctxt->myDoc);
10395 ctxt->myDoc = NULL;
10396 }
10397 if (sax != NULL)
10398 ctxt->sax = NULL;
10399 xmlFreeParserCtxt(ctxt);
10400
10401 return(ret);
10402}
10403
10404/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010405 * xmlSAXParseFile:
10406 * @sax: the SAX handler block
10407 * @filename: the filename
10408 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10409 * documents
10410 *
10411 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10412 * compressed document is provided by default if found at compile-time.
10413 * It use the given SAX function block to handle the parsing callback.
10414 * If sax is NULL, fallback to the default DOM tree building routines.
10415 *
10416 * Returns the resulting document tree
10417 */
10418
10419xmlDocPtr
10420xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10421 int recovery) {
10422 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10423}
10424
10425/**
Owen Taylor3473f882001-02-23 17:55:21 +000010426 * xmlRecoverDoc:
10427 * @cur: a pointer to an array of xmlChar
10428 *
10429 * parse an XML in-memory document and build a tree.
10430 * In the case the document is not Well Formed, a tree is built anyway
10431 *
10432 * Returns the resulting document tree
10433 */
10434
10435xmlDocPtr
10436xmlRecoverDoc(xmlChar *cur) {
10437 return(xmlSAXParseDoc(NULL, cur, 1));
10438}
10439
10440/**
10441 * xmlParseFile:
10442 * @filename: the filename
10443 *
10444 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10445 * compressed document is provided by default if found at compile-time.
10446 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010447 * Returns the resulting document tree if the file was wellformed,
10448 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010449 */
10450
10451xmlDocPtr
10452xmlParseFile(const char *filename) {
10453 return(xmlSAXParseFile(NULL, filename, 0));
10454}
10455
10456/**
10457 * xmlRecoverFile:
10458 * @filename: the filename
10459 *
10460 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10461 * compressed document is provided by default if found at compile-time.
10462 * In the case the document is not Well Formed, a tree is built anyway
10463 *
10464 * Returns the resulting document tree
10465 */
10466
10467xmlDocPtr
10468xmlRecoverFile(const char *filename) {
10469 return(xmlSAXParseFile(NULL, filename, 1));
10470}
10471
10472
10473/**
10474 * xmlSetupParserForBuffer:
10475 * @ctxt: an XML parser context
10476 * @buffer: a xmlChar * buffer
10477 * @filename: a file name
10478 *
10479 * Setup the parser context to parse a new buffer; Clears any prior
10480 * contents from the parser context. The buffer parameter must not be
10481 * NULL, but the filename parameter can be
10482 */
10483void
10484xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10485 const char* filename)
10486{
10487 xmlParserInputPtr input;
10488
10489 input = xmlNewInputStream(ctxt);
10490 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010491 xmlGenericError(xmlGenericErrorContext,
10492 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010493 xmlFree(ctxt);
10494 return;
10495 }
10496
10497 xmlClearParserCtxt(ctxt);
10498 if (filename != NULL)
10499 input->filename = xmlMemStrdup(filename);
10500 input->base = buffer;
10501 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010502 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010503 inputPush(ctxt, input);
10504}
10505
10506/**
10507 * xmlSAXUserParseFile:
10508 * @sax: a SAX handler
10509 * @user_data: The user data returned on SAX callbacks
10510 * @filename: a file name
10511 *
10512 * parse an XML file and call the given SAX handler routines.
10513 * Automatic support for ZLIB/Compress compressed document is provided
10514 *
10515 * Returns 0 in case of success or a error number otherwise
10516 */
10517int
10518xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10519 const char *filename) {
10520 int ret = 0;
10521 xmlParserCtxtPtr ctxt;
10522
10523 ctxt = xmlCreateFileParserCtxt(filename);
10524 if (ctxt == NULL) return -1;
10525 if (ctxt->sax != &xmlDefaultSAXHandler)
10526 xmlFree(ctxt->sax);
10527 ctxt->sax = sax;
10528 if (user_data != NULL)
10529 ctxt->userData = user_data;
10530
10531 xmlParseDocument(ctxt);
10532
10533 if (ctxt->wellFormed)
10534 ret = 0;
10535 else {
10536 if (ctxt->errNo != 0)
10537 ret = ctxt->errNo;
10538 else
10539 ret = -1;
10540 }
10541 if (sax != NULL)
10542 ctxt->sax = NULL;
10543 xmlFreeParserCtxt(ctxt);
10544
10545 return ret;
10546}
10547
10548/************************************************************************
10549 * *
10550 * Front ends when parsing from memory *
10551 * *
10552 ************************************************************************/
10553
10554/**
10555 * xmlCreateMemoryParserCtxt:
10556 * @buffer: a pointer to a char array
10557 * @size: the size of the array
10558 *
10559 * Create a parser context for an XML in-memory document.
10560 *
10561 * Returns the new parser context or NULL
10562 */
10563xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010564xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010565 xmlParserCtxtPtr ctxt;
10566 xmlParserInputPtr input;
10567 xmlParserInputBufferPtr buf;
10568
10569 if (buffer == NULL)
10570 return(NULL);
10571 if (size <= 0)
10572 return(NULL);
10573
10574 ctxt = xmlNewParserCtxt();
10575 if (ctxt == NULL)
10576 return(NULL);
10577
10578 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010579 if (buf == NULL) {
10580 xmlFreeParserCtxt(ctxt);
10581 return(NULL);
10582 }
Owen Taylor3473f882001-02-23 17:55:21 +000010583
10584 input = xmlNewInputStream(ctxt);
10585 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010586 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010587 xmlFreeParserCtxt(ctxt);
10588 return(NULL);
10589 }
10590
10591 input->filename = NULL;
10592 input->buf = buf;
10593 input->base = input->buf->buffer->content;
10594 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010595 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010596
10597 inputPush(ctxt, input);
10598 return(ctxt);
10599}
10600
10601/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010602 * xmlSAXParseMemoryWithData:
10603 * @sax: the SAX handler block
10604 * @buffer: an pointer to a char array
10605 * @size: the size of the array
10606 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10607 * documents
10608 * @data: the userdata
10609 *
10610 * parse an XML in-memory block and use the given SAX function block
10611 * to handle the parsing callback. If sax is NULL, fallback to the default
10612 * DOM tree building routines.
10613 *
10614 * User data (void *) is stored within the parser context in the
10615 * context's _private member, so it is available nearly everywhere in libxml
10616 *
10617 * Returns the resulting document tree
10618 */
10619
10620xmlDocPtr
10621xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10622 int size, int recovery, void *data) {
10623 xmlDocPtr ret;
10624 xmlParserCtxtPtr ctxt;
10625
10626 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10627 if (ctxt == NULL) return(NULL);
10628 if (sax != NULL) {
10629 if (ctxt->sax != NULL)
10630 xmlFree(ctxt->sax);
10631 ctxt->sax = sax;
10632 }
10633 if (data!=NULL) {
10634 ctxt->_private=data;
10635 }
10636
10637 xmlParseDocument(ctxt);
10638
10639 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10640 else {
10641 ret = NULL;
10642 xmlFreeDoc(ctxt->myDoc);
10643 ctxt->myDoc = NULL;
10644 }
10645 if (sax != NULL)
10646 ctxt->sax = NULL;
10647 xmlFreeParserCtxt(ctxt);
10648
10649 return(ret);
10650}
10651
10652/**
Owen Taylor3473f882001-02-23 17:55:21 +000010653 * xmlSAXParseMemory:
10654 * @sax: the SAX handler block
10655 * @buffer: an pointer to a char array
10656 * @size: the size of the array
10657 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10658 * documents
10659 *
10660 * parse an XML in-memory block and use the given SAX function block
10661 * to handle the parsing callback. If sax is NULL, fallback to the default
10662 * DOM tree building routines.
10663 *
10664 * Returns the resulting document tree
10665 */
10666xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010667xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10668 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010669 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010670}
10671
10672/**
10673 * xmlParseMemory:
10674 * @buffer: an pointer to a char array
10675 * @size: the size of the array
10676 *
10677 * parse an XML in-memory block and build a tree.
10678 *
10679 * Returns the resulting document tree
10680 */
10681
Daniel Veillard50822cb2001-07-26 20:05:51 +000010682xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010683 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10684}
10685
10686/**
10687 * xmlRecoverMemory:
10688 * @buffer: an pointer to a char array
10689 * @size: the size of the array
10690 *
10691 * parse an XML in-memory block and build a tree.
10692 * In the case the document is not Well Formed, a tree is built anyway
10693 *
10694 * Returns the resulting document tree
10695 */
10696
Daniel Veillard50822cb2001-07-26 20:05:51 +000010697xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010698 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10699}
10700
10701/**
10702 * xmlSAXUserParseMemory:
10703 * @sax: a SAX handler
10704 * @user_data: The user data returned on SAX callbacks
10705 * @buffer: an in-memory XML document input
10706 * @size: the length of the XML document in bytes
10707 *
10708 * A better SAX parsing routine.
10709 * parse an XML in-memory buffer and call the given SAX handler routines.
10710 *
10711 * Returns 0 in case of success or a error number otherwise
10712 */
10713int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010714 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010715 int ret = 0;
10716 xmlParserCtxtPtr ctxt;
10717 xmlSAXHandlerPtr oldsax = NULL;
10718
Daniel Veillard9e923512002-08-14 08:48:52 +000010719 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010720 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10721 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010722 oldsax = ctxt->sax;
10723 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010724 if (user_data != NULL)
10725 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010726
10727 xmlParseDocument(ctxt);
10728
10729 if (ctxt->wellFormed)
10730 ret = 0;
10731 else {
10732 if (ctxt->errNo != 0)
10733 ret = ctxt->errNo;
10734 else
10735 ret = -1;
10736 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010737 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010738 xmlFreeParserCtxt(ctxt);
10739
10740 return ret;
10741}
10742
10743/**
10744 * xmlCreateDocParserCtxt:
10745 * @cur: a pointer to an array of xmlChar
10746 *
10747 * Creates a parser context for an XML in-memory document.
10748 *
10749 * Returns the new parser context or NULL
10750 */
10751xmlParserCtxtPtr
10752xmlCreateDocParserCtxt(xmlChar *cur) {
10753 int len;
10754
10755 if (cur == NULL)
10756 return(NULL);
10757 len = xmlStrlen(cur);
10758 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10759}
10760
10761/**
10762 * xmlSAXParseDoc:
10763 * @sax: the SAX handler block
10764 * @cur: a pointer to an array of xmlChar
10765 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10766 * documents
10767 *
10768 * parse an XML in-memory document and build a tree.
10769 * It use the given SAX function block to handle the parsing callback.
10770 * If sax is NULL, fallback to the default DOM tree building routines.
10771 *
10772 * Returns the resulting document tree
10773 */
10774
10775xmlDocPtr
10776xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10777 xmlDocPtr ret;
10778 xmlParserCtxtPtr ctxt;
10779
10780 if (cur == NULL) return(NULL);
10781
10782
10783 ctxt = xmlCreateDocParserCtxt(cur);
10784 if (ctxt == NULL) return(NULL);
10785 if (sax != NULL) {
10786 ctxt->sax = sax;
10787 ctxt->userData = NULL;
10788 }
10789
10790 xmlParseDocument(ctxt);
10791 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10792 else {
10793 ret = NULL;
10794 xmlFreeDoc(ctxt->myDoc);
10795 ctxt->myDoc = NULL;
10796 }
10797 if (sax != NULL)
10798 ctxt->sax = NULL;
10799 xmlFreeParserCtxt(ctxt);
10800
10801 return(ret);
10802}
10803
10804/**
10805 * xmlParseDoc:
10806 * @cur: a pointer to an array of xmlChar
10807 *
10808 * parse an XML in-memory document and build a tree.
10809 *
10810 * Returns the resulting document tree
10811 */
10812
10813xmlDocPtr
10814xmlParseDoc(xmlChar *cur) {
10815 return(xmlSAXParseDoc(NULL, cur, 0));
10816}
10817
Daniel Veillard8107a222002-01-13 14:10:10 +000010818/************************************************************************
10819 * *
10820 * Specific function to keep track of entities references *
10821 * and used by the XSLT debugger *
10822 * *
10823 ************************************************************************/
10824
10825static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10826
10827/**
10828 * xmlAddEntityReference:
10829 * @ent : A valid entity
10830 * @firstNode : A valid first node for children of entity
10831 * @lastNode : A valid last node of children entity
10832 *
10833 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10834 */
10835static void
10836xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10837 xmlNodePtr lastNode)
10838{
10839 if (xmlEntityRefFunc != NULL) {
10840 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10841 }
10842}
10843
10844
10845/**
10846 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010847 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010848 *
10849 * Set the function to call call back when a xml reference has been made
10850 */
10851void
10852xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10853{
10854 xmlEntityRefFunc = func;
10855}
Owen Taylor3473f882001-02-23 17:55:21 +000010856
10857/************************************************************************
10858 * *
10859 * Miscellaneous *
10860 * *
10861 ************************************************************************/
10862
10863#ifdef LIBXML_XPATH_ENABLED
10864#include <libxml/xpath.h>
10865#endif
10866
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010867extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010868static int xmlParserInitialized = 0;
10869
10870/**
10871 * xmlInitParser:
10872 *
10873 * Initialization function for the XML parser.
10874 * This is not reentrant. Call once before processing in case of
10875 * use in multithreaded programs.
10876 */
10877
10878void
10879xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010880 if (xmlParserInitialized != 0)
10881 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010882
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010883 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10884 (xmlGenericError == NULL))
10885 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010886 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010887 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010888 xmlInitCharEncodingHandlers();
10889 xmlInitializePredefinedEntities();
10890 xmlDefaultSAXHandlerInit();
10891 xmlRegisterDefaultInputCallbacks();
10892 xmlRegisterDefaultOutputCallbacks();
10893#ifdef LIBXML_HTML_ENABLED
10894 htmlInitAutoClose();
10895 htmlDefaultSAXHandlerInit();
10896#endif
10897#ifdef LIBXML_XPATH_ENABLED
10898 xmlXPathInit();
10899#endif
10900 xmlParserInitialized = 1;
10901}
10902
10903/**
10904 * xmlCleanupParser:
10905 *
10906 * Cleanup function for the XML parser. It tries to reclaim all
10907 * parsing related global memory allocated for the parser processing.
10908 * It doesn't deallocate any document related memory. Calling this
10909 * function should not prevent reusing the parser.
10910 */
10911
10912void
10913xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010914 xmlCleanupCharEncodingHandlers();
10915 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010916#ifdef LIBXML_CATALOG_ENABLED
10917 xmlCatalogCleanup();
10918#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010919 xmlCleanupThreads();
10920 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010921}