blob: d281a7c52713a327b331d30f39cbed276adafe63 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specs.
 *
 * Both the strings and the table itself are read-only, so the array
 * elements are declared const as well.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
303/*
304 * Macros for accessing the content. Those should be used only by the parser,
305 * and not exported.
306 *
307 * Dirty macros, i.e. one often need to make assumption on the context to
308 * use them
309 *
310 * CUR_PTR return the current pointer to the xmlChar to be parsed.
311 * To be used with extreme caution since operations consuming
312 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
317 * RAW same as CUR but in the input buffer, bypass any token
318 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
 *           only to compare against ASCII based substrings.
321 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
322 * strings within the parser.
323 *
324 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
325 *
326 * NEXT Skip to the next character, this does the proper decoding
327 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000328 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000329 * CUR_CHAR(l) returns the current unicode character (int), set l
330 * to the number of xmlChars used for the encoding [0-5].
331 * CUR_SCHAR same but operate on a string instead of the context
332 * COPY_BUF copy the current unicode char to the target buffer, increment
333 * the index
334 * GROW, SHRINK handling of input buffers
335 */
336
/* Byte under the read pointer of the current input (both are identical). */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located (val) positions after the read pointer. */
#define NXT(val) ctxt->input->cur[(val)]
/* The read pointer of the current input itself. */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): move the read pointer forward by (val) bytes and account for
 * them in ctxt->nbChars. Afterwards a '%' at the new position is handed
 * to xmlParserHandlePEReference(), and if the new position holds a 0 byte
 * and xmlParserInputGrow() does not report a positive result,
 * xmlPopInput() is called.
 * (val) is evaluated twice.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the read pointer by exactly one byte, count it in
 * ctxt->nbChars, and call xmlParserInputGrow() if the new position
 * holds a 0 byte.
 * Expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): update line/col for the byte under the read pointer (a '\n'
 * starts a new line, anything else adds one column), then advance the
 * read pointer by (l) bytes. A '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* (l) must be an lvalue: its address is passed to the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character (v) to buffer (b) at index (i)
 * and advance (i). When (l) is 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and its return value is
 * added to (i).
 * Arguments are parenthesized and the body is wrapped in do/while(0) so
 * that the macro behaves as one statement and cannot capture a following
 * "else". (i) must be an lvalue and is evaluated more than once.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v)); \
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer` accordingly. It must be
 * invoked with a plain identifier (the size variable is derived from it by
 * token pasting) from inside a function returning a pointer: on allocation
 * failure the macro reports the error, releases the old buffer and makes the
 * enclosing function return NULL.
 *
 * The result of xmlRealloc() is stored in a temporary first so the original
 * block is not lost (and can be freed) when the reallocation fails.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_tmp;                                              \
    buffer##_size *= 2;                                                 \
    buffer##_tmp = (xmlChar *)                                          \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (buffer##_tmp == NULL) {                                         \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_tmp;                                              \
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
1237 * @len: the len of @cur
1238 *
1239 * a strdup for char's to xmlChar's
1240 *
1241 * Returns a new xmlChar * or NULL
1242 */
1243
1244xmlChar *
1245xmlCharStrdup(const char *cur) {
1246 const char *p = cur;
1247
1248 if (cur == NULL) return(NULL);
1249 while (*p != '\0') p++; /* non input consuming */
1250 return(xmlCharStrndup(cur, p - cur));
1251}
1252
1253/**
1254 * xmlStrcmp:
1255 * @str1: the first xmlChar *
1256 * @str2: the second xmlChar *
1257 *
1258 * a strcmp for xmlChar's
1259 *
1260 * Returns the integer result of the comparison
1261 */
1262
1263int
1264xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1265 register int tmp;
1266
1267 if (str1 == str2) return(0);
1268 if (str1 == NULL) return(-1);
1269 if (str2 == NULL) return(1);
1270 do {
1271 tmp = *str1++ - *str2;
1272 if (tmp != 0) return(tmp);
1273 } while (*str2++ != 0);
1274 return 0;
1275}
1276
1277/**
1278 * xmlStrEqual:
1279 * @str1: the first xmlChar *
1280 * @str2: the second xmlChar *
1281 *
1282 * Check if both string are equal of have same content
1283 * Should be a bit more readable and faster than xmlStrEqual()
1284 *
1285 * Returns 1 if they are equal, 0 if they are different
1286 */
1287
1288int
1289xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1290 if (str1 == str2) return(1);
1291 if (str1 == NULL) return(0);
1292 if (str2 == NULL) return(0);
1293 do {
1294 if (*str1++ != *str2) return(0);
1295 } while (*str2++);
1296 return(1);
1297}
1298
1299/**
1300 * xmlStrncmp:
1301 * @str1: the first xmlChar *
1302 * @str2: the second xmlChar *
1303 * @len: the max comparison length
1304 *
1305 * a strncmp for xmlChar's
1306 *
1307 * Returns the integer result of the comparison
1308 */
1309
1310int
1311xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1312 register int tmp;
1313
1314 if (len <= 0) return(0);
1315 if (str1 == str2) return(0);
1316 if (str1 == NULL) return(-1);
1317 if (str2 == NULL) return(1);
1318 do {
1319 tmp = *str1++ - *str2;
1320 if (tmp != 0 || --len == 0) return(tmp);
1321 } while (*str2++ != 0);
1322 return 0;
1323}
1324
Daniel Veillardb44025c2001-10-11 22:55:55 +00001325static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001326 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1327 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1328 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1329 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1330 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1331 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1332 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1333 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1334 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1335 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1336 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1337 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1338 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1339 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1340 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1341 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1342 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1343 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1344 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1345 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1346 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1347 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1348 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1349 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1350 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1351 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1352 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1353 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1354 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1355 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1356 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1357 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1358};
1359
1360/**
1361 * xmlStrcasecmp:
1362 * @str1: the first xmlChar *
1363 * @str2: the second xmlChar *
1364 *
1365 * a strcasecmp for xmlChar's
1366 *
1367 * Returns the integer result of the comparison
1368 */
1369
1370int
1371xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1372 register int tmp;
1373
1374 if (str1 == str2) return(0);
1375 if (str1 == NULL) return(-1);
1376 if (str2 == NULL) return(1);
1377 do {
1378 tmp = casemap[*str1++] - casemap[*str2];
1379 if (tmp != 0) return(tmp);
1380 } while (*str2++ != 0);
1381 return 0;
1382}
1383
1384/**
1385 * xmlStrncasecmp:
1386 * @str1: the first xmlChar *
1387 * @str2: the second xmlChar *
1388 * @len: the max comparison length
1389 *
1390 * a strncasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1397 register int tmp;
1398
1399 if (len <= 0) return(0);
1400 if (str1 == str2) return(0);
1401 if (str1 == NULL) return(-1);
1402 if (str2 == NULL) return(1);
1403 do {
1404 tmp = casemap[*str1++] - casemap[*str2];
1405 if (tmp != 0 || --len == 0) return(tmp);
1406 } while (*str2++ != 0);
1407 return 0;
1408}
1409
1410/**
1411 * xmlStrchr:
1412 * @str: the xmlChar * array
1413 * @val: the xmlChar to search
1414 *
1415 * a strchr for xmlChar's
1416 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001417 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001418 */
1419
1420const xmlChar *
1421xmlStrchr(const xmlChar *str, xmlChar val) {
1422 if (str == NULL) return(NULL);
1423 while (*str != 0) { /* non input consuming */
1424 if (*str == val) return((xmlChar *) str);
1425 str++;
1426 }
1427 return(NULL);
1428}
1429
1430/**
1431 * xmlStrstr:
1432 * @str: the xmlChar * array (haystack)
1433 * @val: the xmlChar to search (needle)
1434 *
1435 * a strstr for xmlChar's
1436 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001437 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001438 */
1439
1440const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001441xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001442 int n;
1443
1444 if (str == NULL) return(NULL);
1445 if (val == NULL) return(NULL);
1446 n = xmlStrlen(val);
1447
1448 if (n == 0) return(str);
1449 while (*str != 0) { /* non input consuming */
1450 if (*str == *val) {
1451 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1452 }
1453 str++;
1454 }
1455 return(NULL);
1456}
1457
1458/**
1459 * xmlStrcasestr:
1460 * @str: the xmlChar * array (haystack)
1461 * @val: the xmlChar to search (needle)
1462 *
1463 * a case-ignoring strstr for xmlChar's
1464 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001465 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001466 */
1467
1468const xmlChar *
1469xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1470 int n;
1471
1472 if (str == NULL) return(NULL);
1473 if (val == NULL) return(NULL);
1474 n = xmlStrlen(val);
1475
1476 if (n == 0) return(str);
1477 while (*str != 0) { /* non input consuming */
1478 if (casemap[*str] == casemap[*val])
1479 if (!xmlStrncasecmp(str, val, n)) return(str);
1480 str++;
1481 }
1482 return(NULL);
1483}
1484
1485/**
1486 * xmlStrsub:
1487 * @str: the xmlChar * array (haystack)
1488 * @start: the index of the first char (zero based)
1489 * @len: the length of the substring
1490 *
1491 * Extract a substring of a given string
1492 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001493 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001494 */
1495
1496xmlChar *
1497xmlStrsub(const xmlChar *str, int start, int len) {
1498 int i;
1499
1500 if (str == NULL) return(NULL);
1501 if (start < 0) return(NULL);
1502 if (len < 0) return(NULL);
1503
1504 for (i = 0;i < start;i++) {
1505 if (*str == 0) return(NULL);
1506 str++;
1507 }
1508 if (*str == 0) return(NULL);
1509 return(xmlStrndup(str, len));
1510}
1511
1512/**
1513 * xmlStrlen:
1514 * @str: the xmlChar * array
1515 *
1516 * length of a xmlChar's string
1517 *
1518 * Returns the number of xmlChar contained in the ARRAY.
1519 */
1520
1521int
1522xmlStrlen(const xmlChar *str) {
1523 int len = 0;
1524
1525 if (str == NULL) return(0);
1526 while (*str != 0) { /* non input consuming */
1527 str++;
1528 len++;
1529 }
1530 return(len);
1531}
1532
1533/**
1534 * xmlStrncat:
1535 * @cur: the original xmlChar * array
1536 * @add: the xmlChar * array added
1537 * @len: the length of @add
1538 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001539 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001540 * first bytes of @add.
1541 *
1542 * Returns a new xmlChar *, the original @cur is reallocated if needed
1543 * and should not be freed
1544 */
1545
1546xmlChar *
1547xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1548 int size;
1549 xmlChar *ret;
1550
1551 if ((add == NULL) || (len == 0))
1552 return(cur);
1553 if (cur == NULL)
1554 return(xmlStrndup(add, len));
1555
1556 size = xmlStrlen(cur);
1557 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1558 if (ret == NULL) {
1559 xmlGenericError(xmlGenericErrorContext,
1560 "xmlStrncat: realloc of %ld byte failed\n",
1561 (size + len + 1) * (long)sizeof(xmlChar));
1562 return(cur);
1563 }
1564 memcpy(&ret[size], add, len * sizeof(xmlChar));
1565 ret[size + len] = 0;
1566 return(ret);
1567}
1568
1569/**
1570 * xmlStrcat:
1571 * @cur: the original xmlChar * array
1572 * @add: the xmlChar * array added
1573 *
1574 * a strcat for array of xmlChar's. Since they are supposed to be
1575 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1576 * a termination mark of '0'.
1577 *
1578 * Returns a new xmlChar * containing the concatenated string.
1579 */
1580xmlChar *
1581xmlStrcat(xmlChar *cur, const xmlChar *add) {
1582 const xmlChar *p = add;
1583
1584 if (add == NULL) return(cur);
1585 if (cur == NULL)
1586 return(xmlStrdup(add));
1587
1588 while (*p != 0) p++; /* non input consuming */
1589 return(xmlStrncat(cur, add, p - add));
1590}
1591
1592/************************************************************************
1593 * *
1594 * Commodity functions, cleanup needed ? *
1595 * *
1596 ************************************************************************/
1597
1598/**
1599 * areBlanks:
1600 * @ctxt: an XML parser context
1601 * @str: a xmlChar *
1602 * @len: the size of @str
1603 *
1604 * Is this a sequence of blank chars that one can ignore ?
1605 *
1606 * Returns 1 if ignorable 0 otherwise.
1607 */
1608
1609static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1610 int i, ret;
1611 xmlNodePtr lastChild;
1612
Daniel Veillard05c13a22001-09-09 08:38:09 +00001613 /*
1614 * Don't spend time trying to differentiate them, the same callback is
1615 * used !
1616 */
1617 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001618 return(0);
1619
Owen Taylor3473f882001-02-23 17:55:21 +00001620 /*
1621 * Check for xml:space value.
1622 */
1623 if (*(ctxt->space) == 1)
1624 return(0);
1625
1626 /*
1627 * Check that the string is made of blanks
1628 */
1629 for (i = 0;i < len;i++)
1630 if (!(IS_BLANK(str[i]))) return(0);
1631
1632 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001633 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001634 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001635 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 if (ctxt->myDoc != NULL) {
1637 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1638 if (ret == 0) return(1);
1639 if (ret == 1) return(0);
1640 }
1641
1642 /*
1643 * Otherwise, heuristic :-\
1644 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001645 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001646 if ((ctxt->node->children == NULL) &&
1647 (RAW == '<') && (NXT(1) == '/')) return(0);
1648
1649 lastChild = xmlGetLastChild(ctxt->node);
1650 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001651 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1652 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001653 } else if (xmlNodeIsText(lastChild))
1654 return(0);
1655 else if ((ctxt->node->children != NULL) &&
1656 (xmlNodeIsText(ctxt->node->children)))
1657 return(0);
1658 return(1);
1659}
1660
Owen Taylor3473f882001-02-23 17:55:21 +00001661/************************************************************************
1662 * *
1663 * Extra stuff for namespace support *
1664 * Relates to http://www.w3.org/TR/WD-xml-names *
1665 * *
1666 ************************************************************************/
1667
1668/**
1669 * xmlSplitQName:
1670 * @ctxt: an XML parser context
1671 * @name: an XML parser context
1672 * @prefix: a xmlChar **
1673 *
1674 * parse an UTF8 encoded XML qualified name string
1675 *
1676 * [NS 5] QName ::= (Prefix ':')? LocalPart
1677 *
1678 * [NS 6] Prefix ::= NCName
1679 *
1680 * [NS 7] LocalPart ::= NCName
1681 *
1682 * Returns the local part, and prefix is updated
1683 * to get the Prefix if any.
1684 */
1685
1686xmlChar *
1687xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1688 xmlChar buf[XML_MAX_NAMELEN + 5];
1689 xmlChar *buffer = NULL;
1690 int len = 0;
1691 int max = XML_MAX_NAMELEN;
1692 xmlChar *ret = NULL;
1693 const xmlChar *cur = name;
1694 int c;
1695
1696 *prefix = NULL;
1697
1698#ifndef XML_XML_NAMESPACE
1699 /* xml: prefix is not really a namespace */
1700 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1701 (cur[2] == 'l') && (cur[3] == ':'))
1702 return(xmlStrdup(name));
1703#endif
1704
1705 /* nasty but valid */
1706 if (cur[0] == ':')
1707 return(xmlStrdup(name));
1708
1709 c = *cur++;
1710 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1711 buf[len++] = c;
1712 c = *cur++;
1713 }
1714 if (len >= max) {
1715 /*
1716 * Okay someone managed to make a huge name, so he's ready to pay
1717 * for the processing speed.
1718 */
1719 max = len * 2;
1720
1721 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1722 if (buffer == NULL) {
1723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1724 ctxt->sax->error(ctxt->userData,
1725 "xmlSplitQName: out of memory\n");
1726 return(NULL);
1727 }
1728 memcpy(buffer, buf, len);
1729 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1730 if (len + 10 > max) {
1731 max *= 2;
1732 buffer = (xmlChar *) xmlRealloc(buffer,
1733 max * sizeof(xmlChar));
1734 if (buffer == NULL) {
1735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1736 ctxt->sax->error(ctxt->userData,
1737 "xmlSplitQName: out of memory\n");
1738 return(NULL);
1739 }
1740 }
1741 buffer[len++] = c;
1742 c = *cur++;
1743 }
1744 buffer[len] = 0;
1745 }
1746
1747 if (buffer == NULL)
1748 ret = xmlStrndup(buf, len);
1749 else {
1750 ret = buffer;
1751 buffer = NULL;
1752 max = XML_MAX_NAMELEN;
1753 }
1754
1755
1756 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001757 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001758 if (c == 0) return(ret);
1759 *prefix = ret;
1760 len = 0;
1761
Daniel Veillardbb284f42002-10-16 18:02:47 +00001762 /*
1763 * Check that the first character is proper to start
1764 * a new name
1765 */
1766 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1767 ((c >= 0x41) && (c <= 0x5A)) ||
1768 (c == '_') || (c == ':'))) {
1769 int l;
1770 int first = CUR_SCHAR(cur, l);
1771
1772 if (!IS_LETTER(first) && (first != '_')) {
1773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1774 ctxt->sax->error(ctxt->userData,
1775 "Name %s is not XML Namespace compliant\n",
1776 name);
1777 }
1778 }
1779 cur++;
1780
Owen Taylor3473f882001-02-23 17:55:21 +00001781 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1782 buf[len++] = c;
1783 c = *cur++;
1784 }
1785 if (len >= max) {
1786 /*
1787 * Okay someone managed to make a huge name, so he's ready to pay
1788 * for the processing speed.
1789 */
1790 max = len * 2;
1791
1792 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1793 if (buffer == NULL) {
1794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795 ctxt->sax->error(ctxt->userData,
1796 "xmlSplitQName: out of memory\n");
1797 return(NULL);
1798 }
1799 memcpy(buffer, buf, len);
1800 while (c != 0) { /* tested bigname2.xml */
1801 if (len + 10 > max) {
1802 max *= 2;
1803 buffer = (xmlChar *) xmlRealloc(buffer,
1804 max * sizeof(xmlChar));
1805 if (buffer == NULL) {
1806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1807 ctxt->sax->error(ctxt->userData,
1808 "xmlSplitQName: out of memory\n");
1809 return(NULL);
1810 }
1811 }
1812 buffer[len++] = c;
1813 c = *cur++;
1814 }
1815 buffer[len] = 0;
1816 }
1817
1818 if (buffer == NULL)
1819 ret = xmlStrndup(buf, len);
1820 else {
1821 ret = buffer;
1822 }
1823 }
1824
1825 return(ret);
1826}
1827
1828/************************************************************************
1829 * *
1830 * The parser itself *
1831 * Relates to http://www.w3.org/TR/REC-xml *
1832 * *
1833 ************************************************************************/
1834
Daniel Veillard76d66f42001-05-16 21:05:17 +00001835static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001836/**
1837 * xmlParseName:
1838 * @ctxt: an XML parser context
1839 *
1840 * parse an XML name.
1841 *
1842 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1843 * CombiningChar | Extender
1844 *
1845 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1846 *
1847 * [6] Names ::= Name (S Name)*
1848 *
1849 * Returns the Name parsed or NULL
1850 */
1851
1852xmlChar *
1853xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001854 const xmlChar *in;
1855 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001856 int count = 0;
1857
1858 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001859
1860 /*
1861 * Accelerator for simple ASCII names
1862 */
1863 in = ctxt->input->cur;
1864 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1865 ((*in >= 0x41) && (*in <= 0x5A)) ||
1866 (*in == '_') || (*in == ':')) {
1867 in++;
1868 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1869 ((*in >= 0x41) && (*in <= 0x5A)) ||
1870 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001871 (*in == '_') || (*in == '-') ||
1872 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001873 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001874 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001875 count = in - ctxt->input->cur;
1876 ret = xmlStrndup(ctxt->input->cur, count);
1877 ctxt->input->cur = in;
1878 return(ret);
1879 }
1880 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001881 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001882}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001883
Daniel Veillard46de64e2002-05-29 08:21:33 +00001884/**
1885 * xmlParseNameAndCompare:
1886 * @ctxt: an XML parser context
1887 *
1888 * parse an XML name and compares for match
1889 * (specialized for endtag parsing)
1890 *
1891 *
1892 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1893 * and the name for mismatch
1894 */
1895
Daniel Veillardf4862f02002-09-10 11:13:43 +00001896static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001897xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1898 const xmlChar *cmp = other;
1899 const xmlChar *in;
1900 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001901
1902 GROW;
1903
1904 in = ctxt->input->cur;
1905 while (*in != 0 && *in == *cmp) {
1906 ++in;
1907 ++cmp;
1908 }
1909 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1910 /* success */
1911 ctxt->input->cur = in;
1912 return (xmlChar*) 1;
1913 }
1914 /* failure (or end of input buffer), check with full function */
1915 ret = xmlParseName (ctxt);
1916 if (ret != 0 && xmlStrEqual (ret, other)) {
1917 xmlFree (ret);
1918 return (xmlChar*) 1;
1919 }
1920 return ret;
1921}
1922
Daniel Veillard76d66f42001-05-16 21:05:17 +00001923static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001924xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1925 xmlChar buf[XML_MAX_NAMELEN + 5];
1926 int len = 0, l;
1927 int c;
1928 int count = 0;
1929
1930 /*
1931 * Handler for more complex cases
1932 */
1933 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001934 c = CUR_CHAR(l);
1935 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1936 (!IS_LETTER(c) && (c != '_') &&
1937 (c != ':'))) {
1938 return(NULL);
1939 }
1940
1941 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1942 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1943 (c == '.') || (c == '-') ||
1944 (c == '_') || (c == ':') ||
1945 (IS_COMBINING(c)) ||
1946 (IS_EXTENDER(c)))) {
1947 if (count++ > 100) {
1948 count = 0;
1949 GROW;
1950 }
1951 COPY_BUF(l,buf,len,c);
1952 NEXTL(l);
1953 c = CUR_CHAR(l);
1954 if (len >= XML_MAX_NAMELEN) {
1955 /*
1956 * Okay someone managed to make a huge name, so he's ready to pay
1957 * for the processing speed.
1958 */
1959 xmlChar *buffer;
1960 int max = len * 2;
1961
1962 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1963 if (buffer == NULL) {
1964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1965 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001966 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001967 return(NULL);
1968 }
1969 memcpy(buffer, buf, len);
1970 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1971 (c == '.') || (c == '-') ||
1972 (c == '_') || (c == ':') ||
1973 (IS_COMBINING(c)) ||
1974 (IS_EXTENDER(c))) {
1975 if (count++ > 100) {
1976 count = 0;
1977 GROW;
1978 }
1979 if (len + 10 > max) {
1980 max *= 2;
1981 buffer = (xmlChar *) xmlRealloc(buffer,
1982 max * sizeof(xmlChar));
1983 if (buffer == NULL) {
1984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001986 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001987 return(NULL);
1988 }
1989 }
1990 COPY_BUF(l,buffer,len,c);
1991 NEXTL(l);
1992 c = CUR_CHAR(l);
1993 }
1994 buffer[len] = 0;
1995 return(buffer);
1996 }
1997 }
1998 return(xmlStrndup(buf, len));
1999}
2000
2001/**
2002 * xmlParseStringName:
2003 * @ctxt: an XML parser context
2004 * @str: a pointer to the string pointer (IN/OUT)
2005 *
2006 * parse an XML name.
2007 *
2008 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2009 * CombiningChar | Extender
2010 *
2011 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2012 *
2013 * [6] Names ::= Name (S Name)*
2014 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002015 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002016 * is updated to the current location in the string.
2017 */
2018
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002019static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002020xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2021 xmlChar buf[XML_MAX_NAMELEN + 5];
2022 const xmlChar *cur = *str;
2023 int len = 0, l;
2024 int c;
2025
2026 c = CUR_SCHAR(cur, l);
2027 if (!IS_LETTER(c) && (c != '_') &&
2028 (c != ':')) {
2029 return(NULL);
2030 }
2031
2032 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2033 (c == '.') || (c == '-') ||
2034 (c == '_') || (c == ':') ||
2035 (IS_COMBINING(c)) ||
2036 (IS_EXTENDER(c))) {
2037 COPY_BUF(l,buf,len,c);
2038 cur += l;
2039 c = CUR_SCHAR(cur, l);
2040 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2041 /*
2042 * Okay someone managed to make a huge name, so he's ready to pay
2043 * for the processing speed.
2044 */
2045 xmlChar *buffer;
2046 int max = len * 2;
2047
2048 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2049 if (buffer == NULL) {
2050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2051 ctxt->sax->error(ctxt->userData,
2052 "xmlParseStringName: out of memory\n");
2053 return(NULL);
2054 }
2055 memcpy(buffer, buf, len);
2056 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2057 (c == '.') || (c == '-') ||
2058 (c == '_') || (c == ':') ||
2059 (IS_COMBINING(c)) ||
2060 (IS_EXTENDER(c))) {
2061 if (len + 10 > max) {
2062 max *= 2;
2063 buffer = (xmlChar *) xmlRealloc(buffer,
2064 max * sizeof(xmlChar));
2065 if (buffer == NULL) {
2066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2067 ctxt->sax->error(ctxt->userData,
2068 "xmlParseStringName: out of memory\n");
2069 return(NULL);
2070 }
2071 }
2072 COPY_BUF(l,buffer,len,c);
2073 cur += l;
2074 c = CUR_SCHAR(cur, l);
2075 }
2076 buffer[len] = 0;
2077 *str = cur;
2078 return(buffer);
2079 }
2080 }
2081 *str = cur;
2082 return(xmlStrndup(buf, len));
2083}
2084
2085/**
2086 * xmlParseNmtoken:
2087 * @ctxt: an XML parser context
2088 *
2089 * parse an XML Nmtoken.
2090 *
2091 * [7] Nmtoken ::= (NameChar)+
2092 *
2093 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2094 *
2095 * Returns the Nmtoken parsed or NULL
2096 */
2097
2098xmlChar *
2099xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2100 xmlChar buf[XML_MAX_NAMELEN + 5];
2101 int len = 0, l;
2102 int c;
2103 int count = 0;
2104
2105 GROW;
2106 c = CUR_CHAR(l);
2107
2108 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2109 (c == '.') || (c == '-') ||
2110 (c == '_') || (c == ':') ||
2111 (IS_COMBINING(c)) ||
2112 (IS_EXTENDER(c))) {
2113 if (count++ > 100) {
2114 count = 0;
2115 GROW;
2116 }
2117 COPY_BUF(l,buf,len,c);
2118 NEXTL(l);
2119 c = CUR_CHAR(l);
2120 if (len >= XML_MAX_NAMELEN) {
2121 /*
2122 * Okay someone managed to make a huge token, so he's ready to pay
2123 * for the processing speed.
2124 */
2125 xmlChar *buffer;
2126 int max = len * 2;
2127
2128 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2129 if (buffer == NULL) {
2130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2131 ctxt->sax->error(ctxt->userData,
2132 "xmlParseNmtoken: out of memory\n");
2133 return(NULL);
2134 }
2135 memcpy(buffer, buf, len);
2136 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2137 (c == '.') || (c == '-') ||
2138 (c == '_') || (c == ':') ||
2139 (IS_COMBINING(c)) ||
2140 (IS_EXTENDER(c))) {
2141 if (count++ > 100) {
2142 count = 0;
2143 GROW;
2144 }
2145 if (len + 10 > max) {
2146 max *= 2;
2147 buffer = (xmlChar *) xmlRealloc(buffer,
2148 max * sizeof(xmlChar));
2149 if (buffer == NULL) {
2150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2151 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002152 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155 }
2156 COPY_BUF(l,buffer,len,c);
2157 NEXTL(l);
2158 c = CUR_CHAR(l);
2159 }
2160 buffer[len] = 0;
2161 return(buffer);
2162 }
2163 }
2164 if (len == 0)
2165 return(NULL);
2166 return(xmlStrndup(buf, len));
2167}
2168
2169/**
2170 * xmlParseEntityValue:
2171 * @ctxt: an XML parser context
2172 * @orig: if non-NULL store a copy of the original entity value
2173 *
2174 * parse a value for ENTITY declarations
2175 *
2176 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2177 * "'" ([^%&'] | PEReference | Reference)* "'"
2178 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002179 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002180 */
2181
2182xmlChar *
2183xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2184 xmlChar *buf = NULL;
2185 int len = 0;
2186 int size = XML_PARSER_BUFFER_SIZE;
2187 int c, l;
2188 xmlChar stop;
2189 xmlChar *ret = NULL;
2190 const xmlChar *cur = NULL;
2191 xmlParserInputPtr input;
2192
2193 if (RAW == '"') stop = '"';
2194 else if (RAW == '\'') stop = '\'';
2195 else {
2196 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2198 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2199 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002200 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002201 return(NULL);
2202 }
2203 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2204 if (buf == NULL) {
2205 xmlGenericError(xmlGenericErrorContext,
2206 "malloc of %d byte failed\n", size);
2207 return(NULL);
2208 }
2209
2210 /*
2211 * The content of the entity definition is copied in a buffer.
2212 */
2213
2214 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2215 input = ctxt->input;
2216 GROW;
2217 NEXT;
2218 c = CUR_CHAR(l);
2219 /*
2220 * NOTE: 4.4.5 Included in Literal
2221 * When a parameter entity reference appears in a literal entity
2222 * value, ... a single or double quote character in the replacement
2223 * text is always treated as a normal data character and will not
2224 * terminate the literal.
2225 * In practice it means we stop the loop only when back at parsing
2226 * the initial entity and the quote is found
2227 */
2228 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2229 (ctxt->input != input))) {
2230 if (len + 5 >= size) {
2231 size *= 2;
2232 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2233 if (buf == NULL) {
2234 xmlGenericError(xmlGenericErrorContext,
2235 "realloc of %d byte failed\n", size);
2236 return(NULL);
2237 }
2238 }
2239 COPY_BUF(l,buf,len,c);
2240 NEXTL(l);
2241 /*
2242 * Pop-up of finished entities.
2243 */
2244 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2245 xmlPopInput(ctxt);
2246
2247 GROW;
2248 c = CUR_CHAR(l);
2249 if (c == 0) {
2250 GROW;
2251 c = CUR_CHAR(l);
2252 }
2253 }
2254 buf[len] = 0;
2255
2256 /*
2257 * Raise problem w.r.t. '&' and '%' being used in non-entities
2258 * reference constructs. Note Charref will be handled in
2259 * xmlStringDecodeEntities()
2260 */
2261 cur = buf;
2262 while (*cur != 0) { /* non input consuming */
2263 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2264 xmlChar *name;
2265 xmlChar tmp = *cur;
2266
2267 cur++;
2268 name = xmlParseStringName(ctxt, &cur);
2269 if ((name == NULL) || (*cur != ';')) {
2270 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2272 ctxt->sax->error(ctxt->userData,
2273 "EntityValue: '%c' forbidden except for entities references\n",
2274 tmp);
2275 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002276 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002277 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002278 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2279 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002280 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2282 ctxt->sax->error(ctxt->userData,
2283 "EntityValue: PEReferences forbidden in internal subset\n",
2284 tmp);
2285 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002286 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002287 }
2288 if (name != NULL)
2289 xmlFree(name);
2290 }
2291 cur++;
2292 }
2293
2294 /*
2295 * Then PEReference entities are substituted.
2296 */
2297 if (c != stop) {
2298 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2300 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2301 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002302 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002303 xmlFree(buf);
2304 } else {
2305 NEXT;
2306 /*
2307 * NOTE: 4.4.7 Bypassed
2308 * When a general entity reference appears in the EntityValue in
2309 * an entity declaration, it is bypassed and left as is.
2310 * so XML_SUBSTITUTE_REF is not set here.
2311 */
2312 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2313 0, 0, 0);
2314 if (orig != NULL)
2315 *orig = buf;
2316 else
2317 xmlFree(buf);
2318 }
2319
2320 return(ret);
2321}
2322
2323/**
2324 * xmlParseAttValue:
2325 * @ctxt: an XML parser context
2326 *
2327 * parse a value for an attribute
2328 * Note: the parser won't do substitution of entities here, this
2329 * will be handled later in xmlStringGetNodeList
2330 *
2331 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2332 * "'" ([^<&'] | Reference)* "'"
2333 *
2334 * 3.3.3 Attribute-Value Normalization:
2335 * Before the value of an attribute is passed to the application or
2336 * checked for validity, the XML processor must normalize it as follows:
2337 * - a character reference is processed by appending the referenced
2338 * character to the attribute value
2339 * - an entity reference is processed by recursively processing the
2340 * replacement text of the entity
2341 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2342 * appending #x20 to the normalized value, except that only a single
2343 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2344 * parsed entity or the literal entity value of an internal parsed entity
2345 * - other characters are processed by appending them to the normalized value
2346 * If the declared value is not CDATA, then the XML processor must further
2347 * process the normalized attribute value by discarding any leading and
2348 * trailing space (#x20) characters, and by replacing sequences of space
2349 * (#x20) characters by a single space (#x20) character.
2350 * All attributes for which no declaration has been read should be treated
2351 * by a non-validating parser as if declared CDATA.
2352 *
2353 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2354 */
2355
2356xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002357xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2358
2359xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002360xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2361 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002362 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002363 xmlChar *ret = NULL;
2364 SHRINK;
2365 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002366 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002367 if (*in != '"' && *in != '\'') {
2368 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2371 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002372 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002373 return(NULL);
2374 }
2375 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2376 limit = *in;
2377 ++in;
2378
2379 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2380 *in != '&' && *in != '<'
2381 ) {
2382 ++in;
2383 }
2384 if (*in != limit) {
2385 return xmlParseAttValueComplex(ctxt);
2386 }
2387 ++in;
2388 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2389 CUR_PTR = in;
2390 return ret;
2391}
2392
2393xmlChar *
2394xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2395 xmlChar limit = 0;
2396 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002397 int len = 0;
2398 int buf_size = 0;
2399 int c, l;
2400 xmlChar *current = NULL;
2401 xmlEntityPtr ent;
2402
2403
2404 SHRINK;
2405 if (NXT(0) == '"') {
2406 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2407 limit = '"';
2408 NEXT;
2409 } else if (NXT(0) == '\'') {
2410 limit = '\'';
2411 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2412 NEXT;
2413 } else {
2414 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2416 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002419 return(NULL);
2420 }
2421
2422 /*
2423 * allocate a translation buffer.
2424 */
2425 buf_size = XML_PARSER_BUFFER_SIZE;
2426 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2427 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002428 xmlGenericError(xmlGenericErrorContext,
2429 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002430 return(NULL);
2431 }
2432
2433 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002434 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002435 */
2436 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002437 while ((NXT(0) != limit) && /* checked */
2438 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002439 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002440 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002441 if (NXT(1) == '#') {
2442 int val = xmlParseCharRef(ctxt);
2443 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002444 if (ctxt->replaceEntities) {
2445 if (len > buf_size - 10) {
2446 growBuffer(buf);
2447 }
2448 buf[len++] = '&';
2449 } else {
2450 /*
2451 * The reparsing will be done in xmlStringGetNodeList()
2452 * called by the attribute() function in SAX.c
2453 */
2454 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002455
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (len > buf_size - 10) {
2457 growBuffer(buf);
2458 }
2459 current = &buffer[0];
2460 while (*current != 0) { /* non input consuming */
2461 buf[len++] = *current++;
2462 }
Owen Taylor3473f882001-02-23 17:55:21 +00002463 }
2464 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002465 if (len > buf_size - 10) {
2466 growBuffer(buf);
2467 }
Owen Taylor3473f882001-02-23 17:55:21 +00002468 len += xmlCopyChar(0, &buf[len], val);
2469 }
2470 } else {
2471 ent = xmlParseEntityRef(ctxt);
2472 if ((ent != NULL) &&
2473 (ctxt->replaceEntities != 0)) {
2474 xmlChar *rep;
2475
2476 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2477 rep = xmlStringDecodeEntities(ctxt, ent->content,
2478 XML_SUBSTITUTE_REF, 0, 0, 0);
2479 if (rep != NULL) {
2480 current = rep;
2481 while (*current != 0) { /* non input consuming */
2482 buf[len++] = *current++;
2483 if (len > buf_size - 10) {
2484 growBuffer(buf);
2485 }
2486 }
2487 xmlFree(rep);
2488 }
2489 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002490 if (len > buf_size - 10) {
2491 growBuffer(buf);
2492 }
Owen Taylor3473f882001-02-23 17:55:21 +00002493 if (ent->content != NULL)
2494 buf[len++] = ent->content[0];
2495 }
2496 } else if (ent != NULL) {
2497 int i = xmlStrlen(ent->name);
2498 const xmlChar *cur = ent->name;
2499
2500 /*
2501 * This may look absurd but is needed to detect
2502 * entities problems
2503 */
2504 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2505 (ent->content != NULL)) {
2506 xmlChar *rep;
2507 rep = xmlStringDecodeEntities(ctxt, ent->content,
2508 XML_SUBSTITUTE_REF, 0, 0, 0);
2509 if (rep != NULL)
2510 xmlFree(rep);
2511 }
2512
2513 /*
2514 * Just output the reference
2515 */
2516 buf[len++] = '&';
2517 if (len > buf_size - i - 10) {
2518 growBuffer(buf);
2519 }
2520 for (;i > 0;i--)
2521 buf[len++] = *cur++;
2522 buf[len++] = ';';
2523 }
2524 }
2525 } else {
2526 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2527 COPY_BUF(l,buf,len,0x20);
2528 if (len > buf_size - 10) {
2529 growBuffer(buf);
2530 }
2531 } else {
2532 COPY_BUF(l,buf,len,c);
2533 if (len > buf_size - 10) {
2534 growBuffer(buf);
2535 }
2536 }
2537 NEXTL(l);
2538 }
2539 GROW;
2540 c = CUR_CHAR(l);
2541 }
2542 buf[len++] = 0;
2543 if (RAW == '<') {
2544 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2546 ctxt->sax->error(ctxt->userData,
2547 "Unescaped '<' not allowed in attributes values\n");
2548 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002549 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002550 } else if (RAW != limit) {
2551 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2553 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2554 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002555 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002556 } else
2557 NEXT;
2558 return(buf);
2559}
2560
2561/**
2562 * xmlParseSystemLiteral:
2563 * @ctxt: an XML parser context
2564 *
2565 * parse an XML Literal
2566 *
2567 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2568 *
2569 * Returns the SystemLiteral parsed or NULL
2570 */
2571
2572xmlChar *
2573xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2574 xmlChar *buf = NULL;
2575 int len = 0;
2576 int size = XML_PARSER_BUFFER_SIZE;
2577 int cur, l;
2578 xmlChar stop;
2579 int state = ctxt->instate;
2580 int count = 0;
2581
2582 SHRINK;
2583 if (RAW == '"') {
2584 NEXT;
2585 stop = '"';
2586 } else if (RAW == '\'') {
2587 NEXT;
2588 stop = '\'';
2589 } else {
2590 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2592 ctxt->sax->error(ctxt->userData,
2593 "SystemLiteral \" or ' expected\n");
2594 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002595 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002596 return(NULL);
2597 }
2598
2599 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2600 if (buf == NULL) {
2601 xmlGenericError(xmlGenericErrorContext,
2602 "malloc of %d byte failed\n", size);
2603 return(NULL);
2604 }
2605 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2606 cur = CUR_CHAR(l);
2607 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2608 if (len + 5 >= size) {
2609 size *= 2;
2610 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2611 if (buf == NULL) {
2612 xmlGenericError(xmlGenericErrorContext,
2613 "realloc of %d byte failed\n", size);
2614 ctxt->instate = (xmlParserInputState) state;
2615 return(NULL);
2616 }
2617 }
2618 count++;
2619 if (count > 50) {
2620 GROW;
2621 count = 0;
2622 }
2623 COPY_BUF(l,buf,len,cur);
2624 NEXTL(l);
2625 cur = CUR_CHAR(l);
2626 if (cur == 0) {
2627 GROW;
2628 SHRINK;
2629 cur = CUR_CHAR(l);
2630 }
2631 }
2632 buf[len] = 0;
2633 ctxt->instate = (xmlParserInputState) state;
2634 if (!IS_CHAR(cur)) {
2635 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2637 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002640 } else {
2641 NEXT;
2642 }
2643 return(buf);
2644}
2645
2646/**
2647 * xmlParsePubidLiteral:
2648 * @ctxt: an XML parser context
2649 *
2650 * parse an XML public literal
2651 *
2652 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2653 *
2654 * Returns the PubidLiteral parsed or NULL.
2655 */
2656
2657xmlChar *
2658xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2659 xmlChar *buf = NULL;
2660 int len = 0;
2661 int size = XML_PARSER_BUFFER_SIZE;
2662 xmlChar cur;
2663 xmlChar stop;
2664 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002665 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002666
2667 SHRINK;
2668 if (RAW == '"') {
2669 NEXT;
2670 stop = '"';
2671 } else if (RAW == '\'') {
2672 NEXT;
2673 stop = '\'';
2674 } else {
2675 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2677 ctxt->sax->error(ctxt->userData,
2678 "SystemLiteral \" or ' expected\n");
2679 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002680 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002681 return(NULL);
2682 }
2683 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2684 if (buf == NULL) {
2685 xmlGenericError(xmlGenericErrorContext,
2686 "malloc of %d byte failed\n", size);
2687 return(NULL);
2688 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002689 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 cur = CUR;
2691 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2692 if (len + 1 >= size) {
2693 size *= 2;
2694 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2695 if (buf == NULL) {
2696 xmlGenericError(xmlGenericErrorContext,
2697 "realloc of %d byte failed\n", size);
2698 return(NULL);
2699 }
2700 }
2701 buf[len++] = cur;
2702 count++;
2703 if (count > 50) {
2704 GROW;
2705 count = 0;
2706 }
2707 NEXT;
2708 cur = CUR;
2709 if (cur == 0) {
2710 GROW;
2711 SHRINK;
2712 cur = CUR;
2713 }
2714 }
2715 buf[len] = 0;
2716 if (cur != stop) {
2717 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2719 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2720 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002721 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 } else {
2723 NEXT;
2724 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002725 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002726 return(buf);
2727}
2728
Daniel Veillard48b2f892001-02-25 16:11:03 +00002729void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002730/**
2731 * xmlParseCharData:
2732 * @ctxt: an XML parser context
2733 * @cdata: int indicating whether we are within a CDATA section
2734 *
2735 * parse a CharData section.
2736 * if we are within a CDATA section ']]>' marks an end of section.
2737 *
2738 * The right angle bracket (>) may be represented using the string "&gt;",
2739 * and must, for compatibility, be escaped using "&gt;" or a character
2740 * reference when it appears in the string "]]>" in content, when that
2741 * string is not marking the end of a CDATA section.
2742 *
2743 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2744 */
2745
2746void
2747xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002748 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002749 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002750 int line = ctxt->input->line;
2751 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002752
2753 SHRINK;
2754 GROW;
2755 /*
2756 * Accelerated common case where input don't need to be
2757 * modified before passing it to the handler.
2758 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002759 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002760 in = ctxt->input->cur;
2761 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002762get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002763 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2764 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002765 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002766 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002767 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002768 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002769 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002770 ctxt->input->line++;
2771 in++;
2772 }
2773 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002774 }
2775 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002776 if ((in[1] == ']') && (in[2] == '>')) {
2777 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2779 ctxt->sax->error(ctxt->userData,
2780 "Sequence ']]>' not allowed in content\n");
2781 ctxt->input->cur = in;
2782 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002784 return;
2785 }
2786 in++;
2787 goto get_more;
2788 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002789 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002790 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002791 if (IS_BLANK(*ctxt->input->cur)) {
2792 const xmlChar *tmp = ctxt->input->cur;
2793 ctxt->input->cur = in;
2794 if (areBlanks(ctxt, tmp, nbchar)) {
2795 if (ctxt->sax->ignorableWhitespace != NULL)
2796 ctxt->sax->ignorableWhitespace(ctxt->userData,
2797 tmp, nbchar);
2798 } else {
2799 if (ctxt->sax->characters != NULL)
2800 ctxt->sax->characters(ctxt->userData,
2801 tmp, nbchar);
2802 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002803 line = ctxt->input->line;
2804 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002805 } else {
2806 if (ctxt->sax->characters != NULL)
2807 ctxt->sax->characters(ctxt->userData,
2808 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002809 line = ctxt->input->line;
2810 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002811 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002812 }
2813 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002814 if (*in == 0xD) {
2815 in++;
2816 if (*in == 0xA) {
2817 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002818 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002819 ctxt->input->line++;
2820 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002821 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002822 in--;
2823 }
2824 if (*in == '<') {
2825 return;
2826 }
2827 if (*in == '&') {
2828 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002829 }
2830 SHRINK;
2831 GROW;
2832 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002833 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002834 nbchar = 0;
2835 }
Daniel Veillard50582112001-03-26 22:52:16 +00002836 ctxt->input->line = line;
2837 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002838 xmlParseCharDataComplex(ctxt, cdata);
2839}
2840
2841void
2842xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002843 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2844 int nbchar = 0;
2845 int cur, l;
2846 int count = 0;
2847
2848 SHRINK;
2849 GROW;
2850 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002851 while ((cur != '<') && /* checked */
2852 (cur != '&') &&
2853 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002854 if ((cur == ']') && (NXT(1) == ']') &&
2855 (NXT(2) == '>')) {
2856 if (cdata) break;
2857 else {
2858 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2860 ctxt->sax->error(ctxt->userData,
2861 "Sequence ']]>' not allowed in content\n");
2862 /* Should this be relaxed ??? I see a "must here */
2863 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002864 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002865 }
2866 }
2867 COPY_BUF(l,buf,nbchar,cur);
2868 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2869 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002870 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002871 */
2872 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2873 if (areBlanks(ctxt, buf, nbchar)) {
2874 if (ctxt->sax->ignorableWhitespace != NULL)
2875 ctxt->sax->ignorableWhitespace(ctxt->userData,
2876 buf, nbchar);
2877 } else {
2878 if (ctxt->sax->characters != NULL)
2879 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2880 }
2881 }
2882 nbchar = 0;
2883 }
2884 count++;
2885 if (count > 50) {
2886 GROW;
2887 count = 0;
2888 }
2889 NEXTL(l);
2890 cur = CUR_CHAR(l);
2891 }
2892 if (nbchar != 0) {
2893 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002894 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002895 */
2896 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2897 if (areBlanks(ctxt, buf, nbchar)) {
2898 if (ctxt->sax->ignorableWhitespace != NULL)
2899 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2900 } else {
2901 if (ctxt->sax->characters != NULL)
2902 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2903 }
2904 }
2905 }
2906}
2907
2908/**
2909 * xmlParseExternalID:
2910 * @ctxt: an XML parser context
2911 * @publicID: a xmlChar** receiving PubidLiteral
2912 * @strict: indicate whether we should restrict parsing to only
2913 * production [75], see NOTE below
2914 *
2915 * Parse an External ID or a Public ID
2916 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002917 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002918 * 'PUBLIC' S PubidLiteral S SystemLiteral
2919 *
2920 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2921 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2922 *
2923 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2924 *
2925 * Returns the function returns SystemLiteral and in the second
2926 * case publicID receives PubidLiteral, is strict is off
2927 * it is possible to return NULL and have publicID set.
2928 */
2929
2930xmlChar *
2931xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2932 xmlChar *URI = NULL;
2933
2934 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002935
2936 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002937 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2938 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2939 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2940 SKIP(6);
2941 if (!IS_BLANK(CUR)) {
2942 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2944 ctxt->sax->error(ctxt->userData,
2945 "Space required after 'SYSTEM'\n");
2946 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002947 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002948 }
2949 SKIP_BLANKS;
2950 URI = xmlParseSystemLiteral(ctxt);
2951 if (URI == NULL) {
2952 ctxt->errNo = XML_ERR_URI_REQUIRED;
2953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2954 ctxt->sax->error(ctxt->userData,
2955 "xmlParseExternalID: SYSTEM, no URI\n");
2956 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002957 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 }
2959 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2960 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2961 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2962 SKIP(6);
2963 if (!IS_BLANK(CUR)) {
2964 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "Space required after 'PUBLIC'\n");
2968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002970 }
2971 SKIP_BLANKS;
2972 *publicID = xmlParsePubidLiteral(ctxt);
2973 if (*publicID == NULL) {
2974 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2976 ctxt->sax->error(ctxt->userData,
2977 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2978 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002980 }
2981 if (strict) {
2982 /*
2983 * We don't handle [83] so "S SystemLiteral" is required.
2984 */
2985 if (!IS_BLANK(CUR)) {
2986 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2988 ctxt->sax->error(ctxt->userData,
2989 "Space required after the Public Identifier\n");
2990 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002991 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
2993 } else {
2994 /*
2995 * We handle [83] so we return immediately, if
2996 * "S SystemLiteral" is not detected. From a purely parsing
2997 * point of view that's a nice mess.
2998 */
2999 const xmlChar *ptr;
3000 GROW;
3001
3002 ptr = CUR_PTR;
3003 if (!IS_BLANK(*ptr)) return(NULL);
3004
3005 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3006 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3007 }
3008 SKIP_BLANKS;
3009 URI = xmlParseSystemLiteral(ctxt);
3010 if (URI == NULL) {
3011 ctxt->errNo = XML_ERR_URI_REQUIRED;
3012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3013 ctxt->sax->error(ctxt->userData,
3014 "xmlParseExternalID: PUBLIC, no URI\n");
3015 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003016 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003017 }
3018 }
3019 return(URI);
3020}
3021
3022/**
3023 * xmlParseComment:
3024 * @ctxt: an XML parser context
3025 *
3026 * Skip an XML (SGML) comment <!-- .... -->
3027 * The spec says that "For compatibility, the string "--" (double-hyphen)
3028 * must not occur within comments. "
3029 *
3030 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3031 */
3032void
3033xmlParseComment(xmlParserCtxtPtr ctxt) {
3034 xmlChar *buf = NULL;
3035 int len;
3036 int size = XML_PARSER_BUFFER_SIZE;
3037 int q, ql;
3038 int r, rl;
3039 int cur, l;
3040 xmlParserInputState state;
3041 xmlParserInputPtr input = ctxt->input;
3042 int count = 0;
3043
3044 /*
3045 * Check that there is a comment right here.
3046 */
3047 if ((RAW != '<') || (NXT(1) != '!') ||
3048 (NXT(2) != '-') || (NXT(3) != '-')) return;
3049
3050 state = ctxt->instate;
3051 ctxt->instate = XML_PARSER_COMMENT;
3052 SHRINK;
3053 SKIP(4);
3054 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3055 if (buf == NULL) {
3056 xmlGenericError(xmlGenericErrorContext,
3057 "malloc of %d byte failed\n", size);
3058 ctxt->instate = state;
3059 return;
3060 }
3061 q = CUR_CHAR(ql);
3062 NEXTL(ql);
3063 r = CUR_CHAR(rl);
3064 NEXTL(rl);
3065 cur = CUR_CHAR(l);
3066 len = 0;
3067 while (IS_CHAR(cur) && /* checked */
3068 ((cur != '>') ||
3069 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003070 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003071 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3073 ctxt->sax->error(ctxt->userData,
3074 "Comment must not contain '--' (double-hyphen)`\n");
3075 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003076 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003077 }
3078 if (len + 5 >= size) {
3079 size *= 2;
3080 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3081 if (buf == NULL) {
3082 xmlGenericError(xmlGenericErrorContext,
3083 "realloc of %d byte failed\n", size);
3084 ctxt->instate = state;
3085 return;
3086 }
3087 }
3088 COPY_BUF(ql,buf,len,q);
3089 q = r;
3090 ql = rl;
3091 r = cur;
3092 rl = l;
3093
3094 count++;
3095 if (count > 50) {
3096 GROW;
3097 count = 0;
3098 }
3099 NEXTL(l);
3100 cur = CUR_CHAR(l);
3101 if (cur == 0) {
3102 SHRINK;
3103 GROW;
3104 cur = CUR_CHAR(l);
3105 }
3106 }
3107 buf[len] = 0;
3108 if (!IS_CHAR(cur)) {
3109 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3111 ctxt->sax->error(ctxt->userData,
3112 "Comment not terminated \n<!--%.50s\n", buf);
3113 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003114 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003115 xmlFree(buf);
3116 } else {
3117 if (input != ctxt->input) {
3118 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3120 ctxt->sax->error(ctxt->userData,
3121"Comment doesn't start and stop in the same entity\n");
3122 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003123 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003124 }
3125 NEXT;
3126 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3127 (!ctxt->disableSAX))
3128 ctxt->sax->comment(ctxt->userData, buf);
3129 xmlFree(buf);
3130 }
3131 ctxt->instate = state;
3132}
3133
3134/**
3135 * xmlParsePITarget:
3136 * @ctxt: an XML parser context
3137 *
3138 * parse the name of a PI
3139 *
3140 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3141 *
3142 * Returns the PITarget name or NULL
3143 */
3144
3145xmlChar *
3146xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3147 xmlChar *name;
3148
3149 name = xmlParseName(ctxt);
3150 if ((name != NULL) &&
3151 ((name[0] == 'x') || (name[0] == 'X')) &&
3152 ((name[1] == 'm') || (name[1] == 'M')) &&
3153 ((name[2] == 'l') || (name[2] == 'L'))) {
3154 int i;
3155 if ((name[0] == 'x') && (name[1] == 'm') &&
3156 (name[2] == 'l') && (name[3] == 0)) {
3157 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3159 ctxt->sax->error(ctxt->userData,
3160 "XML declaration allowed only at the start of the document\n");
3161 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003162 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003163 return(name);
3164 } else if (name[3] == 0) {
3165 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3167 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3168 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003169 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003170 return(name);
3171 }
3172 for (i = 0;;i++) {
3173 if (xmlW3CPIs[i] == NULL) break;
3174 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3175 return(name);
3176 }
3177 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003180 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003181 }
3182 }
3183 return(name);
3184}
3185
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except for a
 * missing '=' after the "catalog" keyword which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* expect: S? 'catalog' S? '=' S? */
    p = catalog;
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;
    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK(*p))
        p++;

    /* quoted URL, closed by the same quote character which opened it */
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3248
Owen Taylor3473f882001-02-23 17:55:21 +00003249/**
3250 * xmlParsePI:
3251 * @ctxt: an XML parser context
3252 *
3253 * parse an XML Processing Instruction.
3254 *
3255 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3256 *
3257 * The processing is transfered to SAX once parsed.
3258 */
3259
3260void
3261xmlParsePI(xmlParserCtxtPtr ctxt) {
3262 xmlChar *buf = NULL;
3263 int len = 0;
3264 int size = XML_PARSER_BUFFER_SIZE;
3265 int cur, l;
3266 xmlChar *target;
3267 xmlParserInputState state;
3268 int count = 0;
3269
3270 if ((RAW == '<') && (NXT(1) == '?')) {
3271 xmlParserInputPtr input = ctxt->input;
3272 state = ctxt->instate;
3273 ctxt->instate = XML_PARSER_PI;
3274 /*
3275 * this is a Processing Instruction.
3276 */
3277 SKIP(2);
3278 SHRINK;
3279
3280 /*
3281 * Parse the target name and check for special support like
3282 * namespace.
3283 */
3284 target = xmlParsePITarget(ctxt);
3285 if (target != NULL) {
3286 if ((RAW == '?') && (NXT(1) == '>')) {
3287 if (input != ctxt->input) {
3288 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3290 ctxt->sax->error(ctxt->userData,
3291 "PI declaration doesn't start and stop in the same entity\n");
3292 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003293 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003294 }
3295 SKIP(2);
3296
3297 /*
3298 * SAX: PI detected.
3299 */
3300 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3301 (ctxt->sax->processingInstruction != NULL))
3302 ctxt->sax->processingInstruction(ctxt->userData,
3303 target, NULL);
3304 ctxt->instate = state;
3305 xmlFree(target);
3306 return;
3307 }
3308 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3309 if (buf == NULL) {
3310 xmlGenericError(xmlGenericErrorContext,
3311 "malloc of %d byte failed\n", size);
3312 ctxt->instate = state;
3313 return;
3314 }
3315 cur = CUR;
3316 if (!IS_BLANK(cur)) {
3317 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3319 ctxt->sax->error(ctxt->userData,
3320 "xmlParsePI: PI %s space expected\n", target);
3321 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003322 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003323 }
3324 SKIP_BLANKS;
3325 cur = CUR_CHAR(l);
3326 while (IS_CHAR(cur) && /* checked */
3327 ((cur != '?') || (NXT(1) != '>'))) {
3328 if (len + 5 >= size) {
3329 size *= 2;
3330 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3331 if (buf == NULL) {
3332 xmlGenericError(xmlGenericErrorContext,
3333 "realloc of %d byte failed\n", size);
3334 ctxt->instate = state;
3335 return;
3336 }
3337 }
3338 count++;
3339 if (count > 50) {
3340 GROW;
3341 count = 0;
3342 }
3343 COPY_BUF(l,buf,len,cur);
3344 NEXTL(l);
3345 cur = CUR_CHAR(l);
3346 if (cur == 0) {
3347 SHRINK;
3348 GROW;
3349 cur = CUR_CHAR(l);
3350 }
3351 }
3352 buf[len] = 0;
3353 if (cur != '?') {
3354 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3356 ctxt->sax->error(ctxt->userData,
3357 "xmlParsePI: PI %s never end ...\n", target);
3358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003360 } else {
3361 if (input != ctxt->input) {
3362 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3364 ctxt->sax->error(ctxt->userData,
3365 "PI declaration doesn't start and stop in the same entity\n");
3366 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003367 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003368 }
3369 SKIP(2);
3370
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003371#ifdef LIBXML_CATALOG_ENABLED
3372 if (((state == XML_PARSER_MISC) ||
3373 (state == XML_PARSER_START)) &&
3374 (xmlStrEqual(target, XML_CATALOG_PI))) {
3375 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3376 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3377 (allow == XML_CATA_ALLOW_ALL))
3378 xmlParseCatalogPI(ctxt, buf);
3379 }
3380#endif
3381
3382
Owen Taylor3473f882001-02-23 17:55:21 +00003383 /*
3384 * SAX: PI detected.
3385 */
3386 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3387 (ctxt->sax->processingInstruction != NULL))
3388 ctxt->sax->processingInstruction(ctxt->userData,
3389 target, buf);
3390 }
3391 xmlFree(buf);
3392 xmlFree(target);
3393 } else {
3394 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3396 ctxt->sax->error(ctxt->userData,
3397 "xmlParsePI : no target name\n");
3398 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003399 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003400 }
3401 ctxt->instate = state;
3402 }
3403}
3404
3405/**
3406 * xmlParseNotationDecl:
3407 * @ctxt: an XML parser context
3408 *
3409 * parse a notation declaration
3410 *
3411 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3412 *
3413 * Hence there is actually 3 choices:
3414 * 'PUBLIC' S PubidLiteral
3415 * 'PUBLIC' S PubidLiteral S SystemLiteral
3416 * and 'SYSTEM' S SystemLiteral
3417 *
3418 * See the NOTE on xmlParseExternalID().
3419 */
3420
3421void
3422xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3423 xmlChar *name;
3424 xmlChar *Pubid;
3425 xmlChar *Systemid;
3426
3427 if ((RAW == '<') && (NXT(1) == '!') &&
3428 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3429 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3430 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3431 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3432 xmlParserInputPtr input = ctxt->input;
3433 SHRINK;
3434 SKIP(10);
3435 if (!IS_BLANK(CUR)) {
3436 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3438 ctxt->sax->error(ctxt->userData,
3439 "Space required after '<!NOTATION'\n");
3440 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003441 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003442 return;
3443 }
3444 SKIP_BLANKS;
3445
Daniel Veillard76d66f42001-05-16 21:05:17 +00003446 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003447 if (name == NULL) {
3448 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3450 ctxt->sax->error(ctxt->userData,
3451 "NOTATION: Name expected here\n");
3452 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003453 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003454 return;
3455 }
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after the NOTATION name'\n");
3461 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003462 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return;
3464 }
3465 SKIP_BLANKS;
3466
3467 /*
3468 * Parse the IDs.
3469 */
3470 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3471 SKIP_BLANKS;
3472
3473 if (RAW == '>') {
3474 if (input != ctxt->input) {
3475 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3477 ctxt->sax->error(ctxt->userData,
3478"Notation declaration doesn't start and stop in the same entity\n");
3479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003481 }
3482 NEXT;
3483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3484 (ctxt->sax->notationDecl != NULL))
3485 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3486 } else {
3487 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3489 ctxt->sax->error(ctxt->userData,
3490 "'>' required to close NOTATION declaration\n");
3491 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003492 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003493 }
3494 xmlFree(name);
3495 if (Systemid != NULL) xmlFree(Systemid);
3496 if (Pubid != NULL) xmlFree(Pubid);
3497 }
3498}
3499
3500/**
3501 * xmlParseEntityDecl:
3502 * @ctxt: an XML parser context
3503 *
3504 * parse <!ENTITY declarations
3505 *
3506 * [70] EntityDecl ::= GEDecl | PEDecl
3507 *
3508 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3509 *
3510 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3511 *
3512 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3513 *
3514 * [74] PEDef ::= EntityValue | ExternalID
3515 *
3516 * [76] NDataDecl ::= S 'NDATA' S Name
3517 *
3518 * [ VC: Notation Declared ]
3519 * The Name must match the declared name of a notation.
3520 */
3521
3522void
3523xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3524 xmlChar *name = NULL;
3525 xmlChar *value = NULL;
3526 xmlChar *URI = NULL, *literal = NULL;
3527 xmlChar *ndata = NULL;
3528 int isParameter = 0;
3529 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003530 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003531
3532 GROW;
3533 if ((RAW == '<') && (NXT(1) == '!') &&
3534 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3535 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3536 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3537 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003538 SHRINK;
3539 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003540 skipped = SKIP_BLANKS;
3541 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003542 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3544 ctxt->sax->error(ctxt->userData,
3545 "Space required after '<!ENTITY'\n");
3546 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003548 }
Owen Taylor3473f882001-02-23 17:55:21 +00003549
3550 if (RAW == '%') {
3551 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003552 skipped = SKIP_BLANKS;
3553 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003554 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3556 ctxt->sax->error(ctxt->userData,
3557 "Space required after '%'\n");
3558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Owen Taylor3473f882001-02-23 17:55:21 +00003561 isParameter = 1;
3562 }
3563
Daniel Veillard76d66f42001-05-16 21:05:17 +00003564 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003565 if (name == NULL) {
3566 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3568 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3569 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003570 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003571 return;
3572 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003573 skipped = SKIP_BLANKS;
3574 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003575 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData,
3578 "Space required after the entity name\n");
3579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
Owen Taylor3473f882001-02-23 17:55:21 +00003582
Daniel Veillardf5582f12002-06-11 10:08:16 +00003583 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003584 /*
3585 * handle the various case of definitions...
3586 */
3587 if (isParameter) {
3588 if ((RAW == '"') || (RAW == '\'')) {
3589 value = xmlParseEntityValue(ctxt, &orig);
3590 if (value) {
3591 if ((ctxt->sax != NULL) &&
3592 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3593 ctxt->sax->entityDecl(ctxt->userData, name,
3594 XML_INTERNAL_PARAMETER_ENTITY,
3595 NULL, NULL, value);
3596 }
3597 } else {
3598 URI = xmlParseExternalID(ctxt, &literal, 1);
3599 if ((URI == NULL) && (literal == NULL)) {
3600 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3602 ctxt->sax->error(ctxt->userData,
3603 "Entity value required\n");
3604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003606 }
3607 if (URI) {
3608 xmlURIPtr uri;
3609
3610 uri = xmlParseURI((const char *) URI);
3611 if (uri == NULL) {
3612 ctxt->errNo = XML_ERR_INVALID_URI;
3613 if ((ctxt->sax != NULL) &&
3614 (!ctxt->disableSAX) &&
3615 (ctxt->sax->error != NULL))
3616 ctxt->sax->error(ctxt->userData,
3617 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003618 /*
3619 * This really ought to be a well formedness error
3620 * but the XML Core WG decided otherwise c.f. issue
3621 * E26 of the XML erratas.
3622 */
Owen Taylor3473f882001-02-23 17:55:21 +00003623 } else {
3624 if (uri->fragment != NULL) {
3625 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3626 if ((ctxt->sax != NULL) &&
3627 (!ctxt->disableSAX) &&
3628 (ctxt->sax->error != NULL))
3629 ctxt->sax->error(ctxt->userData,
3630 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003631 /*
3632 * Okay this is foolish to block those but not
3633 * invalid URIs.
3634 */
Owen Taylor3473f882001-02-23 17:55:21 +00003635 ctxt->wellFormed = 0;
3636 } else {
3637 if ((ctxt->sax != NULL) &&
3638 (!ctxt->disableSAX) &&
3639 (ctxt->sax->entityDecl != NULL))
3640 ctxt->sax->entityDecl(ctxt->userData, name,
3641 XML_EXTERNAL_PARAMETER_ENTITY,
3642 literal, URI, NULL);
3643 }
3644 xmlFreeURI(uri);
3645 }
3646 }
3647 }
3648 } else {
3649 if ((RAW == '"') || (RAW == '\'')) {
3650 value = xmlParseEntityValue(ctxt, &orig);
3651 if ((ctxt->sax != NULL) &&
3652 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3653 ctxt->sax->entityDecl(ctxt->userData, name,
3654 XML_INTERNAL_GENERAL_ENTITY,
3655 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003656 /*
3657 * For expat compatibility in SAX mode.
3658 */
3659 if ((ctxt->myDoc == NULL) ||
3660 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3661 if (ctxt->myDoc == NULL) {
3662 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3663 }
3664 if (ctxt->myDoc->intSubset == NULL)
3665 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3666 BAD_CAST "fake", NULL, NULL);
3667
3668 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3669 NULL, NULL, value);
3670 }
Owen Taylor3473f882001-02-23 17:55:21 +00003671 } else {
3672 URI = xmlParseExternalID(ctxt, &literal, 1);
3673 if ((URI == NULL) && (literal == NULL)) {
3674 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3676 ctxt->sax->error(ctxt->userData,
3677 "Entity value required\n");
3678 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003679 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003680 }
3681 if (URI) {
3682 xmlURIPtr uri;
3683
3684 uri = xmlParseURI((const char *)URI);
3685 if (uri == NULL) {
3686 ctxt->errNo = XML_ERR_INVALID_URI;
3687 if ((ctxt->sax != NULL) &&
3688 (!ctxt->disableSAX) &&
3689 (ctxt->sax->error != NULL))
3690 ctxt->sax->error(ctxt->userData,
3691 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003692 /*
3693 * This really ought to be a well formedness error
3694 * but the XML Core WG decided otherwise c.f. issue
3695 * E26 of the XML erratas.
3696 */
Owen Taylor3473f882001-02-23 17:55:21 +00003697 } else {
3698 if (uri->fragment != NULL) {
3699 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3700 if ((ctxt->sax != NULL) &&
3701 (!ctxt->disableSAX) &&
3702 (ctxt->sax->error != NULL))
3703 ctxt->sax->error(ctxt->userData,
3704 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003705 /*
3706 * Okay this is foolish to block those but not
3707 * invalid URIs.
3708 */
Owen Taylor3473f882001-02-23 17:55:21 +00003709 ctxt->wellFormed = 0;
3710 }
3711 xmlFreeURI(uri);
3712 }
3713 }
3714 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3715 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "Space required before 'NDATA'\n");
3719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 SKIP_BLANKS;
3723 if ((RAW == 'N') && (NXT(1) == 'D') &&
3724 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3725 (NXT(4) == 'A')) {
3726 SKIP(5);
3727 if (!IS_BLANK(CUR)) {
3728 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3730 ctxt->sax->error(ctxt->userData,
3731 "Space required after 'NDATA'\n");
3732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003734 }
3735 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003736 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003737 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3738 (ctxt->sax->unparsedEntityDecl != NULL))
3739 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3740 literal, URI, ndata);
3741 } else {
3742 if ((ctxt->sax != NULL) &&
3743 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3744 ctxt->sax->entityDecl(ctxt->userData, name,
3745 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3746 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003747 /*
3748 * For expat compatibility in SAX mode.
3749 * assuming the entity repalcement was asked for
3750 */
3751 if ((ctxt->replaceEntities != 0) &&
3752 ((ctxt->myDoc == NULL) ||
3753 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3754 if (ctxt->myDoc == NULL) {
3755 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3756 }
3757
3758 if (ctxt->myDoc->intSubset == NULL)
3759 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3760 BAD_CAST "fake", NULL, NULL);
3761 entityDecl(ctxt, name,
3762 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3763 literal, URI, NULL);
3764 }
Owen Taylor3473f882001-02-23 17:55:21 +00003765 }
3766 }
3767 }
3768 SKIP_BLANKS;
3769 if (RAW != '>') {
3770 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3772 ctxt->sax->error(ctxt->userData,
3773 "xmlParseEntityDecl: entity %s not terminated\n", name);
3774 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003775 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003776 } else {
3777 if (input != ctxt->input) {
3778 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3779 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3780 ctxt->sax->error(ctxt->userData,
3781"Entity declaration doesn't start and stop in the same entity\n");
3782 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003784 }
3785 NEXT;
3786 }
3787 if (orig != NULL) {
3788 /*
3789 * Ugly mechanism to save the raw entity value.
3790 */
3791 xmlEntityPtr cur = NULL;
3792
3793 if (isParameter) {
3794 if ((ctxt->sax != NULL) &&
3795 (ctxt->sax->getParameterEntity != NULL))
3796 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3797 } else {
3798 if ((ctxt->sax != NULL) &&
3799 (ctxt->sax->getEntity != NULL))
3800 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003801 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3802 cur = getEntity(ctxt, name);
3803 }
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
3805 if (cur != NULL) {
3806 if (cur->orig != NULL)
3807 xmlFree(orig);
3808 else
3809 cur->orig = orig;
3810 } else
3811 xmlFree(orig);
3812 }
3813 if (name != NULL) xmlFree(name);
3814 if (value != NULL) xmlFree(value);
3815 if (URI != NULL) xmlFree(URI);
3816 if (literal != NULL) xmlFree(literal);
3817 if (ndata != NULL) xmlFree(ndata);
3818 }
3819}
3820
3821/**
3822 * xmlParseDefaultDecl:
3823 * @ctxt: an XML parser context
3824 * @value: Receive a possible fixed default value for the attribute
3825 *
3826 * Parse an attribute default declaration
3827 *
3828 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3829 *
3830 * [ VC: Required Attribute ]
3831 * if the default declaration is the keyword #REQUIRED, then the
3832 * attribute must be specified for all elements of the type in the
3833 * attribute-list declaration.
3834 *
3835 * [ VC: Attribute Default Legal ]
3836 * The declared default value must meet the lexical constraints of
3837 * the declared attribute type c.f. xmlValidateAttributeDecl()
3838 *
3839 * [ VC: Fixed Attribute Default ]
3840 * if an attribute has a default value declared with the #FIXED
3841 * keyword, instances of that attribute must match the default value.
3842 *
3843 * [ WFC: No < in Attribute Values ]
3844 * handled in xmlParseAttValue()
3845 *
3846 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3847 * or XML_ATTRIBUTE_FIXED.
3848 */
3849
3850int
3851xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3852 int val;
3853 xmlChar *ret;
3854
3855 *value = NULL;
3856 if ((RAW == '#') && (NXT(1) == 'R') &&
3857 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3858 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3859 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3860 (NXT(8) == 'D')) {
3861 SKIP(9);
3862 return(XML_ATTRIBUTE_REQUIRED);
3863 }
3864 if ((RAW == '#') && (NXT(1) == 'I') &&
3865 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3866 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3867 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3868 SKIP(8);
3869 return(XML_ATTRIBUTE_IMPLIED);
3870 }
3871 val = XML_ATTRIBUTE_NONE;
3872 if ((RAW == '#') && (NXT(1) == 'F') &&
3873 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3874 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3875 SKIP(6);
3876 val = XML_ATTRIBUTE_FIXED;
3877 if (!IS_BLANK(CUR)) {
3878 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3880 ctxt->sax->error(ctxt->userData,
3881 "Space required after '#FIXED'\n");
3882 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003883 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003884 }
3885 SKIP_BLANKS;
3886 }
3887 ret = xmlParseAttValue(ctxt);
3888 ctxt->instate = XML_PARSER_DTD;
3889 if (ret == NULL) {
3890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3891 ctxt->sax->error(ctxt->userData,
3892 "Attribute default value declaration error\n");
3893 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003894 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003895 } else
3896 *value = ret;
3897 return(val);
3898}
3899
3900/**
3901 * xmlParseNotationType:
3902 * @ctxt: an XML parser context
3903 *
3904 * parse an Notation attribute type.
3905 *
3906 * Note: the leading 'NOTATION' S part has already being parsed...
3907 *
3908 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3909 *
3910 * [ VC: Notation Attributes ]
3911 * Values of this type must match one of the notation names included
3912 * in the declaration; all notation names in the declaration must be declared.
3913 *
3914 * Returns: the notation attribute tree built while parsing
3915 */
3916
3917xmlEnumerationPtr
3918xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3919 xmlChar *name;
3920 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3921
3922 if (RAW != '(') {
3923 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3925 ctxt->sax->error(ctxt->userData,
3926 "'(' required to start 'NOTATION'\n");
3927 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003928 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003929 return(NULL);
3930 }
3931 SHRINK;
3932 do {
3933 NEXT;
3934 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003935 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 if (name == NULL) {
3937 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3939 ctxt->sax->error(ctxt->userData,
3940 "Name expected in NOTATION declaration\n");
3941 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 return(ret);
3944 }
3945 cur = xmlCreateEnumeration(name);
3946 xmlFree(name);
3947 if (cur == NULL) return(ret);
3948 if (last == NULL) ret = last = cur;
3949 else {
3950 last->next = cur;
3951 last = cur;
3952 }
3953 SKIP_BLANKS;
3954 } while (RAW == '|');
3955 if (RAW != ')') {
3956 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3958 ctxt->sax->error(ctxt->userData,
3959 "')' required to finish NOTATION declaration\n");
3960 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003961 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if ((last != NULL) && (last != ret))
3963 xmlFreeEnumeration(last);
3964 return(ret);
3965 }
3966 NEXT;
3967 return(ret);
3968}
3969
3970/**
3971 * xmlParseEnumerationType:
3972 * @ctxt: an XML parser context
3973 *
3974 * parse an Enumeration attribute type.
3975 *
3976 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3977 *
3978 * [ VC: Enumeration ]
3979 * Values of this type must match one of the Nmtoken tokens in
3980 * the declaration
3981 *
3982 * Returns: the enumeration attribute tree built while parsing
3983 */
3984
3985xmlEnumerationPtr
3986xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3987 xmlChar *name;
3988 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3989
3990 if (RAW != '(') {
3991 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3993 ctxt->sax->error(ctxt->userData,
3994 "'(' required to start ATTLIST enumeration\n");
3995 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003996 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003997 return(NULL);
3998 }
3999 SHRINK;
4000 do {
4001 NEXT;
4002 SKIP_BLANKS;
4003 name = xmlParseNmtoken(ctxt);
4004 if (name == NULL) {
4005 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "NmToken expected in ATTLIST enumeration\n");
4009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004011 return(ret);
4012 }
4013 cur = xmlCreateEnumeration(name);
4014 xmlFree(name);
4015 if (cur == NULL) return(ret);
4016 if (last == NULL) ret = last = cur;
4017 else {
4018 last->next = cur;
4019 last = cur;
4020 }
4021 SKIP_BLANKS;
4022 } while (RAW == '|');
4023 if (RAW != ')') {
4024 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4026 ctxt->sax->error(ctxt->userData,
4027 "')' required to finish ATTLIST enumeration\n");
4028 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004029 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 return(ret);
4031 }
4032 NEXT;
4033 return(ret);
4034}
4035
4036/**
4037 * xmlParseEnumeratedType:
4038 * @ctxt: an XML parser context
4039 * @tree: the enumeration tree built while parsing
4040 *
4041 * parse an Enumerated attribute type.
4042 *
4043 * [57] EnumeratedType ::= NotationType | Enumeration
4044 *
4045 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4046 *
4047 *
4048 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4049 */
4050
4051int
4052xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4053 if ((RAW == 'N') && (NXT(1) == 'O') &&
4054 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4055 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4056 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4057 SKIP(8);
4058 if (!IS_BLANK(CUR)) {
4059 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4061 ctxt->sax->error(ctxt->userData,
4062 "Space required after 'NOTATION'\n");
4063 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004064 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 return(0);
4066 }
4067 SKIP_BLANKS;
4068 *tree = xmlParseNotationType(ctxt);
4069 if (*tree == NULL) return(0);
4070 return(XML_ATTRIBUTE_NOTATION);
4071 }
4072 *tree = xmlParseEnumerationType(ctxt);
4073 if (*tree == NULL) return(0);
4074 return(XML_ATTRIBUTE_ENUMERATION);
4075}
4076
4077/**
4078 * xmlParseAttributeType:
4079 * @ctxt: an XML parser context
4080 * @tree: the enumeration tree built while parsing
4081 *
4082 * parse the Attribute list def for an element
4083 *
4084 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4085 *
4086 * [55] StringType ::= 'CDATA'
4087 *
4088 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4089 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4090 *
4091 * Validity constraints for attribute values syntax are checked in
4092 * xmlValidateAttributeValue()
4093 *
4094 * [ VC: ID ]
4095 * Values of type ID must match the Name production. A name must not
4096 * appear more than once in an XML document as a value of this type;
4097 * i.e., ID values must uniquely identify the elements which bear them.
4098 *
4099 * [ VC: One ID per Element Type ]
4100 * No element type may have more than one ID attribute specified.
4101 *
4102 * [ VC: ID Attribute Default ]
4103 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4104 *
4105 * [ VC: IDREF ]
4106 * Values of type IDREF must match the Name production, and values
4107 * of type IDREFS must match Names; each IDREF Name must match the value
4108 * of an ID attribute on some element in the XML document; i.e. IDREF
4109 * values must match the value of some ID attribute.
4110 *
4111 * [ VC: Entity Name ]
4112 * Values of type ENTITY must match the Name production, values
4113 * of type ENTITIES must match Names; each Entity Name must match the
4114 * name of an unparsed entity declared in the DTD.
4115 *
4116 * [ VC: Name Token ]
4117 * Values of type NMTOKEN must match the Nmtoken production; values
4118 * of type NMTOKENS must match Nmtokens.
4119 *
4120 * Returns the attribute type
4121 */
4122int
4123xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4124 SHRINK;
4125 if ((RAW == 'C') && (NXT(1) == 'D') &&
4126 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4127 (NXT(4) == 'A')) {
4128 SKIP(5);
4129 return(XML_ATTRIBUTE_CDATA);
4130 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4131 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4132 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4133 SKIP(6);
4134 return(XML_ATTRIBUTE_IDREFS);
4135 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4136 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4137 (NXT(4) == 'F')) {
4138 SKIP(5);
4139 return(XML_ATTRIBUTE_IDREF);
4140 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4141 SKIP(2);
4142 return(XML_ATTRIBUTE_ID);
4143 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4144 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4145 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4146 SKIP(6);
4147 return(XML_ATTRIBUTE_ENTITY);
4148 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4149 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4150 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4151 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4152 SKIP(8);
4153 return(XML_ATTRIBUTE_ENTITIES);
4154 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4155 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4156 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4157 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4158 SKIP(8);
4159 return(XML_ATTRIBUTE_NMTOKENS);
4160 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4161 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4162 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4163 (NXT(6) == 'N')) {
4164 SKIP(7);
4165 return(XML_ATTRIBUTE_NMTOKEN);
4166 }
4167 return(xmlParseEnumeratedType(ctxt, tree));
4168}
4169
4170/**
4171 * xmlParseAttributeListDecl:
4172 * @ctxt: an XML parser context
4173 *
4174 * : parse the Attribute list def for an element
4175 *
4176 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4177 *
4178 * [53] AttDef ::= S Name S AttType S DefaultDecl
4179 *
4180 */
4181void
4182xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4183 xmlChar *elemName;
4184 xmlChar *attrName;
4185 xmlEnumerationPtr tree;
4186
4187 if ((RAW == '<') && (NXT(1) == '!') &&
4188 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4189 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4190 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4191 (NXT(8) == 'T')) {
4192 xmlParserInputPtr input = ctxt->input;
4193
4194 SKIP(9);
4195 if (!IS_BLANK(CUR)) {
4196 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4198 ctxt->sax->error(ctxt->userData,
4199 "Space required after '<!ATTLIST'\n");
4200 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004201 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004202 }
4203 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004204 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004205 if (elemName == NULL) {
4206 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4208 ctxt->sax->error(ctxt->userData,
4209 "ATTLIST: no name for Element\n");
4210 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004211 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004212 return;
4213 }
4214 SKIP_BLANKS;
4215 GROW;
4216 while (RAW != '>') {
4217 const xmlChar *check = CUR_PTR;
4218 int type;
4219 int def;
4220 xmlChar *defaultValue = NULL;
4221
4222 GROW;
4223 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004224 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004225 if (attrName == NULL) {
4226 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4228 ctxt->sax->error(ctxt->userData,
4229 "ATTLIST: no name for Attribute\n");
4230 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004231 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004232 break;
4233 }
4234 GROW;
4235 if (!IS_BLANK(CUR)) {
4236 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4238 ctxt->sax->error(ctxt->userData,
4239 "Space required after the attribute name\n");
4240 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004241 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 if (attrName != NULL)
4243 xmlFree(attrName);
4244 if (defaultValue != NULL)
4245 xmlFree(defaultValue);
4246 break;
4247 }
4248 SKIP_BLANKS;
4249
4250 type = xmlParseAttributeType(ctxt, &tree);
4251 if (type <= 0) {
4252 if (attrName != NULL)
4253 xmlFree(attrName);
4254 if (defaultValue != NULL)
4255 xmlFree(defaultValue);
4256 break;
4257 }
4258
4259 GROW;
4260 if (!IS_BLANK(CUR)) {
4261 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4263 ctxt->sax->error(ctxt->userData,
4264 "Space required after the attribute type\n");
4265 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004266 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004267 if (attrName != NULL)
4268 xmlFree(attrName);
4269 if (defaultValue != NULL)
4270 xmlFree(defaultValue);
4271 if (tree != NULL)
4272 xmlFreeEnumeration(tree);
4273 break;
4274 }
4275 SKIP_BLANKS;
4276
4277 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4278 if (def <= 0) {
4279 if (attrName != NULL)
4280 xmlFree(attrName);
4281 if (defaultValue != NULL)
4282 xmlFree(defaultValue);
4283 if (tree != NULL)
4284 xmlFreeEnumeration(tree);
4285 break;
4286 }
4287
4288 GROW;
4289 if (RAW != '>') {
4290 if (!IS_BLANK(CUR)) {
4291 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4293 ctxt->sax->error(ctxt->userData,
4294 "Space required after the attribute default value\n");
4295 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004296 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004297 if (attrName != NULL)
4298 xmlFree(attrName);
4299 if (defaultValue != NULL)
4300 xmlFree(defaultValue);
4301 if (tree != NULL)
4302 xmlFreeEnumeration(tree);
4303 break;
4304 }
4305 SKIP_BLANKS;
4306 }
4307 if (check == CUR_PTR) {
4308 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4310 ctxt->sax->error(ctxt->userData,
4311 "xmlParseAttributeListDecl: detected internal error\n");
4312 if (attrName != NULL)
4313 xmlFree(attrName);
4314 if (defaultValue != NULL)
4315 xmlFree(defaultValue);
4316 if (tree != NULL)
4317 xmlFreeEnumeration(tree);
4318 break;
4319 }
4320 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4321 (ctxt->sax->attributeDecl != NULL))
4322 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4323 type, def, defaultValue, tree);
4324 if (attrName != NULL)
4325 xmlFree(attrName);
4326 if (defaultValue != NULL)
4327 xmlFree(defaultValue);
4328 GROW;
4329 }
4330 if (RAW == '>') {
4331 if (input != ctxt->input) {
4332 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335"Attribute list declaration doesn't start and stop in the same entity\n");
4336 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004337 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
4339 NEXT;
4340 }
4341
4342 xmlFree(elemName);
4343 }
4344}
4345
4346/**
4347 * xmlParseElementMixedContentDecl:
4348 * @ctxt: an XML parser context
4349 *
4350 * parse the declaration for a Mixed Element content
4351 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4352 *
4353 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4354 * '(' S? '#PCDATA' S? ')'
4355 *
4356 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4357 *
4358 * [ VC: No Duplicate Types ]
4359 * The same name must not appear more than once in a single
4360 * mixed-content declaration.
4361 *
4362 * returns: the list of the xmlElementContentPtr describing the element choices
4363 */
4364xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004365xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004366 xmlElementContentPtr ret = NULL, cur = NULL, n;
4367 xmlChar *elem = NULL;
4368
4369 GROW;
4370 if ((RAW == '#') && (NXT(1) == 'P') &&
4371 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4372 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4373 (NXT(6) == 'A')) {
4374 SKIP(7);
4375 SKIP_BLANKS;
4376 SHRINK;
4377 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004378 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4379 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4380 if (ctxt->vctxt.error != NULL)
4381 ctxt->vctxt.error(ctxt->vctxt.userData,
4382"Element content declaration doesn't start and stop in the same entity\n");
4383 ctxt->valid = 0;
4384 }
Owen Taylor3473f882001-02-23 17:55:21 +00004385 NEXT;
4386 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4387 if (RAW == '*') {
4388 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4389 NEXT;
4390 }
4391 return(ret);
4392 }
4393 if ((RAW == '(') || (RAW == '|')) {
4394 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4395 if (ret == NULL) return(NULL);
4396 }
4397 while (RAW == '|') {
4398 NEXT;
4399 if (elem == NULL) {
4400 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4401 if (ret == NULL) return(NULL);
4402 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004403 if (cur != NULL)
4404 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 cur = ret;
4406 } else {
4407 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4408 if (n == NULL) return(NULL);
4409 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004410 if (n->c1 != NULL)
4411 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004412 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004413 if (n != NULL)
4414 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 cur = n;
4416 xmlFree(elem);
4417 }
4418 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004419 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004420 if (elem == NULL) {
4421 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4423 ctxt->sax->error(ctxt->userData,
4424 "xmlParseElementMixedContentDecl : Name expected\n");
4425 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004426 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 xmlFreeElementContent(cur);
4428 return(NULL);
4429 }
4430 SKIP_BLANKS;
4431 GROW;
4432 }
4433 if ((RAW == ')') && (NXT(1) == '*')) {
4434 if (elem != NULL) {
4435 cur->c2 = xmlNewElementContent(elem,
4436 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004437 if (cur->c2 != NULL)
4438 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004439 xmlFree(elem);
4440 }
4441 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004442 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4443 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4444 if (ctxt->vctxt.error != NULL)
4445 ctxt->vctxt.error(ctxt->vctxt.userData,
4446"Element content declaration doesn't start and stop in the same entity\n");
4447 ctxt->valid = 0;
4448 }
Owen Taylor3473f882001-02-23 17:55:21 +00004449 SKIP(2);
4450 } else {
4451 if (elem != NULL) xmlFree(elem);
4452 xmlFreeElementContent(ret);
4453 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4455 ctxt->sax->error(ctxt->userData,
4456 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4457 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004458 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004459 return(NULL);
4460 }
4461
4462 } else {
4463 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4465 ctxt->sax->error(ctxt->userData,
4466 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4467 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004469 }
4470 return(ret);
4471}
4472
4473/**
4474 * xmlParseElementChildrenContentDecl:
4475 * @ctxt: an XML parser context
4476 *
 * parse the declaration for an element children content (choice or seq)
4478 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4479 *
4480 *
4481 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4482 *
4483 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4484 *
4485 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4486 *
4487 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4488 *
4489 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4490 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004491 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004492 * opening or closing parentheses in a choice, seq, or Mixed
4493 * construct is contained in the replacement text for a parameter
4494 * entity, both must be contained in the same replacement text. For
4495 * interoperability, if a parameter-entity reference appears in a
4496 * choice, seq, or Mixed construct, its replacement text should not
4497 * be empty, and neither the first nor last non-blank character of
4498 * the replacement text should be a connector (| or ,).
4499 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004500 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004501 * hierarchy.
4502 */
4503xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004504xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004505(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004506 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4507 xmlChar *elem;
4508 xmlChar type = 0;
4509
4510 SKIP_BLANKS;
4511 GROW;
4512 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004513 xmlParserInputPtr input = ctxt->input;
4514
Owen Taylor3473f882001-02-23 17:55:21 +00004515 /* Recurse on first child */
4516 NEXT;
4517 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004518 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004519 SKIP_BLANKS;
4520 GROW;
4521 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004522 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004523 if (elem == NULL) {
4524 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4526 ctxt->sax->error(ctxt->userData,
4527 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4528 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004529 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004530 return(NULL);
4531 }
4532 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4533 GROW;
4534 if (RAW == '?') {
4535 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4536 NEXT;
4537 } else if (RAW == '*') {
4538 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4539 NEXT;
4540 } else if (RAW == '+') {
4541 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4542 NEXT;
4543 } else {
4544 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4545 }
4546 xmlFree(elem);
4547 GROW;
4548 }
4549 SKIP_BLANKS;
4550 SHRINK;
4551 while (RAW != ')') {
4552 /*
4553 * Each loop we parse one separator and one element.
4554 */
4555 if (RAW == ',') {
4556 if (type == 0) type = CUR;
4557
4558 /*
4559 * Detect "Name | Name , Name" error
4560 */
4561 else if (type != CUR) {
4562 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4564 ctxt->sax->error(ctxt->userData,
4565 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4566 type);
4567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004569 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004570 xmlFreeElementContent(last);
4571 if (ret != NULL)
4572 xmlFreeElementContent(ret);
4573 return(NULL);
4574 }
4575 NEXT;
4576
4577 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4578 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004579 if ((last != NULL) && (last != ret))
4580 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004581 xmlFreeElementContent(ret);
4582 return(NULL);
4583 }
4584 if (last == NULL) {
4585 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004586 if (ret != NULL)
4587 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004588 ret = cur = op;
4589 } else {
4590 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004591 if (op != NULL)
4592 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004593 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004594 if (last != NULL)
4595 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004596 cur =op;
4597 last = NULL;
4598 }
4599 } else if (RAW == '|') {
4600 if (type == 0) type = CUR;
4601
4602 /*
4603 * Detect "Name , Name | Name" error
4604 */
4605 else if (type != CUR) {
4606 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4608 ctxt->sax->error(ctxt->userData,
4609 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4610 type);
4611 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004612 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004613 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004614 xmlFreeElementContent(last);
4615 if (ret != NULL)
4616 xmlFreeElementContent(ret);
4617 return(NULL);
4618 }
4619 NEXT;
4620
4621 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4622 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004623 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004624 xmlFreeElementContent(last);
4625 if (ret != NULL)
4626 xmlFreeElementContent(ret);
4627 return(NULL);
4628 }
4629 if (last == NULL) {
4630 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004631 if (ret != NULL)
4632 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004633 ret = cur = op;
4634 } else {
4635 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004636 if (op != NULL)
4637 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004638 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004639 if (last != NULL)
4640 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004641 cur =op;
4642 last = NULL;
4643 }
4644 } else {
4645 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647 ctxt->sax->error(ctxt->userData,
4648 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4649 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004650 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004651 if (ret != NULL)
4652 xmlFreeElementContent(ret);
4653 return(NULL);
4654 }
4655 GROW;
4656 SKIP_BLANKS;
4657 GROW;
4658 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004659 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004660 /* Recurse on second child */
4661 NEXT;
4662 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004663 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004664 SKIP_BLANKS;
4665 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004666 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004667 if (elem == NULL) {
4668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4679 xmlFree(elem);
4680 if (RAW == '?') {
4681 last->ocur = XML_ELEMENT_CONTENT_OPT;
4682 NEXT;
4683 } else if (RAW == '*') {
4684 last->ocur = XML_ELEMENT_CONTENT_MULT;
4685 NEXT;
4686 } else if (RAW == '+') {
4687 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4688 NEXT;
4689 } else {
4690 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4691 }
4692 }
4693 SKIP_BLANKS;
4694 GROW;
4695 }
4696 if ((cur != NULL) && (last != NULL)) {
4697 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004698 if (last != NULL)
4699 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004700 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004701 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4702 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4703 if (ctxt->vctxt.error != NULL)
4704 ctxt->vctxt.error(ctxt->vctxt.userData,
4705"Element content declaration doesn't start and stop in the same entity\n");
4706 ctxt->valid = 0;
4707 }
Owen Taylor3473f882001-02-23 17:55:21 +00004708 NEXT;
4709 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004710 if (ret != NULL)
4711 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004712 NEXT;
4713 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004714 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004715 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004716 cur = ret;
4717 /*
4718 * Some normalization:
4719 * (a | b* | c?)* == (a | b | c)*
4720 */
4721 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4722 if ((cur->c1 != NULL) &&
4723 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4724 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4725 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4726 if ((cur->c2 != NULL) &&
4727 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4728 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4729 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4730 cur = cur->c2;
4731 }
4732 }
Owen Taylor3473f882001-02-23 17:55:21 +00004733 NEXT;
4734 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004735 if (ret != NULL) {
4736 int found = 0;
4737
Daniel Veillarde470df72001-04-18 21:41:07 +00004738 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 /*
4740 * Some normalization:
4741 * (a | b*)+ == (a | b)*
4742 * (a | b?)+ == (a | b)*
4743 */
4744 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4745 if ((cur->c1 != NULL) &&
4746 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4747 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4748 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4749 found = 1;
4750 }
4751 if ((cur->c2 != NULL) &&
4752 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4753 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4754 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4755 found = 1;
4756 }
4757 cur = cur->c2;
4758 }
4759 if (found)
4760 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4761 }
Owen Taylor3473f882001-02-23 17:55:21 +00004762 NEXT;
4763 }
4764 return(ret);
4765}
4766
4767/**
4768 * xmlParseElementContentDecl:
4769 * @ctxt: an XML parser context
4770 * @name: the name of the element being defined.
4771 * @result: the Element Content pointer will be stored here if any
4772 *
4773 * parse the declaration for an Element content either Mixed or Children,
4774 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4775 *
4776 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4777 *
4778 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4779 */
4780
4781int
4782xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4783 xmlElementContentPtr *result) {
4784
4785 xmlElementContentPtr tree = NULL;
4786 xmlParserInputPtr input = ctxt->input;
4787 int res;
4788
4789 *result = NULL;
4790
4791 if (RAW != '(') {
4792 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4794 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004795 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004796 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004797 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004798 return(-1);
4799 }
4800 NEXT;
4801 GROW;
4802 SKIP_BLANKS;
4803 if ((RAW == '#') && (NXT(1) == 'P') &&
4804 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4805 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4806 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004807 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004808 res = XML_ELEMENT_TYPE_MIXED;
4809 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004810 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004811 res = XML_ELEMENT_TYPE_ELEMENT;
4812 }
Owen Taylor3473f882001-02-23 17:55:21 +00004813 SKIP_BLANKS;
4814 *result = tree;
4815 return(res);
4816}
4817
4818/**
4819 * xmlParseElementDecl:
4820 * @ctxt: an XML parser context
4821 *
4822 * parse an Element declaration.
4823 *
4824 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4825 *
4826 * [ VC: Unique Element Type Declaration ]
4827 * No element type may be declared more than once
4828 *
4829 * Returns the type of the element, or -1 in case of error
4830 */
4831int
4832xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4833 xmlChar *name;
4834 int ret = -1;
4835 xmlElementContentPtr content = NULL;
4836
4837 GROW;
4838 if ((RAW == '<') && (NXT(1) == '!') &&
4839 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4840 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4841 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4842 (NXT(8) == 'T')) {
4843 xmlParserInputPtr input = ctxt->input;
4844
4845 SKIP(9);
4846 if (!IS_BLANK(CUR)) {
4847 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4849 ctxt->sax->error(ctxt->userData,
4850 "Space required after 'ELEMENT'\n");
4851 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004852 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004853 }
4854 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004855 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 if (name == NULL) {
4857 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4859 ctxt->sax->error(ctxt->userData,
4860 "xmlParseElementDecl: no name for Element\n");
4861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004863 return(-1);
4864 }
4865 while ((RAW == 0) && (ctxt->inputNr > 1))
4866 xmlPopInput(ctxt);
4867 if (!IS_BLANK(CUR)) {
4868 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4870 ctxt->sax->error(ctxt->userData,
4871 "Space required after the element name\n");
4872 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004873 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004874 }
4875 SKIP_BLANKS;
4876 if ((RAW == 'E') && (NXT(1) == 'M') &&
4877 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4878 (NXT(4) == 'Y')) {
4879 SKIP(5);
4880 /*
4881 * Element must always be empty.
4882 */
4883 ret = XML_ELEMENT_TYPE_EMPTY;
4884 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4885 (NXT(2) == 'Y')) {
4886 SKIP(3);
4887 /*
4888 * Element is a generic container.
4889 */
4890 ret = XML_ELEMENT_TYPE_ANY;
4891 } else if (RAW == '(') {
4892 ret = xmlParseElementContentDecl(ctxt, name, &content);
4893 } else {
4894 /*
4895 * [ WFC: PEs in Internal Subset ] error handling.
4896 */
4897 if ((RAW == '%') && (ctxt->external == 0) &&
4898 (ctxt->inputNr == 1)) {
4899 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4901 ctxt->sax->error(ctxt->userData,
4902 "PEReference: forbidden within markup decl in internal subset\n");
4903 } else {
4904 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4906 ctxt->sax->error(ctxt->userData,
4907 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4908 }
4909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004911 if (name != NULL) xmlFree(name);
4912 return(-1);
4913 }
4914
4915 SKIP_BLANKS;
4916 /*
4917 * Pop-up of finished entities.
4918 */
4919 while ((RAW == 0) && (ctxt->inputNr > 1))
4920 xmlPopInput(ctxt);
4921 SKIP_BLANKS;
4922
4923 if (RAW != '>') {
4924 ctxt->errNo = XML_ERR_GT_REQUIRED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "xmlParseElementDecl: expected '>' at the end\n");
4928 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004929 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004930 } else {
4931 if (input != ctxt->input) {
4932 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4934 ctxt->sax->error(ctxt->userData,
4935"Element declaration doesn't start and stop in the same entity\n");
4936 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004937 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004938 }
4939
4940 NEXT;
4941 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4942 (ctxt->sax->elementDecl != NULL))
4943 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4944 content);
4945 }
4946 if (content != NULL) {
4947 xmlFreeElementContent(content);
4948 }
4949 if (name != NULL) {
4950 xmlFree(name);
4951 }
4952 }
4953 return(ret);
4954}
4955
4956/**
Owen Taylor3473f882001-02-23 17:55:21 +00004957 * xmlParseConditionalSections
4958 * @ctxt: an XML parser context
4959 *
4960 * [61] conditionalSect ::= includeSect | ignoreSect
4961 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4962 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4963 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4964 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4965 */
4966
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004967static void
Owen Taylor3473f882001-02-23 17:55:21 +00004968xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4969 SKIP(3);
4970 SKIP_BLANKS;
4971 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4972 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4973 (NXT(6) == 'E')) {
4974 SKIP(7);
4975 SKIP_BLANKS;
4976 if (RAW != '[') {
4977 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4979 ctxt->sax->error(ctxt->userData,
4980 "XML conditional section '[' expected\n");
4981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004983 } else {
4984 NEXT;
4985 }
4986 if (xmlParserDebugEntities) {
4987 if ((ctxt->input != NULL) && (ctxt->input->filename))
4988 xmlGenericError(xmlGenericErrorContext,
4989 "%s(%d): ", ctxt->input->filename,
4990 ctxt->input->line);
4991 xmlGenericError(xmlGenericErrorContext,
4992 "Entering INCLUDE Conditional Section\n");
4993 }
4994
4995 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4996 (NXT(2) != '>'))) {
4997 const xmlChar *check = CUR_PTR;
4998 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004999
5000 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5001 xmlParseConditionalSections(ctxt);
5002 } else if (IS_BLANK(CUR)) {
5003 NEXT;
5004 } else if (RAW == '%') {
5005 xmlParsePEReference(ctxt);
5006 } else
5007 xmlParseMarkupDecl(ctxt);
5008
5009 /*
5010 * Pop-up of finished entities.
5011 */
5012 while ((RAW == 0) && (ctxt->inputNr > 1))
5013 xmlPopInput(ctxt);
5014
Daniel Veillardfdc91562002-07-01 21:52:03 +00005015 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005016 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5018 ctxt->sax->error(ctxt->userData,
5019 "Content error in the external subset\n");
5020 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005021 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005022 break;
5023 }
5024 }
5025 if (xmlParserDebugEntities) {
5026 if ((ctxt->input != NULL) && (ctxt->input->filename))
5027 xmlGenericError(xmlGenericErrorContext,
5028 "%s(%d): ", ctxt->input->filename,
5029 ctxt->input->line);
5030 xmlGenericError(xmlGenericErrorContext,
5031 "Leaving INCLUDE Conditional Section\n");
5032 }
5033
5034 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5035 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5036 int state;
5037 int instate;
5038 int depth = 0;
5039
5040 SKIP(6);
5041 SKIP_BLANKS;
5042 if (RAW != '[') {
5043 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5045 ctxt->sax->error(ctxt->userData,
5046 "XML conditional section '[' expected\n");
5047 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005048 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005049 } else {
5050 NEXT;
5051 }
5052 if (xmlParserDebugEntities) {
5053 if ((ctxt->input != NULL) && (ctxt->input->filename))
5054 xmlGenericError(xmlGenericErrorContext,
5055 "%s(%d): ", ctxt->input->filename,
5056 ctxt->input->line);
5057 xmlGenericError(xmlGenericErrorContext,
5058 "Entering IGNORE Conditional Section\n");
5059 }
5060
5061 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005062 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005063 * But disable SAX event generating DTD building in the meantime
5064 */
5065 state = ctxt->disableSAX;
5066 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005067 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 ctxt->instate = XML_PARSER_IGNORE;
5069
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005070 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005071 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5072 depth++;
5073 SKIP(3);
5074 continue;
5075 }
5076 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5077 if (--depth >= 0) SKIP(3);
5078 continue;
5079 }
5080 NEXT;
5081 continue;
5082 }
5083
5084 ctxt->disableSAX = state;
5085 ctxt->instate = instate;
5086
5087 if (xmlParserDebugEntities) {
5088 if ((ctxt->input != NULL) && (ctxt->input->filename))
5089 xmlGenericError(xmlGenericErrorContext,
5090 "%s(%d): ", ctxt->input->filename,
5091 ctxt->input->line);
5092 xmlGenericError(xmlGenericErrorContext,
5093 "Leaving IGNORE Conditional Section\n");
5094 }
5095
5096 } else {
5097 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5099 ctxt->sax->error(ctxt->userData,
5100 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5101 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005102 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 }
5104
5105 if (RAW == 0)
5106 SHRINK;
5107
5108 if (RAW == 0) {
5109 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5111 ctxt->sax->error(ctxt->userData,
5112 "XML conditional section not closed\n");
5113 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005114 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005115 } else {
5116 SKIP(3);
5117 }
5118}
5119
5120/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005121 * xmlParseMarkupDecl:
5122 * @ctxt: an XML parser context
5123 *
5124 * parse Markup declarations
5125 *
5126 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5127 * NotationDecl | PI | Comment
5128 *
5129 * [ VC: Proper Declaration/PE Nesting ]
5130 * Parameter-entity replacement text must be properly nested with
5131 * markup declarations. That is to say, if either the first character
5132 * or the last character of a markup declaration (markupdecl above) is
5133 * contained in the replacement text for a parameter-entity reference,
5134 * both must be contained in the same replacement text.
5135 *
5136 * [ WFC: PEs in Internal Subset ]
5137 * In the internal DTD subset, parameter-entity references can occur
5138 * only where markup declarations can occur, not within markup declarations.
5139 * (This does not apply to references that occur in external parameter
5140 * entities or to the external subset.)
5141 */
5142void
5143xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5144 GROW;
5145 xmlParseElementDecl(ctxt);
5146 xmlParseAttributeListDecl(ctxt);
5147 xmlParseEntityDecl(ctxt);
5148 xmlParseNotationDecl(ctxt);
5149 xmlParsePI(ctxt);
5150 xmlParseComment(ctxt);
5151 /*
5152 * This is only for internal subset. On external entities,
5153 * the replacement is done before parsing stage
5154 */
5155 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5156 xmlParsePEReference(ctxt);
5157
5158 /*
5159 * Conditional sections are allowed from entities included
5160 * by PE References in the internal subset.
5161 */
5162 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5163 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5164 xmlParseConditionalSections(ctxt);
5165 }
5166 }
5167
5168 ctxt->instate = XML_PARSER_DTD;
5169}
5170
5171/**
5172 * xmlParseTextDecl:
5173 * @ctxt: an XML parser context
5174 *
5175 * parse an XML declaration header for external entities
5176 *
5177 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5178 *
5179 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5180 */
5181
5182void
5183xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5184 xmlChar *version;
5185
5186 /*
5187 * We know that '<?xml' is here.
5188 */
5189 if ((RAW == '<') && (NXT(1) == '?') &&
5190 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5191 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5192 SKIP(5);
5193 } else {
5194 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5196 ctxt->sax->error(ctxt->userData,
5197 "Text declaration '<?xml' required\n");
5198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005200
5201 return;
5202 }
5203
5204 if (!IS_BLANK(CUR)) {
5205 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5207 ctxt->sax->error(ctxt->userData,
5208 "Space needed after '<?xml'\n");
5209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005211 }
5212 SKIP_BLANKS;
5213
5214 /*
5215 * We may have the VersionInfo here.
5216 */
5217 version = xmlParseVersionInfo(ctxt);
5218 if (version == NULL)
5219 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005220 else {
5221 if (!IS_BLANK(CUR)) {
5222 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5224 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5225 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005226 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005227 }
5228 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005229 ctxt->input->version = version;
5230
5231 /*
5232 * We must have the encoding declaration
5233 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 xmlParseEncodingDecl(ctxt);
5235 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5236 /*
5237 * The XML REC instructs us to stop parsing right here
5238 */
5239 return;
5240 }
5241
5242 SKIP_BLANKS;
5243 if ((RAW == '?') && (NXT(1) == '>')) {
5244 SKIP(2);
5245 } else if (RAW == '>') {
5246 /* Deprecated old WD ... */
5247 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5249 ctxt->sax->error(ctxt->userData,
5250 "XML declaration must end-up with '?>'\n");
5251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005253 NEXT;
5254 } else {
5255 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5257 ctxt->sax->error(ctxt->userData,
5258 "parsing XML declaration: '?>' expected\n");
5259 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005260 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005261 MOVETO_ENDTAG(CUR_PTR);
5262 NEXT;
5263 }
5264}
5265
5266/**
Owen Taylor3473f882001-02-23 17:55:21 +00005267 * xmlParseExternalSubset:
5268 * @ctxt: an XML parser context
5269 * @ExternalID: the external identifier
5270 * @SystemID: the system identifier (or URL)
5271 *
5272 * parse Markup declarations from an external subset
5273 *
5274 * [30] extSubset ::= textDecl? extSubsetDecl
5275 *
5276 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5277 */
5278void
5279xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5280 const xmlChar *SystemID) {
5281 GROW;
5282 if ((RAW == '<') && (NXT(1) == '?') &&
5283 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5284 (NXT(4) == 'l')) {
5285 xmlParseTextDecl(ctxt);
5286 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5287 /*
5288 * The XML REC instructs us to stop parsing right here
5289 */
5290 ctxt->instate = XML_PARSER_EOF;
5291 return;
5292 }
5293 }
5294 if (ctxt->myDoc == NULL) {
5295 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5296 }
5297 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5298 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5299
5300 ctxt->instate = XML_PARSER_DTD;
5301 ctxt->external = 1;
5302 while (((RAW == '<') && (NXT(1) == '?')) ||
5303 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005304 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005305 const xmlChar *check = CUR_PTR;
5306 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005307
5308 GROW;
5309 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5310 xmlParseConditionalSections(ctxt);
5311 } else if (IS_BLANK(CUR)) {
5312 NEXT;
5313 } else if (RAW == '%') {
5314 xmlParsePEReference(ctxt);
5315 } else
5316 xmlParseMarkupDecl(ctxt);
5317
5318 /*
5319 * Pop-up of finished entities.
5320 */
5321 while ((RAW == 0) && (ctxt->inputNr > 1))
5322 xmlPopInput(ctxt);
5323
Daniel Veillardfdc91562002-07-01 21:52:03 +00005324 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005325 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "Content error in the external subset\n");
5329 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005331 break;
5332 }
5333 }
5334
5335 if (RAW != 0) {
5336 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5338 ctxt->sax->error(ctxt->userData,
5339 "Extra content at the end of the document\n");
5340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 }
5343
5344}
5345
5346/**
5347 * xmlParseReference:
5348 * @ctxt: an XML parser context
5349 *
5350 * parse and handle entity references in content, depending on the SAX
5351 * interface, this may end-up in a call to character() if this is a
5352 * CharRef, a predefined entity, if there is no reference() callback.
5353 * or if the parser was asked to switch to that mode.
5354 *
5355 * [67] Reference ::= EntityRef | CharRef
5356 */
5357void
5358xmlParseReference(xmlParserCtxtPtr ctxt) {
5359 xmlEntityPtr ent;
5360 xmlChar *val;
5361 if (RAW != '&') return;
5362
5363 if (NXT(1) == '#') {
5364 int i = 0;
5365 xmlChar out[10];
5366 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005367 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005368
5369 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5370 /*
5371 * So we are using non-UTF-8 buffers
5372 * Check that the char fit on 8bits, if not
5373 * generate a CharRef.
5374 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005375 if (value <= 0xFF) {
5376 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005377 out[1] = 0;
5378 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5379 (!ctxt->disableSAX))
5380 ctxt->sax->characters(ctxt->userData, out, 1);
5381 } else {
5382 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005383 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005384 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005385 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005386 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5387 (!ctxt->disableSAX))
5388 ctxt->sax->reference(ctxt->userData, out);
5389 }
5390 } else {
5391 /*
5392 * Just encode the value in UTF-8
5393 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005394 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 out[i] = 0;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5397 (!ctxt->disableSAX))
5398 ctxt->sax->characters(ctxt->userData, out, i);
5399 }
5400 } else {
5401 ent = xmlParseEntityRef(ctxt);
5402 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005403 if (!ctxt->wellFormed)
5404 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005405 if ((ent->name != NULL) &&
5406 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5407 xmlNodePtr list = NULL;
5408 int ret;
5409
5410
5411 /*
5412 * The first reference to the entity trigger a parsing phase
5413 * where the ent->children is filled with the result from
5414 * the parsing.
5415 */
5416 if (ent->children == NULL) {
5417 xmlChar *value;
5418 value = ent->content;
5419
5420 /*
5421 * Check that this entity is well formed
5422 */
5423 if ((value != NULL) &&
5424 (value[1] == 0) && (value[0] == '<') &&
5425 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5426 /*
5427 * DONE: get definite answer on this !!!
5428 * Lots of entity decls are used to declare a single
5429 * char
5430 * <!ENTITY lt "<">
5431 * Which seems to be valid since
5432 * 2.4: The ampersand character (&) and the left angle
5433 * bracket (<) may appear in their literal form only
5434 * when used ... They are also legal within the literal
5435 * entity value of an internal entity declaration;i
5436 * see "4.3.2 Well-Formed Parsed Entities".
5437 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5438 * Looking at the OASIS test suite and James Clark
5439 * tests, this is broken. However the XML REC uses
5440 * it. Is the XML REC not well-formed ????
5441 * This is a hack to avoid this problem
5442 *
5443 * ANSWER: since lt gt amp .. are already defined,
5444 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005445 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005446 * is lousy but acceptable.
5447 */
5448 list = xmlNewDocText(ctxt->myDoc, value);
5449 if (list != NULL) {
5450 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5451 (ent->children == NULL)) {
5452 ent->children = list;
5453 ent->last = list;
5454 list->parent = (xmlNodePtr) ent;
5455 } else {
5456 xmlFreeNodeList(list);
5457 }
5458 } else if (list != NULL) {
5459 xmlFreeNodeList(list);
5460 }
5461 } else {
5462 /*
5463 * 4.3.2: An internal general parsed entity is well-formed
5464 * if its replacement text matches the production labeled
5465 * content.
5466 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005467
5468 void *user_data;
5469 /*
5470 * This is a bit hackish but this seems the best
5471 * way to make sure both SAX and DOM entity support
5472 * behaves okay.
5473 */
5474 if (ctxt->userData == ctxt)
5475 user_data = NULL;
5476 else
5477 user_data = ctxt->userData;
5478
Owen Taylor3473f882001-02-23 17:55:21 +00005479 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5480 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005481 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5482 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005483 ctxt->depth--;
5484 } else if (ent->etype ==
5485 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5486 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005487 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005488 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005489 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005490 ctxt->depth--;
5491 } else {
5492 ret = -1;
5493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5494 ctxt->sax->error(ctxt->userData,
5495 "Internal: invalid entity type\n");
5496 }
5497 if (ret == XML_ERR_ENTITY_LOOP) {
5498 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5500 ctxt->sax->error(ctxt->userData,
5501 "Detected entity reference loop\n");
5502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005504 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005505 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005506 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5507 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005508 (ent->children == NULL)) {
5509 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005510 if (ctxt->replaceEntities) {
5511 /*
5512 * Prune it directly in the generated document
5513 * except for single text nodes.
5514 */
5515 if ((list->type == XML_TEXT_NODE) &&
5516 (list->next == NULL)) {
5517 list->parent = (xmlNodePtr) ent;
5518 list = NULL;
5519 } else {
5520 while (list != NULL) {
5521 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005522 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005523 if (list->next == NULL)
5524 ent->last = list;
5525 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005526 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005527 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005528 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5529 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005530 }
5531 } else {
5532 while (list != NULL) {
5533 list->parent = (xmlNodePtr) ent;
5534 if (list->next == NULL)
5535 ent->last = list;
5536 list = list->next;
5537 }
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 } else {
5540 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005541 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005542 }
5543 } else if (ret > 0) {
5544 ctxt->errNo = ret;
5545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5546 ctxt->sax->error(ctxt->userData,
5547 "Entity value required\n");
5548 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005549 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005550 } else if (list != NULL) {
5551 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005552 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005553 }
5554 }
5555 }
5556 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5557 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5558 /*
5559 * Create a node.
5560 */
5561 ctxt->sax->reference(ctxt->userData, ent->name);
5562 return;
5563 } else if (ctxt->replaceEntities) {
5564 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5565 /*
5566 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005567 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005568 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005569 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005570 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005571 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 cur = ent->children;
5573 while (cur != NULL) {
5574 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005575 if (firstChild == NULL){
5576 firstChild = new;
5577 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005578 xmlAddChild(ctxt->node, new);
5579 if (cur == ent->last)
5580 break;
5581 cur = cur->next;
5582 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005583 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5584 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 } else {
5586 /*
5587 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005588 * node with a possible previous text one which
5589 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 */
5591 if (ent->children->type == XML_TEXT_NODE)
5592 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5593 if ((ent->last != ent->children) &&
5594 (ent->last->type == XML_TEXT_NODE))
5595 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5596 xmlAddChildList(ctxt->node, ent->children);
5597 }
5598
Owen Taylor3473f882001-02-23 17:55:21 +00005599 /*
5600 * This is to avoid a nasty side effect, see
5601 * characters() in SAX.c
5602 */
5603 ctxt->nodemem = 0;
5604 ctxt->nodelen = 0;
5605 return;
5606 } else {
5607 /*
5608 * Probably running in SAX mode
5609 */
5610 xmlParserInputPtr input;
5611
5612 input = xmlNewEntityInputStream(ctxt, ent);
5613 xmlPushInput(ctxt, input);
5614 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5615 (RAW == '<') && (NXT(1) == '?') &&
5616 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5617 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5618 xmlParseTextDecl(ctxt);
5619 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5620 /*
5621 * The XML REC instructs us to stop parsing right here
5622 */
5623 ctxt->instate = XML_PARSER_EOF;
5624 return;
5625 }
5626 if (input->standalone == 1) {
5627 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5629 ctxt->sax->error(ctxt->userData,
5630 "external parsed entities cannot be standalone\n");
5631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005633 }
5634 }
5635 return;
5636 }
5637 }
5638 } else {
5639 val = ent->content;
5640 if (val == NULL) return;
5641 /*
5642 * inline the entity.
5643 */
5644 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5645 (!ctxt->disableSAX))
5646 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5647 }
5648 }
5649}
5650
5651/**
5652 * xmlParseEntityRef:
5653 * @ctxt: an XML parser context
5654 *
5655 * parse ENTITY references declarations
5656 *
5657 * [68] EntityRef ::= '&' Name ';'
5658 *
5659 * [ WFC: Entity Declared ]
5660 * In a document without any DTD, a document with only an internal DTD
5661 * subset which contains no parameter entity references, or a document
5662 * with "standalone='yes'", the Name given in the entity reference
5663 * must match that in an entity declaration, except that well-formed
5664 * documents need not declare any of the following entities: amp, lt,
5665 * gt, apos, quot. The declaration of a parameter entity must precede
5666 * any reference to it. Similarly, the declaration of a general entity
5667 * must precede any reference to it which appears in a default value in an
5668 * attribute-list declaration. Note that if entities are declared in the
5669 * external subset or in external parameter entities, a non-validating
5670 * processor is not obligated to read and process their declarations;
5671 * for such documents, the rule that an entity must be declared is a
5672 * well-formedness constraint only if standalone='yes'.
5673 *
5674 * [ WFC: Parsed Entity ]
5675 * An entity reference must not contain the name of an unparsed entity
5676 *
5677 * Returns the xmlEntityPtr if found, or NULL otherwise.
5678 */
5679xmlEntityPtr
5680xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5681 xmlChar *name;
5682 xmlEntityPtr ent = NULL;
5683
5684 GROW;
5685
5686 if (RAW == '&') {
5687 NEXT;
5688 name = xmlParseName(ctxt);
5689 if (name == NULL) {
5690 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5692 ctxt->sax->error(ctxt->userData,
5693 "xmlParseEntityRef: no name\n");
5694 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005695 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005696 } else {
5697 if (RAW == ';') {
5698 NEXT;
5699 /*
5700 * Ask first SAX for entity resolution, otherwise try the
5701 * predefined set.
5702 */
5703 if (ctxt->sax != NULL) {
5704 if (ctxt->sax->getEntity != NULL)
5705 ent = ctxt->sax->getEntity(ctxt->userData, name);
5706 if (ent == NULL)
5707 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005708 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5709 ent = getEntity(ctxt, name);
5710 }
Owen Taylor3473f882001-02-23 17:55:21 +00005711 }
5712 /*
5713 * [ WFC: Entity Declared ]
5714 * In a document without any DTD, a document with only an
5715 * internal DTD subset which contains no parameter entity
5716 * references, or a document with "standalone='yes'", the
5717 * Name given in the entity reference must match that in an
5718 * entity declaration, except that well-formed documents
5719 * need not declare any of the following entities: amp, lt,
5720 * gt, apos, quot.
5721 * The declaration of a parameter entity must precede any
5722 * reference to it.
5723 * Similarly, the declaration of a general entity must
5724 * precede any reference to it which appears in a default
5725 * value in an attribute-list declaration. Note that if
5726 * entities are declared in the external subset or in
5727 * external parameter entities, a non-validating processor
5728 * is not obligated to read and process their declarations;
5729 * for such documents, the rule that an entity must be
5730 * declared is a well-formedness constraint only if
5731 * standalone='yes'.
5732 */
5733 if (ent == NULL) {
5734 if ((ctxt->standalone == 1) ||
5735 ((ctxt->hasExternalSubset == 0) &&
5736 (ctxt->hasPErefs == 0))) {
5737 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData,
5740 "Entity '%s' not defined\n", name);
5741 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005742 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 } else {
5745 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005747 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005748 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005749 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751 }
5752
5753 /*
5754 * [ WFC: Parsed Entity ]
5755 * An entity reference must not contain the name of an
5756 * unparsed entity
5757 */
5758 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5759 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5761 ctxt->sax->error(ctxt->userData,
5762 "Entity reference to unparsed entity %s\n", name);
5763 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005764 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005765 }
5766
5767 /*
5768 * [ WFC: No External Entity References ]
5769 * Attribute values cannot contain direct or indirect
5770 * entity references to external entities.
5771 */
5772 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5773 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5774 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5776 ctxt->sax->error(ctxt->userData,
5777 "Attribute references external entity '%s'\n", name);
5778 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005780 }
5781 /*
5782 * [ WFC: No < in Attribute Values ]
5783 * The replacement text of any entity referred to directly or
5784 * indirectly in an attribute value (other than "&lt;") must
5785 * not contain a <.
5786 */
5787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5788 (ent != NULL) &&
5789 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5790 (ent->content != NULL) &&
5791 (xmlStrchr(ent->content, '<'))) {
5792 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5794 ctxt->sax->error(ctxt->userData,
5795 "'<' in entity '%s' is not allowed in attributes values\n", name);
5796 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005797 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005798 }
5799
5800 /*
5801 * Internal check, no parameter entities here ...
5802 */
5803 else {
5804 switch (ent->etype) {
5805 case XML_INTERNAL_PARAMETER_ENTITY:
5806 case XML_EXTERNAL_PARAMETER_ENTITY:
5807 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5809 ctxt->sax->error(ctxt->userData,
5810 "Attempt to reference the parameter entity '%s'\n", name);
5811 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005812 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005813 break;
5814 default:
5815 break;
5816 }
5817 }
5818
5819 /*
5820 * [ WFC: No Recursion ]
5821 * A parsed entity must not contain a recursive reference
5822 * to itself, either directly or indirectly.
5823 * Done somewhere else
5824 */
5825
5826 } else {
5827 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5829 ctxt->sax->error(ctxt->userData,
5830 "xmlParseEntityRef: expecting ';'\n");
5831 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005832 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005833 }
5834 xmlFree(name);
5835 }
5836 }
5837 return(ent);
5838}
5839
5840/**
5841 * xmlParseStringEntityRef:
5842 * @ctxt: an XML parser context
5843 * @str: a pointer to an index in the string
5844 *
5845 * parse ENTITY references declarations, but this version parses it from
5846 * a string value.
5847 *
5848 * [68] EntityRef ::= '&' Name ';'
5849 *
5850 * [ WFC: Entity Declared ]
5851 * In a document without any DTD, a document with only an internal DTD
5852 * subset which contains no parameter entity references, or a document
5853 * with "standalone='yes'", the Name given in the entity reference
5854 * must match that in an entity declaration, except that well-formed
5855 * documents need not declare any of the following entities: amp, lt,
5856 * gt, apos, quot. The declaration of a parameter entity must precede
5857 * any reference to it. Similarly, the declaration of a general entity
5858 * must precede any reference to it which appears in a default value in an
5859 * attribute-list declaration. Note that if entities are declared in the
5860 * external subset or in external parameter entities, a non-validating
5861 * processor is not obligated to read and process their declarations;
5862 * for such documents, the rule that an entity must be declared is a
5863 * well-formedness constraint only if standalone='yes'.
5864 *
5865 * [ WFC: Parsed Entity ]
5866 * An entity reference must not contain the name of an unparsed entity
5867 *
5868 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5869 * is updated to the current location in the string.
5870 */
5871xmlEntityPtr
5872xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5873 xmlChar *name;
5874 const xmlChar *ptr;
5875 xmlChar cur;
5876 xmlEntityPtr ent = NULL;
5877
5878 if ((str == NULL) || (*str == NULL))
5879 return(NULL);
5880 ptr = *str;
5881 cur = *ptr;
5882 if (cur == '&') {
5883 ptr++;
5884 cur = *ptr;
5885 name = xmlParseStringName(ctxt, &ptr);
5886 if (name == NULL) {
5887 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5889 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005890 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005891 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005892 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005893 } else {
5894 if (*ptr == ';') {
5895 ptr++;
5896 /*
5897 * Ask first SAX for entity resolution, otherwise try the
5898 * predefined set.
5899 */
5900 if (ctxt->sax != NULL) {
5901 if (ctxt->sax->getEntity != NULL)
5902 ent = ctxt->sax->getEntity(ctxt->userData, name);
5903 if (ent == NULL)
5904 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005905 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5906 ent = getEntity(ctxt, name);
5907 }
Owen Taylor3473f882001-02-23 17:55:21 +00005908 }
5909 /*
5910 * [ WFC: Entity Declared ]
5911 * In a document without any DTD, a document with only an
5912 * internal DTD subset which contains no parameter entity
5913 * references, or a document with "standalone='yes'", the
5914 * Name given in the entity reference must match that in an
5915 * entity declaration, except that well-formed documents
5916 * need not declare any of the following entities: amp, lt,
5917 * gt, apos, quot.
5918 * The declaration of a parameter entity must precede any
5919 * reference to it.
5920 * Similarly, the declaration of a general entity must
5921 * precede any reference to it which appears in a default
5922 * value in an attribute-list declaration. Note that if
5923 * entities are declared in the external subset or in
5924 * external parameter entities, a non-validating processor
5925 * is not obligated to read and process their declarations;
5926 * for such documents, the rule that an entity must be
5927 * declared is a well-formedness constraint only if
5928 * standalone='yes'.
5929 */
5930 if (ent == NULL) {
5931 if ((ctxt->standalone == 1) ||
5932 ((ctxt->hasExternalSubset == 0) &&
5933 (ctxt->hasPErefs == 0))) {
5934 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5936 ctxt->sax->error(ctxt->userData,
5937 "Entity '%s' not defined\n", name);
5938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005940 } else {
5941 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5942 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5943 ctxt->sax->warning(ctxt->userData,
5944 "Entity '%s' not defined\n", name);
5945 }
5946 }
5947
5948 /*
5949 * [ WFC: Parsed Entity ]
5950 * An entity reference must not contain the name of an
5951 * unparsed entity
5952 */
5953 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5954 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5956 ctxt->sax->error(ctxt->userData,
5957 "Entity reference to unparsed entity %s\n", name);
5958 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005959 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005960 }
5961
5962 /*
5963 * [ WFC: No External Entity References ]
5964 * Attribute values cannot contain direct or indirect
5965 * entity references to external entities.
5966 */
5967 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5968 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5969 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5971 ctxt->sax->error(ctxt->userData,
5972 "Attribute references external entity '%s'\n", name);
5973 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005974 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005975 }
5976 /*
5977 * [ WFC: No < in Attribute Values ]
5978 * The replacement text of any entity referred to directly or
5979 * indirectly in an attribute value (other than "&lt;") must
5980 * not contain a <.
5981 */
5982 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5983 (ent != NULL) &&
5984 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5985 (ent->content != NULL) &&
5986 (xmlStrchr(ent->content, '<'))) {
5987 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5989 ctxt->sax->error(ctxt->userData,
5990 "'<' in entity '%s' is not allowed in attributes values\n", name);
5991 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005992 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005993 }
5994
5995 /*
5996 * Internal check, no parameter entities here ...
5997 */
5998 else {
5999 switch (ent->etype) {
6000 case XML_INTERNAL_PARAMETER_ENTITY:
6001 case XML_EXTERNAL_PARAMETER_ENTITY:
6002 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6004 ctxt->sax->error(ctxt->userData,
6005 "Attempt to reference the parameter entity '%s'\n", name);
6006 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006008 break;
6009 default:
6010 break;
6011 }
6012 }
6013
6014 /*
6015 * [ WFC: No Recursion ]
6016 * A parsed entity must not contain a recursive reference
6017 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006018 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006019 */
6020
6021 } else {
6022 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6024 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006025 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006026 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006027 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006028 }
6029 xmlFree(name);
6030 }
6031 }
6032 *str = ptr;
6033 return(ent);
6034}
6035
6036/**
6037 * xmlParsePEReference:
6038 * @ctxt: an XML parser context
6039 *
6040 * parse PEReference declarations
6041 * The entity content is handled directly by pushing it's content as
6042 * a new input stream.
6043 *
6044 * [69] PEReference ::= '%' Name ';'
6045 *
6046 * [ WFC: No Recursion ]
6047 * A parsed entity must not contain a recursive
6048 * reference to itself, either directly or indirectly.
6049 *
6050 * [ WFC: Entity Declared ]
6051 * In a document without any DTD, a document with only an internal DTD
6052 * subset which contains no parameter entity references, or a document
6053 * with "standalone='yes'", ... ... The declaration of a parameter
6054 * entity must precede any reference to it...
6055 *
6056 * [ VC: Entity Declared ]
6057 * In a document with an external subset or external parameter entities
6058 * with "standalone='no'", ... ... The declaration of a parameter entity
6059 * must precede any reference to it...
6060 *
6061 * [ WFC: In DTD ]
6062 * Parameter-entity references may only appear in the DTD.
6063 * NOTE: misleading but this is handled.
6064 */
6065void
6066xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6067 xmlChar *name;
6068 xmlEntityPtr entity = NULL;
6069 xmlParserInputPtr input;
6070
6071 if (RAW == '%') {
6072 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006073 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006074 if (name == NULL) {
6075 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6077 ctxt->sax->error(ctxt->userData,
6078 "xmlParsePEReference: no name\n");
6079 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006080 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006081 } else {
6082 if (RAW == ';') {
6083 NEXT;
6084 if ((ctxt->sax != NULL) &&
6085 (ctxt->sax->getParameterEntity != NULL))
6086 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6087 name);
6088 if (entity == NULL) {
6089 /*
6090 * [ WFC: Entity Declared ]
6091 * In a document without any DTD, a document with only an
6092 * internal DTD subset which contains no parameter entity
6093 * references, or a document with "standalone='yes'", ...
6094 * ... The declaration of a parameter entity must precede
6095 * any reference to it...
6096 */
6097 if ((ctxt->standalone == 1) ||
6098 ((ctxt->hasExternalSubset == 0) &&
6099 (ctxt->hasPErefs == 0))) {
6100 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6101 if ((!ctxt->disableSAX) &&
6102 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6103 ctxt->sax->error(ctxt->userData,
6104 "PEReference: %%%s; not found\n", name);
6105 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006106 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006107 } else {
6108 /*
6109 * [ VC: Entity Declared ]
6110 * In a document with an external subset or external
6111 * parameter entities with "standalone='no'", ...
6112 * ... The declaration of a parameter entity must precede
6113 * any reference to it...
6114 */
6115 if ((!ctxt->disableSAX) &&
6116 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6117 ctxt->sax->warning(ctxt->userData,
6118 "PEReference: %%%s; not found\n", name);
6119 ctxt->valid = 0;
6120 }
6121 } else {
6122 /*
6123 * Internal checking in case the entity quest barfed
6124 */
6125 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6126 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6127 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6128 ctxt->sax->warning(ctxt->userData,
6129 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006130 } else if (ctxt->input->free != deallocblankswrapper) {
6131 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6132 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006133 } else {
6134 /*
6135 * TODO !!!
6136 * handle the extra spaces added before and after
6137 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6138 */
6139 input = xmlNewEntityInputStream(ctxt, entity);
6140 xmlPushInput(ctxt, input);
6141 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6142 (RAW == '<') && (NXT(1) == '?') &&
6143 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6144 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6145 xmlParseTextDecl(ctxt);
6146 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6147 /*
6148 * The XML REC instructs us to stop parsing
6149 * right here
6150 */
6151 ctxt->instate = XML_PARSER_EOF;
6152 xmlFree(name);
6153 return;
6154 }
6155 }
Owen Taylor3473f882001-02-23 17:55:21 +00006156 }
6157 }
6158 ctxt->hasPErefs = 1;
6159 } else {
6160 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6162 ctxt->sax->error(ctxt->userData,
6163 "xmlParsePEReference: expecting ';'\n");
6164 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006165 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006166 }
6167 xmlFree(name);
6168 }
6169 }
6170}
6171
6172/**
6173 * xmlParseStringPEReference:
6174 * @ctxt: an XML parser context
6175 * @str: a pointer to an index in the string
6176 *
6177 * parse PEReference declarations
6178 *
6179 * [69] PEReference ::= '%' Name ';'
6180 *
6181 * [ WFC: No Recursion ]
6182 * A parsed entity must not contain a recursive
6183 * reference to itself, either directly or indirectly.
6184 *
6185 * [ WFC: Entity Declared ]
6186 * In a document without any DTD, a document with only an internal DTD
6187 * subset which contains no parameter entity references, or a document
6188 * with "standalone='yes'", ... ... The declaration of a parameter
6189 * entity must precede any reference to it...
6190 *
6191 * [ VC: Entity Declared ]
6192 * In a document with an external subset or external parameter entities
6193 * with "standalone='no'", ... ... The declaration of a parameter entity
6194 * must precede any reference to it...
6195 *
6196 * [ WFC: In DTD ]
6197 * Parameter-entity references may only appear in the DTD.
6198 * NOTE: misleading but this is handled.
6199 *
6200 * Returns the string of the entity content.
6201 * str is updated to the current value of the index
6202 */
6203xmlEntityPtr
6204xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6205 const xmlChar *ptr;
6206 xmlChar cur;
6207 xmlChar *name;
6208 xmlEntityPtr entity = NULL;
6209
6210 if ((str == NULL) || (*str == NULL)) return(NULL);
6211 ptr = *str;
6212 cur = *ptr;
6213 if (cur == '%') {
6214 ptr++;
6215 cur = *ptr;
6216 name = xmlParseStringName(ctxt, &ptr);
6217 if (name == NULL) {
6218 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6220 ctxt->sax->error(ctxt->userData,
6221 "xmlParseStringPEReference: no name\n");
6222 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006223 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006224 } else {
6225 cur = *ptr;
6226 if (cur == ';') {
6227 ptr++;
6228 cur = *ptr;
6229 if ((ctxt->sax != NULL) &&
6230 (ctxt->sax->getParameterEntity != NULL))
6231 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6232 name);
6233 if (entity == NULL) {
6234 /*
6235 * [ WFC: Entity Declared ]
6236 * In a document without any DTD, a document with only an
6237 * internal DTD subset which contains no parameter entity
6238 * references, or a document with "standalone='yes'", ...
6239 * ... The declaration of a parameter entity must precede
6240 * any reference to it...
6241 */
6242 if ((ctxt->standalone == 1) ||
6243 ((ctxt->hasExternalSubset == 0) &&
6244 (ctxt->hasPErefs == 0))) {
6245 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6247 ctxt->sax->error(ctxt->userData,
6248 "PEReference: %%%s; not found\n", name);
6249 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006250 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006251 } else {
6252 /*
6253 * [ VC: Entity Declared ]
6254 * In a document with an external subset or external
6255 * parameter entities with "standalone='no'", ...
6256 * ... The declaration of a parameter entity must
6257 * precede any reference to it...
6258 */
6259 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6260 ctxt->sax->warning(ctxt->userData,
6261 "PEReference: %%%s; not found\n", name);
6262 ctxt->valid = 0;
6263 }
6264 } else {
6265 /*
6266 * Internal checking in case the entity quest barfed
6267 */
6268 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6269 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6270 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6271 ctxt->sax->warning(ctxt->userData,
6272 "Internal: %%%s; is not a parameter entity\n", name);
6273 }
6274 }
6275 ctxt->hasPErefs = 1;
6276 } else {
6277 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6279 ctxt->sax->error(ctxt->userData,
6280 "xmlParseStringPEReference: expecting ';'\n");
6281 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006282 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006283 }
6284 xmlFree(name);
6285 }
6286 }
6287 *str = ptr;
6288 return(entity);
6289}
6290
6291/**
6292 * xmlParseDocTypeDecl:
6293 * @ctxt: an XML parser context
6294 *
6295 * parse a DOCTYPE declaration
6296 *
6297 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6298 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6299 *
6300 * [ VC: Root Element Type ]
6301 * The Name in the document type declaration must match the element
6302 * type of the root element.
6303 */
6304
6305void
6306xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6307 xmlChar *name = NULL;
6308 xmlChar *ExternalID = NULL;
6309 xmlChar *URI = NULL;
6310
6311 /*
6312 * We know that '<!DOCTYPE' has been detected.
6313 */
6314 SKIP(9);
6315
6316 SKIP_BLANKS;
6317
6318 /*
6319 * Parse the DOCTYPE name.
6320 */
6321 name = xmlParseName(ctxt);
6322 if (name == NULL) {
6323 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6325 ctxt->sax->error(ctxt->userData,
6326 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6327 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006328 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006329 }
6330 ctxt->intSubName = name;
6331
6332 SKIP_BLANKS;
6333
6334 /*
6335 * Check for SystemID and ExternalID
6336 */
6337 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6338
6339 if ((URI != NULL) || (ExternalID != NULL)) {
6340 ctxt->hasExternalSubset = 1;
6341 }
6342 ctxt->extSubURI = URI;
6343 ctxt->extSubSystem = ExternalID;
6344
6345 SKIP_BLANKS;
6346
6347 /*
6348 * Create and update the internal subset.
6349 */
6350 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6351 (!ctxt->disableSAX))
6352 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6353
6354 /*
6355 * Is there any internal subset declarations ?
6356 * they are handled separately in xmlParseInternalSubset()
6357 */
6358 if (RAW == '[')
6359 return;
6360
6361 /*
6362 * We should be at the end of the DOCTYPE declaration.
6363 */
6364 if (RAW != '>') {
6365 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006367 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006368 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006370 }
6371 NEXT;
6372}
6373
6374/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006375 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006376 * @ctxt: an XML parser context
6377 *
6378 * parse the internal subset declaration
6379 *
6380 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6381 */
6382
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006383static void
Owen Taylor3473f882001-02-23 17:55:21 +00006384xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6385 /*
6386 * Is there any DTD definition ?
6387 */
6388 if (RAW == '[') {
6389 ctxt->instate = XML_PARSER_DTD;
6390 NEXT;
6391 /*
6392 * Parse the succession of Markup declarations and
6393 * PEReferences.
6394 * Subsequence (markupdecl | PEReference | S)*
6395 */
6396 while (RAW != ']') {
6397 const xmlChar *check = CUR_PTR;
6398 int cons = ctxt->input->consumed;
6399
6400 SKIP_BLANKS;
6401 xmlParseMarkupDecl(ctxt);
6402 xmlParsePEReference(ctxt);
6403
6404 /*
6405 * Pop-up of finished entities.
6406 */
6407 while ((RAW == 0) && (ctxt->inputNr > 1))
6408 xmlPopInput(ctxt);
6409
6410 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6411 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6413 ctxt->sax->error(ctxt->userData,
6414 "xmlParseInternalSubset: error detected in Markup declaration\n");
6415 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006416 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006417 break;
6418 }
6419 }
6420 if (RAW == ']') {
6421 NEXT;
6422 SKIP_BLANKS;
6423 }
6424 }
6425
6426 /*
6427 * We should be at the end of the DOCTYPE declaration.
6428 */
6429 if (RAW != '>') {
6430 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006432 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006433 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006434 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006435 }
6436 NEXT;
6437}
6438
6439/**
6440 * xmlParseAttribute:
6441 * @ctxt: an XML parser context
6442 * @value: a xmlChar ** used to store the value of the attribute
6443 *
6444 * parse an attribute
6445 *
6446 * [41] Attribute ::= Name Eq AttValue
6447 *
6448 * [ WFC: No External Entity References ]
6449 * Attribute values cannot contain direct or indirect entity references
6450 * to external entities.
6451 *
6452 * [ WFC: No < in Attribute Values ]
6453 * The replacement text of any entity referred to directly or indirectly in
6454 * an attribute value (other than "&lt;") must not contain a <.
6455 *
6456 * [ VC: Attribute Value Type ]
6457 * The attribute must have been declared; the value must be of the type
6458 * declared for it.
6459 *
6460 * [25] Eq ::= S? '=' S?
6461 *
6462 * With namespace:
6463 *
6464 * [NS 11] Attribute ::= QName Eq AttValue
6465 *
6466 * Also the case QName == xmlns:??? is handled independently as a namespace
6467 * definition.
6468 *
6469 * Returns the attribute name, and the value in *value.
6470 */
6471
6472xmlChar *
6473xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6474 xmlChar *name, *val;
6475
6476 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006477 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006478 name = xmlParseName(ctxt);
6479 if (name == NULL) {
6480 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6482 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6483 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006484 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006485 return(NULL);
6486 }
6487
6488 /*
6489 * read the value
6490 */
6491 SKIP_BLANKS;
6492 if (RAW == '=') {
6493 NEXT;
6494 SKIP_BLANKS;
6495 val = xmlParseAttValue(ctxt);
6496 ctxt->instate = XML_PARSER_CONTENT;
6497 } else {
6498 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6500 ctxt->sax->error(ctxt->userData,
6501 "Specification mandate value for attribute %s\n", name);
6502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006504 xmlFree(name);
6505 return(NULL);
6506 }
6507
6508 /*
6509 * Check that xml:lang conforms to the specification
6510 * No more registered as an error, just generate a warning now
6511 * since this was deprecated in XML second edition
6512 */
6513 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6514 if (!xmlCheckLanguageID(val)) {
6515 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6516 ctxt->sax->warning(ctxt->userData,
6517 "Malformed value for xml:lang : %s\n", val);
6518 }
6519 }
6520
6521 /*
6522 * Check that xml:space conforms to the specification
6523 */
6524 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6525 if (xmlStrEqual(val, BAD_CAST "default"))
6526 *(ctxt->space) = 0;
6527 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6528 *(ctxt->space) = 1;
6529 else {
6530 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6532 ctxt->sax->error(ctxt->userData,
6533"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6534 val);
6535 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006536 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006537 }
6538 }
6539
6540 *value = val;
6541 return(name);
6542}
6543
6544/**
6545 * xmlParseStartTag:
6546 * @ctxt: an XML parser context
6547 *
6548 * parse a start of tag either for rule element or
6549 * EmptyElement. In both case we don't parse the tag closing chars.
6550 *
6551 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6552 *
6553 * [ WFC: Unique Att Spec ]
6554 * No attribute name may appear more than once in the same start-tag or
6555 * empty-element tag.
6556 *
6557 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6558 *
6559 * [ WFC: Unique Att Spec ]
6560 * No attribute name may appear more than once in the same start-tag or
6561 * empty-element tag.
6562 *
6563 * With namespace:
6564 *
6565 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6566 *
6567 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6568 *
6569 * Returns the element name parsed
6570 */
6571
6572xmlChar *
6573xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6574 xmlChar *name;
6575 xmlChar *attname;
6576 xmlChar *attvalue;
6577 const xmlChar **atts = NULL;
6578 int nbatts = 0;
6579 int maxatts = 0;
6580 int i;
6581
6582 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006583 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006584
6585 name = xmlParseName(ctxt);
6586 if (name == NULL) {
6587 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6589 ctxt->sax->error(ctxt->userData,
6590 "xmlParseStartTag: invalid element name\n");
6591 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006592 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006593 return(NULL);
6594 }
6595
6596 /*
6597 * Now parse the attributes, it ends up with the ending
6598 *
6599 * (S Attribute)* S?
6600 */
6601 SKIP_BLANKS;
6602 GROW;
6603
Daniel Veillard21a0f912001-02-25 19:54:14 +00006604 while ((RAW != '>') &&
6605 ((RAW != '/') || (NXT(1) != '>')) &&
6606 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006607 const xmlChar *q = CUR_PTR;
6608 int cons = ctxt->input->consumed;
6609
6610 attname = xmlParseAttribute(ctxt, &attvalue);
6611 if ((attname != NULL) && (attvalue != NULL)) {
6612 /*
6613 * [ WFC: Unique Att Spec ]
6614 * No attribute name may appear more than once in the same
6615 * start-tag or empty-element tag.
6616 */
6617 for (i = 0; i < nbatts;i += 2) {
6618 if (xmlStrEqual(atts[i], attname)) {
6619 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6621 ctxt->sax->error(ctxt->userData,
6622 "Attribute %s redefined\n",
6623 attname);
6624 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006625 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006626 xmlFree(attname);
6627 xmlFree(attvalue);
6628 goto failed;
6629 }
6630 }
6631
6632 /*
6633 * Add the pair to atts
6634 */
6635 if (atts == NULL) {
6636 maxatts = 10;
6637 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6638 if (atts == NULL) {
6639 xmlGenericError(xmlGenericErrorContext,
6640 "malloc of %ld byte failed\n",
6641 maxatts * (long)sizeof(xmlChar *));
6642 return(NULL);
6643 }
6644 } else if (nbatts + 4 > maxatts) {
6645 maxatts *= 2;
6646 atts = (const xmlChar **) xmlRealloc((void *) atts,
6647 maxatts * sizeof(xmlChar *));
6648 if (atts == NULL) {
6649 xmlGenericError(xmlGenericErrorContext,
6650 "realloc of %ld byte failed\n",
6651 maxatts * (long)sizeof(xmlChar *));
6652 return(NULL);
6653 }
6654 }
6655 atts[nbatts++] = attname;
6656 atts[nbatts++] = attvalue;
6657 atts[nbatts] = NULL;
6658 atts[nbatts + 1] = NULL;
6659 } else {
6660 if (attname != NULL)
6661 xmlFree(attname);
6662 if (attvalue != NULL)
6663 xmlFree(attvalue);
6664 }
6665
6666failed:
6667
6668 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6669 break;
6670 if (!IS_BLANK(RAW)) {
6671 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6673 ctxt->sax->error(ctxt->userData,
6674 "attributes construct error\n");
6675 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006676 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006677 }
6678 SKIP_BLANKS;
6679 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6680 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6682 ctxt->sax->error(ctxt->userData,
6683 "xmlParseStartTag: problem parsing attributes\n");
6684 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006685 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006686 break;
6687 }
6688 GROW;
6689 }
6690
6691 /*
6692 * SAX: Start of Element !
6693 */
6694 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6695 (!ctxt->disableSAX))
6696 ctxt->sax->startElement(ctxt->userData, name, atts);
6697
6698 if (atts != NULL) {
6699 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6700 xmlFree((void *) atts);
6701 }
6702 return(name);
6703}
6704
6705/**
6706 * xmlParseEndTag:
6707 * @ctxt: an XML parser context
6708 *
6709 * parse an end of tag
6710 *
6711 * [42] ETag ::= '</' Name S? '>'
6712 *
6713 * With namespace
6714 *
6715 * [NS 9] ETag ::= '</' QName S? '>'
6716 */
6717
6718void
6719xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6720 xmlChar *name;
6721 xmlChar *oldname;
6722
6723 GROW;
6724 if ((RAW != '<') || (NXT(1) != '/')) {
6725 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6727 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6728 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006729 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006730 return;
6731 }
6732 SKIP(2);
6733
Daniel Veillard46de64e2002-05-29 08:21:33 +00006734 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006735
6736 /*
6737 * We should definitely be at the ending "S? '>'" part
6738 */
6739 GROW;
6740 SKIP_BLANKS;
6741 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6742 ctxt->errNo = XML_ERR_GT_REQUIRED;
6743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6744 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6745 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006746 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006747 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006748 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006749
6750 /*
6751 * [ WFC: Element Type Match ]
6752 * The Name in an element's end-tag must match the element type in the
6753 * start-tag.
6754 *
6755 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006756 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006757 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006759 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006760 ctxt->sax->error(ctxt->userData,
6761 "Opening and ending tag mismatch: %s and %s\n",
6762 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006763 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006764 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006765 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006766 }
6767
6768 }
6769 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006770 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6771#if 0
6772 else {
6773 /*
6774 * Recover in case of one missing close
6775 */
6776 if ((ctxt->nameNr > 2) &&
6777 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6778 namePop(ctxt);
6779 spacePop(ctxt);
6780 }
6781 }
6782#endif
6783 if (name != NULL)
6784 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006785 }
6786
6787 /*
6788 * SAX: End of Tag
6789 */
6790 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6791 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006792 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006793
Owen Taylor3473f882001-02-23 17:55:21 +00006794 oldname = namePop(ctxt);
6795 spacePop(ctxt);
6796 if (oldname != NULL) {
6797#ifdef DEBUG_STACK
6798 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6799#endif
6800 xmlFree(oldname);
6801 }
6802 return;
6803}
6804
6805/**
6806 * xmlParseCDSect:
6807 * @ctxt: an XML parser context
6808 *
6809 * Parse escaped pure raw content.
6810 *
6811 * [18] CDSect ::= CDStart CData CDEnd
6812 *
6813 * [19] CDStart ::= '<![CDATA['
6814 *
6815 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6816 *
6817 * [21] CDEnd ::= ']]>'
6818 */
6819void
6820xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6821 xmlChar *buf = NULL;
6822 int len = 0;
6823 int size = XML_PARSER_BUFFER_SIZE;
6824 int r, rl;
6825 int s, sl;
6826 int cur, l;
6827 int count = 0;
6828
6829 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6830 (NXT(2) == '[') && (NXT(3) == 'C') &&
6831 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6832 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6833 (NXT(8) == '[')) {
6834 SKIP(9);
6835 } else
6836 return;
6837
6838 ctxt->instate = XML_PARSER_CDATA_SECTION;
6839 r = CUR_CHAR(rl);
6840 if (!IS_CHAR(r)) {
6841 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6843 ctxt->sax->error(ctxt->userData,
6844 "CData section not finished\n");
6845 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006846 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006847 ctxt->instate = XML_PARSER_CONTENT;
6848 return;
6849 }
6850 NEXTL(rl);
6851 s = CUR_CHAR(sl);
6852 if (!IS_CHAR(s)) {
6853 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6855 ctxt->sax->error(ctxt->userData,
6856 "CData section not finished\n");
6857 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006858 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006859 ctxt->instate = XML_PARSER_CONTENT;
6860 return;
6861 }
6862 NEXTL(sl);
6863 cur = CUR_CHAR(l);
6864 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6865 if (buf == NULL) {
6866 xmlGenericError(xmlGenericErrorContext,
6867 "malloc of %d byte failed\n", size);
6868 return;
6869 }
6870 while (IS_CHAR(cur) &&
6871 ((r != ']') || (s != ']') || (cur != '>'))) {
6872 if (len + 5 >= size) {
6873 size *= 2;
6874 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6875 if (buf == NULL) {
6876 xmlGenericError(xmlGenericErrorContext,
6877 "realloc of %d byte failed\n", size);
6878 return;
6879 }
6880 }
6881 COPY_BUF(rl,buf,len,r);
6882 r = s;
6883 rl = sl;
6884 s = cur;
6885 sl = l;
6886 count++;
6887 if (count > 50) {
6888 GROW;
6889 count = 0;
6890 }
6891 NEXTL(l);
6892 cur = CUR_CHAR(l);
6893 }
6894 buf[len] = 0;
6895 ctxt->instate = XML_PARSER_CONTENT;
6896 if (cur != '>') {
6897 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6899 ctxt->sax->error(ctxt->userData,
6900 "CData section not finished\n%.50s\n", buf);
6901 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006902 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006903 xmlFree(buf);
6904 return;
6905 }
6906 NEXTL(l);
6907
6908 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006909 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006910 */
6911 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6912 if (ctxt->sax->cdataBlock != NULL)
6913 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006914 else if (ctxt->sax->characters != NULL)
6915 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006916 }
6917 xmlFree(buf);
6918}
6919
6920/**
6921 * xmlParseContent:
6922 * @ctxt: an XML parser context
6923 *
6924 * Parse a content:
6925 *
6926 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6927 */
6928
6929void
6930xmlParseContent(xmlParserCtxtPtr ctxt) {
6931 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006932 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006933 ((RAW != '<') || (NXT(1) != '/'))) {
6934 const xmlChar *test = CUR_PTR;
6935 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006936 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006937
6938 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006939 * First case : a Processing Instruction.
6940 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006941 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006942 xmlParsePI(ctxt);
6943 }
6944
6945 /*
6946 * Second case : a CDSection
6947 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006948 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006949 (NXT(2) == '[') && (NXT(3) == 'C') &&
6950 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6951 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6952 (NXT(8) == '[')) {
6953 xmlParseCDSect(ctxt);
6954 }
6955
6956 /*
6957 * Third case : a comment
6958 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006959 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006960 (NXT(2) == '-') && (NXT(3) == '-')) {
6961 xmlParseComment(ctxt);
6962 ctxt->instate = XML_PARSER_CONTENT;
6963 }
6964
6965 /*
6966 * Fourth case : a sub-element.
6967 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006968 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006969 xmlParseElement(ctxt);
6970 }
6971
6972 /*
6973 * Fifth case : a reference. If if has not been resolved,
6974 * parsing returns it's Name, create the node
6975 */
6976
Daniel Veillard21a0f912001-02-25 19:54:14 +00006977 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006978 xmlParseReference(ctxt);
6979 }
6980
6981 /*
6982 * Last case, text. Note that References are handled directly.
6983 */
6984 else {
6985 xmlParseCharData(ctxt, 0);
6986 }
6987
6988 GROW;
6989 /*
6990 * Pop-up of finished entities.
6991 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006992 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006993 xmlPopInput(ctxt);
6994 SHRINK;
6995
Daniel Veillardfdc91562002-07-01 21:52:03 +00006996 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006997 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6999 ctxt->sax->error(ctxt->userData,
7000 "detected an error in element content\n");
7001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007003 ctxt->instate = XML_PARSER_EOF;
7004 break;
7005 }
7006 }
7007}
7008
7009/**
7010 * xmlParseElement:
7011 * @ctxt: an XML parser context
7012 *
7013 * parse an XML element, this is highly recursive
7014 *
7015 * [39] element ::= EmptyElemTag | STag content ETag
7016 *
7017 * [ WFC: Element Type Match ]
7018 * The Name in an element's end-tag must match the element type in the
7019 * start-tag.
7020 *
7021 * [ VC: Element Valid ]
7022 * An element is valid if there is a declaration matching elementdecl
7023 * where the Name matches the element type and one of the following holds:
7024 * - The declaration matches EMPTY and the element has no content.
7025 * - The declaration matches children and the sequence of child elements
7026 * belongs to the language generated by the regular expression in the
7027 * content model, with optional white space (characters matching the
7028 * nonterminal S) between each pair of child elements.
7029 * - The declaration matches Mixed and the content consists of character
7030 * data and child elements whose types match names in the content model.
7031 * - The declaration matches ANY, and the types of any child elements have
7032 * been declared.
7033 */
7034
7035void
7036xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007037 xmlChar *name;
7038 xmlChar *oldname;
7039 xmlParserNodeInfo node_info;
7040 xmlNodePtr ret;
7041
7042 /* Capture start position */
7043 if (ctxt->record_info) {
7044 node_info.begin_pos = ctxt->input->consumed +
7045 (CUR_PTR - ctxt->input->base);
7046 node_info.begin_line = ctxt->input->line;
7047 }
7048
7049 if (ctxt->spaceNr == 0)
7050 spacePush(ctxt, -1);
7051 else
7052 spacePush(ctxt, *ctxt->space);
7053
7054 name = xmlParseStartTag(ctxt);
7055 if (name == NULL) {
7056 spacePop(ctxt);
7057 return;
7058 }
7059 namePush(ctxt, name);
7060 ret = ctxt->node;
7061
7062 /*
7063 * [ VC: Root Element Type ]
7064 * The Name in the document type declaration must match the element
7065 * type of the root element.
7066 */
7067 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7068 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7069 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7070
7071 /*
7072 * Check for an Empty Element.
7073 */
7074 if ((RAW == '/') && (NXT(1) == '>')) {
7075 SKIP(2);
7076 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7077 (!ctxt->disableSAX))
7078 ctxt->sax->endElement(ctxt->userData, name);
7079 oldname = namePop(ctxt);
7080 spacePop(ctxt);
7081 if (oldname != NULL) {
7082#ifdef DEBUG_STACK
7083 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7084#endif
7085 xmlFree(oldname);
7086 }
7087 if ( ret != NULL && ctxt->record_info ) {
7088 node_info.end_pos = ctxt->input->consumed +
7089 (CUR_PTR - ctxt->input->base);
7090 node_info.end_line = ctxt->input->line;
7091 node_info.node = ret;
7092 xmlParserAddNodeInfo(ctxt, &node_info);
7093 }
7094 return;
7095 }
7096 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007097 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007098 } else {
7099 ctxt->errNo = XML_ERR_GT_REQUIRED;
7100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7101 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007102 "Couldn't find end of Start Tag %s\n",
7103 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007104 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007105 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007106
7107 /*
7108 * end of parsing of this node.
7109 */
7110 nodePop(ctxt);
7111 oldname = namePop(ctxt);
7112 spacePop(ctxt);
7113 if (oldname != NULL) {
7114#ifdef DEBUG_STACK
7115 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7116#endif
7117 xmlFree(oldname);
7118 }
7119
7120 /*
7121 * Capture end position and add node
7122 */
7123 if ( ret != NULL && ctxt->record_info ) {
7124 node_info.end_pos = ctxt->input->consumed +
7125 (CUR_PTR - ctxt->input->base);
7126 node_info.end_line = ctxt->input->line;
7127 node_info.node = ret;
7128 xmlParserAddNodeInfo(ctxt, &node_info);
7129 }
7130 return;
7131 }
7132
7133 /*
7134 * Parse the content of the element:
7135 */
7136 xmlParseContent(ctxt);
7137 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007138 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7140 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007141 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007142 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007143 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007144
7145 /*
7146 * end of parsing of this node.
7147 */
7148 nodePop(ctxt);
7149 oldname = namePop(ctxt);
7150 spacePop(ctxt);
7151 if (oldname != NULL) {
7152#ifdef DEBUG_STACK
7153 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7154#endif
7155 xmlFree(oldname);
7156 }
7157 return;
7158 }
7159
7160 /*
7161 * parse the end of tag: '</' should be here.
7162 */
7163 xmlParseEndTag(ctxt);
7164
7165 /*
7166 * Capture end position and add node
7167 */
7168 if ( ret != NULL && ctxt->record_info ) {
7169 node_info.end_pos = ctxt->input->consumed +
7170 (CUR_PTR - ctxt->input->base);
7171 node_info.end_line = ctxt->input->line;
7172 node_info.node = ret;
7173 xmlParserAddNodeInfo(ctxt, &node_info);
7174 }
7175}
7176
7177/**
7178 * xmlParseVersionNum:
7179 * @ctxt: an XML parser context
7180 *
7181 * parse the XML version value.
7182 *
7183 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7184 *
7185 * Returns the string giving the XML version number, or NULL
7186 */
7187xmlChar *
7188xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7189 xmlChar *buf = NULL;
7190 int len = 0;
7191 int size = 10;
7192 xmlChar cur;
7193
7194 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7195 if (buf == NULL) {
7196 xmlGenericError(xmlGenericErrorContext,
7197 "malloc of %d byte failed\n", size);
7198 return(NULL);
7199 }
7200 cur = CUR;
7201 while (((cur >= 'a') && (cur <= 'z')) ||
7202 ((cur >= 'A') && (cur <= 'Z')) ||
7203 ((cur >= '0') && (cur <= '9')) ||
7204 (cur == '_') || (cur == '.') ||
7205 (cur == ':') || (cur == '-')) {
7206 if (len + 1 >= size) {
7207 size *= 2;
7208 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7209 if (buf == NULL) {
7210 xmlGenericError(xmlGenericErrorContext,
7211 "realloc of %d byte failed\n", size);
7212 return(NULL);
7213 }
7214 }
7215 buf[len++] = cur;
7216 NEXT;
7217 cur=CUR;
7218 }
7219 buf[len] = 0;
7220 return(buf);
7221}
7222
7223/**
7224 * xmlParseVersionInfo:
7225 * @ctxt: an XML parser context
7226 *
7227 * parse the XML version.
7228 *
7229 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7230 *
7231 * [25] Eq ::= S? '=' S?
7232 *
7233 * Returns the version string, e.g. "1.0"
7234 */
7235
7236xmlChar *
7237xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7238 xmlChar *version = NULL;
7239 const xmlChar *q;
7240
7241 if ((RAW == 'v') && (NXT(1) == 'e') &&
7242 (NXT(2) == 'r') && (NXT(3) == 's') &&
7243 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7244 (NXT(6) == 'n')) {
7245 SKIP(7);
7246 SKIP_BLANKS;
7247 if (RAW != '=') {
7248 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7250 ctxt->sax->error(ctxt->userData,
7251 "xmlParseVersionInfo : expected '='\n");
7252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007254 return(NULL);
7255 }
7256 NEXT;
7257 SKIP_BLANKS;
7258 if (RAW == '"') {
7259 NEXT;
7260 q = CUR_PTR;
7261 version = xmlParseVersionNum(ctxt);
7262 if (RAW != '"') {
7263 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7265 ctxt->sax->error(ctxt->userData,
7266 "String not closed\n%.50s\n", q);
7267 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007268 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007269 } else
7270 NEXT;
7271 } else if (RAW == '\''){
7272 NEXT;
7273 q = CUR_PTR;
7274 version = xmlParseVersionNum(ctxt);
7275 if (RAW != '\'') {
7276 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7278 ctxt->sax->error(ctxt->userData,
7279 "String not closed\n%.50s\n", q);
7280 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007281 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007282 } else
7283 NEXT;
7284 } else {
7285 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287 ctxt->sax->error(ctxt->userData,
7288 "xmlParseVersionInfo : expected ' or \"\n");
7289 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007290 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007291 }
7292 }
7293 return(version);
7294}
7295
7296/**
7297 * xmlParseEncName:
7298 * @ctxt: an XML parser context
7299 *
7300 * parse the XML encoding name
7301 *
7302 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7303 *
7304 * Returns the encoding name value or NULL
7305 */
7306xmlChar *
7307xmlParseEncName(xmlParserCtxtPtr ctxt) {
7308 xmlChar *buf = NULL;
7309 int len = 0;
7310 int size = 10;
7311 xmlChar cur;
7312
7313 cur = CUR;
7314 if (((cur >= 'a') && (cur <= 'z')) ||
7315 ((cur >= 'A') && (cur <= 'Z'))) {
7316 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7317 if (buf == NULL) {
7318 xmlGenericError(xmlGenericErrorContext,
7319 "malloc of %d byte failed\n", size);
7320 return(NULL);
7321 }
7322
7323 buf[len++] = cur;
7324 NEXT;
7325 cur = CUR;
7326 while (((cur >= 'a') && (cur <= 'z')) ||
7327 ((cur >= 'A') && (cur <= 'Z')) ||
7328 ((cur >= '0') && (cur <= '9')) ||
7329 (cur == '.') || (cur == '_') ||
7330 (cur == '-')) {
7331 if (len + 1 >= size) {
7332 size *= 2;
7333 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7334 if (buf == NULL) {
7335 xmlGenericError(xmlGenericErrorContext,
7336 "realloc of %d byte failed\n", size);
7337 return(NULL);
7338 }
7339 }
7340 buf[len++] = cur;
7341 NEXT;
7342 cur = CUR;
7343 if (cur == 0) {
7344 SHRINK;
7345 GROW;
7346 cur = CUR;
7347 }
7348 }
7349 buf[len] = 0;
7350 } else {
7351 ctxt->errNo = XML_ERR_ENCODING_NAME;
7352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7353 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007356 }
7357 return(buf);
7358}
7359
7360/**
7361 * xmlParseEncodingDecl:
7362 * @ctxt: an XML parser context
7363 *
7364 * parse the XML encoding declaration
7365 *
7366 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7367 *
7368 * this setups the conversion filters.
7369 *
7370 * Returns the encoding value or NULL
7371 */
7372
7373xmlChar *
7374xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7375 xmlChar *encoding = NULL;
7376 const xmlChar *q;
7377
7378 SKIP_BLANKS;
7379 if ((RAW == 'e') && (NXT(1) == 'n') &&
7380 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7381 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7382 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7383 SKIP(8);
7384 SKIP_BLANKS;
7385 if (RAW != '=') {
7386 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7388 ctxt->sax->error(ctxt->userData,
7389 "xmlParseEncodingDecl : expected '='\n");
7390 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007391 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007392 return(NULL);
7393 }
7394 NEXT;
7395 SKIP_BLANKS;
7396 if (RAW == '"') {
7397 NEXT;
7398 q = CUR_PTR;
7399 encoding = xmlParseEncName(ctxt);
7400 if (RAW != '"') {
7401 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7403 ctxt->sax->error(ctxt->userData,
7404 "String not closed\n%.50s\n", q);
7405 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007406 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007407 } else
7408 NEXT;
7409 } else if (RAW == '\''){
7410 NEXT;
7411 q = CUR_PTR;
7412 encoding = xmlParseEncName(ctxt);
7413 if (RAW != '\'') {
7414 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7416 ctxt->sax->error(ctxt->userData,
7417 "String not closed\n%.50s\n", q);
7418 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007419 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007420 } else
7421 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007422 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007423 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7425 ctxt->sax->error(ctxt->userData,
7426 "xmlParseEncodingDecl : expected ' or \"\n");
7427 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007428 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007429 }
7430 if (encoding != NULL) {
7431 xmlCharEncoding enc;
7432 xmlCharEncodingHandlerPtr handler;
7433
7434 if (ctxt->input->encoding != NULL)
7435 xmlFree((xmlChar *) ctxt->input->encoding);
7436 ctxt->input->encoding = encoding;
7437
7438 enc = xmlParseCharEncoding((const char *) encoding);
7439 /*
7440 * registered set of known encodings
7441 */
7442 if (enc != XML_CHAR_ENCODING_ERROR) {
7443 xmlSwitchEncoding(ctxt, enc);
7444 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007445 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007446 xmlFree(encoding);
7447 return(NULL);
7448 }
7449 } else {
7450 /*
7451 * fallback for unknown encodings
7452 */
7453 handler = xmlFindCharEncodingHandler((const char *) encoding);
7454 if (handler != NULL) {
7455 xmlSwitchToEncoding(ctxt, handler);
7456 } else {
7457 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7459 ctxt->sax->error(ctxt->userData,
7460 "Unsupported encoding %s\n", encoding);
7461 return(NULL);
7462 }
7463 }
7464 }
7465 }
7466 return(encoding);
7467}
7468
7469/**
7470 * xmlParseSDDecl:
7471 * @ctxt: an XML parser context
7472 *
7473 * parse the XML standalone declaration
7474 *
7475 * [32] SDDecl ::= S 'standalone' Eq
7476 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7477 *
7478 * [ VC: Standalone Document Declaration ]
7479 * TODO The standalone document declaration must have the value "no"
7480 * if any external markup declarations contain declarations of:
7481 * - attributes with default values, if elements to which these
7482 * attributes apply appear in the document without specifications
7483 * of values for these attributes, or
7484 * - entities (other than amp, lt, gt, apos, quot), if references
7485 * to those entities appear in the document, or
7486 * - attributes with values subject to normalization, where the
7487 * attribute appears in the document with a value which will change
7488 * as a result of normalization, or
7489 * - element types with element content, if white space occurs directly
7490 * within any instance of those types.
7491 *
7492 * Returns 1 if standalone, 0 otherwise
7493 */
7494
7495int
7496xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7497 int standalone = -1;
7498
7499 SKIP_BLANKS;
7500 if ((RAW == 's') && (NXT(1) == 't') &&
7501 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7502 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7503 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7504 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7505 SKIP(10);
7506 SKIP_BLANKS;
7507 if (RAW != '=') {
7508 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7510 ctxt->sax->error(ctxt->userData,
7511 "XML standalone declaration : expected '='\n");
7512 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007513 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007514 return(standalone);
7515 }
7516 NEXT;
7517 SKIP_BLANKS;
7518 if (RAW == '\''){
7519 NEXT;
7520 if ((RAW == 'n') && (NXT(1) == 'o')) {
7521 standalone = 0;
7522 SKIP(2);
7523 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7524 (NXT(2) == 's')) {
7525 standalone = 1;
7526 SKIP(3);
7527 } else {
7528 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7530 ctxt->sax->error(ctxt->userData,
7531 "standalone accepts only 'yes' or 'no'\n");
7532 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007534 }
7535 if (RAW != '\'') {
7536 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7538 ctxt->sax->error(ctxt->userData, "String not closed\n");
7539 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007540 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007541 } else
7542 NEXT;
7543 } else if (RAW == '"'){
7544 NEXT;
7545 if ((RAW == 'n') && (NXT(1) == 'o')) {
7546 standalone = 0;
7547 SKIP(2);
7548 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7549 (NXT(2) == 's')) {
7550 standalone = 1;
7551 SKIP(3);
7552 } else {
7553 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7555 ctxt->sax->error(ctxt->userData,
7556 "standalone accepts only 'yes' or 'no'\n");
7557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007559 }
7560 if (RAW != '"') {
7561 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7562 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7563 ctxt->sax->error(ctxt->userData, "String not closed\n");
7564 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007566 } else
7567 NEXT;
7568 } else {
7569 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7571 ctxt->sax->error(ctxt->userData,
7572 "Standalone value not found\n");
7573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007575 }
7576 }
7577 return(standalone);
7578}
7579
7580/**
7581 * xmlParseXMLDecl:
7582 * @ctxt: an XML parser context
7583 *
7584 * parse an XML declaration header
7585 *
7586 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7587 */
7588
7589void
7590xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7591 xmlChar *version;
7592
7593 /*
7594 * We know that '<?xml' is here.
7595 */
7596 SKIP(5);
7597
7598 if (!IS_BLANK(RAW)) {
7599 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7601 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7602 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007603 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007604 }
7605 SKIP_BLANKS;
7606
7607 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007608 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007609 */
7610 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007611 if (version == NULL) {
7612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7613 ctxt->sax->error(ctxt->userData,
7614 "Malformed declaration expecting version\n");
7615 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007616 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007617 } else {
7618 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7619 /*
7620 * TODO: Blueberry should be detected here
7621 */
7622 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7623 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7624 version);
7625 }
7626 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007627 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007628 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007629 }
Owen Taylor3473f882001-02-23 17:55:21 +00007630
7631 /*
7632 * We may have the encoding declaration
7633 */
7634 if (!IS_BLANK(RAW)) {
7635 if ((RAW == '?') && (NXT(1) == '>')) {
7636 SKIP(2);
7637 return;
7638 }
7639 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7641 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7642 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007643 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007644 }
7645 xmlParseEncodingDecl(ctxt);
7646 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7647 /*
7648 * The XML REC instructs us to stop parsing right here
7649 */
7650 return;
7651 }
7652
7653 /*
7654 * We may have the standalone status.
7655 */
7656 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7657 if ((RAW == '?') && (NXT(1) == '>')) {
7658 SKIP(2);
7659 return;
7660 }
7661 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7663 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7664 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007665 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007666 }
7667 SKIP_BLANKS;
7668 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7669
7670 SKIP_BLANKS;
7671 if ((RAW == '?') && (NXT(1) == '>')) {
7672 SKIP(2);
7673 } else if (RAW == '>') {
7674 /* Deprecated old WD ... */
7675 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7677 ctxt->sax->error(ctxt->userData,
7678 "XML declaration must end-up with '?>'\n");
7679 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007680 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007681 NEXT;
7682 } else {
7683 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7685 ctxt->sax->error(ctxt->userData,
7686 "parsing XML declaration: '?>' expected\n");
7687 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007688 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007689 MOVETO_ENDTAG(CUR_PTR);
7690 NEXT;
7691 }
7692}
7693
7694/**
7695 * xmlParseMisc:
7696 * @ctxt: an XML parser context
7697 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007698 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007699 *
7700 * [27] Misc ::= Comment | PI | S
7701 */
7702
7703void
7704xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007705 while (((RAW == '<') && (NXT(1) == '?')) ||
7706 ((RAW == '<') && (NXT(1) == '!') &&
7707 (NXT(2) == '-') && (NXT(3) == '-')) ||
7708 IS_BLANK(CUR)) {
7709 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007710 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007711 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007712 NEXT;
7713 } else
7714 xmlParseComment(ctxt);
7715 }
7716}
7717
7718/**
7719 * xmlParseDocument:
7720 * @ctxt: an XML parser context
7721 *
7722 * parse an XML document (and build a tree if using the standard SAX
7723 * interface).
7724 *
7725 * [1] document ::= prolog element Misc*
7726 *
7727 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7728 *
7729 * Returns 0, -1 in case of error. the parser context is augmented
7730 * as a result of the parsing.
7731 */
7732
7733int
7734xmlParseDocument(xmlParserCtxtPtr ctxt) {
7735 xmlChar start[4];
7736 xmlCharEncoding enc;
7737
7738 xmlInitParser();
7739
7740 GROW;
7741
7742 /*
7743 * SAX: beginning of the document processing.
7744 */
7745 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7746 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7747
Daniel Veillard50f34372001-08-03 12:06:36 +00007748 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007749 /*
7750 * Get the 4 first bytes and decode the charset
7751 * if enc != XML_CHAR_ENCODING_NONE
7752 * plug some encoding conversion routines.
7753 */
7754 start[0] = RAW;
7755 start[1] = NXT(1);
7756 start[2] = NXT(2);
7757 start[3] = NXT(3);
7758 enc = xmlDetectCharEncoding(start, 4);
7759 if (enc != XML_CHAR_ENCODING_NONE) {
7760 xmlSwitchEncoding(ctxt, enc);
7761 }
Owen Taylor3473f882001-02-23 17:55:21 +00007762 }
7763
7764
7765 if (CUR == 0) {
7766 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7768 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7769 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007770 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007771 }
7772
7773 /*
7774 * Check for the XMLDecl in the Prolog.
7775 */
7776 GROW;
7777 if ((RAW == '<') && (NXT(1) == '?') &&
7778 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7779 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7780
7781 /*
7782 * Note that we will switch encoding on the fly.
7783 */
7784 xmlParseXMLDecl(ctxt);
7785 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7786 /*
7787 * The XML REC instructs us to stop parsing right here
7788 */
7789 return(-1);
7790 }
7791 ctxt->standalone = ctxt->input->standalone;
7792 SKIP_BLANKS;
7793 } else {
7794 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7795 }
7796 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7797 ctxt->sax->startDocument(ctxt->userData);
7798
7799 /*
7800 * The Misc part of the Prolog
7801 */
7802 GROW;
7803 xmlParseMisc(ctxt);
7804
7805 /*
7806 * Then possibly doc type declaration(s) and more Misc
7807 * (doctypedecl Misc*)?
7808 */
7809 GROW;
7810 if ((RAW == '<') && (NXT(1) == '!') &&
7811 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7812 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7813 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7814 (NXT(8) == 'E')) {
7815
7816 ctxt->inSubset = 1;
7817 xmlParseDocTypeDecl(ctxt);
7818 if (RAW == '[') {
7819 ctxt->instate = XML_PARSER_DTD;
7820 xmlParseInternalSubset(ctxt);
7821 }
7822
7823 /*
7824 * Create and update the external subset.
7825 */
7826 ctxt->inSubset = 2;
7827 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7828 (!ctxt->disableSAX))
7829 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7830 ctxt->extSubSystem, ctxt->extSubURI);
7831 ctxt->inSubset = 0;
7832
7833
7834 ctxt->instate = XML_PARSER_PROLOG;
7835 xmlParseMisc(ctxt);
7836 }
7837
7838 /*
7839 * Time to start parsing the tree itself
7840 */
7841 GROW;
7842 if (RAW != '<') {
7843 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7845 ctxt->sax->error(ctxt->userData,
7846 "Start tag expected, '<' not found\n");
7847 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007848 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007849 ctxt->instate = XML_PARSER_EOF;
7850 } else {
7851 ctxt->instate = XML_PARSER_CONTENT;
7852 xmlParseElement(ctxt);
7853 ctxt->instate = XML_PARSER_EPILOG;
7854
7855
7856 /*
7857 * The Misc part at the end
7858 */
7859 xmlParseMisc(ctxt);
7860
Daniel Veillard561b7f82002-03-20 21:55:57 +00007861 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007862 ctxt->errNo = XML_ERR_DOCUMENT_END;
7863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7864 ctxt->sax->error(ctxt->userData,
7865 "Extra content at the end of the document\n");
7866 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007867 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007868 }
7869 ctxt->instate = XML_PARSER_EOF;
7870 }
7871
7872 /*
7873 * SAX: end of the document processing.
7874 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007876 ctxt->sax->endDocument(ctxt->userData);
7877
Daniel Veillard5997aca2002-03-18 18:36:20 +00007878 /*
7879 * Remove locally kept entity definitions if the tree was not built
7880 */
7881 if ((ctxt->myDoc != NULL) &&
7882 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7883 xmlFreeDoc(ctxt->myDoc);
7884 ctxt->myDoc = NULL;
7885 }
7886
Daniel Veillardc7612992002-02-17 22:47:37 +00007887 if (! ctxt->wellFormed) {
7888 ctxt->valid = 0;
7889 return(-1);
7890 }
Owen Taylor3473f882001-02-23 17:55:21 +00007891 return(0);
7892}
7893
7894/**
7895 * xmlParseExtParsedEnt:
7896 * @ctxt: an XML parser context
7897 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007898 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007899 * An external general parsed entity is well-formed if it matches the
7900 * production labeled extParsedEnt.
7901 *
7902 * [78] extParsedEnt ::= TextDecl? content
7903 *
7904 * Returns 0, -1 in case of error. the parser context is augmented
7905 * as a result of the parsing.
7906 */
7907
7908int
7909xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7910 xmlChar start[4];
7911 xmlCharEncoding enc;
7912
7913 xmlDefaultSAXHandlerInit();
7914
7915 GROW;
7916
7917 /*
7918 * SAX: beginning of the document processing.
7919 */
7920 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7921 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7922
7923 /*
7924 * Get the 4 first bytes and decode the charset
7925 * if enc != XML_CHAR_ENCODING_NONE
7926 * plug some encoding conversion routines.
7927 */
7928 start[0] = RAW;
7929 start[1] = NXT(1);
7930 start[2] = NXT(2);
7931 start[3] = NXT(3);
7932 enc = xmlDetectCharEncoding(start, 4);
7933 if (enc != XML_CHAR_ENCODING_NONE) {
7934 xmlSwitchEncoding(ctxt, enc);
7935 }
7936
7937
7938 if (CUR == 0) {
7939 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7941 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007944 }
7945
7946 /*
7947 * Check for the XMLDecl in the Prolog.
7948 */
7949 GROW;
7950 if ((RAW == '<') && (NXT(1) == '?') &&
7951 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7952 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7953
7954 /*
7955 * Note that we will switch encoding on the fly.
7956 */
7957 xmlParseXMLDecl(ctxt);
7958 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7959 /*
7960 * The XML REC instructs us to stop parsing right here
7961 */
7962 return(-1);
7963 }
7964 SKIP_BLANKS;
7965 } else {
7966 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7967 }
7968 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7969 ctxt->sax->startDocument(ctxt->userData);
7970
7971 /*
7972 * Doing validity checking on chunk doesn't make sense
7973 */
7974 ctxt->instate = XML_PARSER_CONTENT;
7975 ctxt->validate = 0;
7976 ctxt->loadsubset = 0;
7977 ctxt->depth = 0;
7978
7979 xmlParseContent(ctxt);
7980
7981 if ((RAW == '<') && (NXT(1) == '/')) {
7982 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7984 ctxt->sax->error(ctxt->userData,
7985 "chunk is not well balanced\n");
7986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007988 } else if (RAW != 0) {
7989 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7991 ctxt->sax->error(ctxt->userData,
7992 "extra content at the end of well balanced chunk\n");
7993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007995 }
7996
7997 /*
7998 * SAX: end of the document processing.
7999 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008000 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008001 ctxt->sax->endDocument(ctxt->userData);
8002
8003 if (! ctxt->wellFormed) return(-1);
8004 return(0);
8005}
8006
8007/************************************************************************
8008 * *
8009 * Progressive parsing interfaces *
8010 * *
8011 ************************************************************************/
8012
8013/**
8014 * xmlParseLookupSequence:
8015 * @ctxt: an XML parser context
8016 * @first: the first char to lookup
8017 * @next: the next char to lookup or zero
8018 * @third: the next char to lookup or zero
8019 *
8020 * Try to find if a sequence (first, next, third) or just (first next) or
8021 * (first) is available in the input stream.
8022 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8023 * to avoid rescanning sequences of bytes, it DOES change the state of the
8024 * parser, do not use liberally.
8025 *
8026 * Returns the index to the current parsing point if the full sequence
8027 * is available, -1 otherwise.
8028 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008029static int
Owen Taylor3473f882001-02-23 17:55:21 +00008030xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8031 xmlChar next, xmlChar third) {
8032 int base, len;
8033 xmlParserInputPtr in;
8034 const xmlChar *buf;
8035
8036 in = ctxt->input;
8037 if (in == NULL) return(-1);
8038 base = in->cur - in->base;
8039 if (base < 0) return(-1);
8040 if (ctxt->checkIndex > base)
8041 base = ctxt->checkIndex;
8042 if (in->buf == NULL) {
8043 buf = in->base;
8044 len = in->length;
8045 } else {
8046 buf = in->buf->buffer->content;
8047 len = in->buf->buffer->use;
8048 }
8049 /* take into account the sequence length */
8050 if (third) len -= 2;
8051 else if (next) len --;
8052 for (;base < len;base++) {
8053 if (buf[base] == first) {
8054 if (third != 0) {
8055 if ((buf[base + 1] != next) ||
8056 (buf[base + 2] != third)) continue;
8057 } else if (next != 0) {
8058 if (buf[base + 1] != next) continue;
8059 }
8060 ctxt->checkIndex = 0;
8061#ifdef DEBUG_PUSH
8062 if (next == 0)
8063 xmlGenericError(xmlGenericErrorContext,
8064 "PP: lookup '%c' found at %d\n",
8065 first, base);
8066 else if (third == 0)
8067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: lookup '%c%c' found at %d\n",
8069 first, next, base);
8070 else
8071 xmlGenericError(xmlGenericErrorContext,
8072 "PP: lookup '%c%c%c' found at %d\n",
8073 first, next, third, base);
8074#endif
8075 return(base - (in->cur - in->base));
8076 }
8077 }
8078 ctxt->checkIndex = base;
8079#ifdef DEBUG_PUSH
8080 if (next == 0)
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: lookup '%c' failed\n", first);
8083 else if (third == 0)
8084 xmlGenericError(xmlGenericErrorContext,
8085 "PP: lookup '%c%c' failed\n", first, next);
8086 else
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: lookup '%c%c%c' failed\n", first, next, third);
8089#endif
8090 return(-1);
8091}
8092
8093/**
8094 * xmlParseTryOrFinish:
8095 * @ctxt: an XML parser context
8096 * @terminate: last chunk indicator
8097 *
8098 * Try to progress on parsing
8099 *
8100 * Returns zero if no parsing was possible
8101 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008102static int
Owen Taylor3473f882001-02-23 17:55:21 +00008103xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8104 int ret = 0;
8105 int avail;
8106 xmlChar cur, next;
8107
8108#ifdef DEBUG_PUSH
8109 switch (ctxt->instate) {
8110 case XML_PARSER_EOF:
8111 xmlGenericError(xmlGenericErrorContext,
8112 "PP: try EOF\n"); break;
8113 case XML_PARSER_START:
8114 xmlGenericError(xmlGenericErrorContext,
8115 "PP: try START\n"); break;
8116 case XML_PARSER_MISC:
8117 xmlGenericError(xmlGenericErrorContext,
8118 "PP: try MISC\n");break;
8119 case XML_PARSER_COMMENT:
8120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: try COMMENT\n");break;
8122 case XML_PARSER_PROLOG:
8123 xmlGenericError(xmlGenericErrorContext,
8124 "PP: try PROLOG\n");break;
8125 case XML_PARSER_START_TAG:
8126 xmlGenericError(xmlGenericErrorContext,
8127 "PP: try START_TAG\n");break;
8128 case XML_PARSER_CONTENT:
8129 xmlGenericError(xmlGenericErrorContext,
8130 "PP: try CONTENT\n");break;
8131 case XML_PARSER_CDATA_SECTION:
8132 xmlGenericError(xmlGenericErrorContext,
8133 "PP: try CDATA_SECTION\n");break;
8134 case XML_PARSER_END_TAG:
8135 xmlGenericError(xmlGenericErrorContext,
8136 "PP: try END_TAG\n");break;
8137 case XML_PARSER_ENTITY_DECL:
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: try ENTITY_DECL\n");break;
8140 case XML_PARSER_ENTITY_VALUE:
8141 xmlGenericError(xmlGenericErrorContext,
8142 "PP: try ENTITY_VALUE\n");break;
8143 case XML_PARSER_ATTRIBUTE_VALUE:
8144 xmlGenericError(xmlGenericErrorContext,
8145 "PP: try ATTRIBUTE_VALUE\n");break;
8146 case XML_PARSER_DTD:
8147 xmlGenericError(xmlGenericErrorContext,
8148 "PP: try DTD\n");break;
8149 case XML_PARSER_EPILOG:
8150 xmlGenericError(xmlGenericErrorContext,
8151 "PP: try EPILOG\n");break;
8152 case XML_PARSER_PI:
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: try PI\n");break;
8155 case XML_PARSER_IGNORE:
8156 xmlGenericError(xmlGenericErrorContext,
8157 "PP: try IGNORE\n");break;
8158 }
8159#endif
8160
8161 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008162 SHRINK;
8163
Owen Taylor3473f882001-02-23 17:55:21 +00008164 /*
8165 * Pop-up of finished entities.
8166 */
8167 while ((RAW == 0) && (ctxt->inputNr > 1))
8168 xmlPopInput(ctxt);
8169
8170 if (ctxt->input ==NULL) break;
8171 if (ctxt->input->buf == NULL)
8172 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008173 else {
8174 /*
8175 * If we are operating on converted input, try to flush
8176 * remainng chars to avoid them stalling in the non-converted
8177 * buffer.
8178 */
8179 if ((ctxt->input->buf->raw != NULL) &&
8180 (ctxt->input->buf->raw->use > 0)) {
8181 int base = ctxt->input->base -
8182 ctxt->input->buf->buffer->content;
8183 int current = ctxt->input->cur - ctxt->input->base;
8184
8185 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8186 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8187 ctxt->input->cur = ctxt->input->base + current;
8188 ctxt->input->end =
8189 &ctxt->input->buf->buffer->content[
8190 ctxt->input->buf->buffer->use];
8191 }
8192 avail = ctxt->input->buf->buffer->use -
8193 (ctxt->input->cur - ctxt->input->base);
8194 }
Owen Taylor3473f882001-02-23 17:55:21 +00008195 if (avail < 1)
8196 goto done;
8197 switch (ctxt->instate) {
8198 case XML_PARSER_EOF:
8199 /*
8200 * Document parsing is done !
8201 */
8202 goto done;
8203 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008204 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8205 xmlChar start[4];
8206 xmlCharEncoding enc;
8207
8208 /*
8209 * Very first chars read from the document flow.
8210 */
8211 if (avail < 4)
8212 goto done;
8213
8214 /*
8215 * Get the 4 first bytes and decode the charset
8216 * if enc != XML_CHAR_ENCODING_NONE
8217 * plug some encoding conversion routines.
8218 */
8219 start[0] = RAW;
8220 start[1] = NXT(1);
8221 start[2] = NXT(2);
8222 start[3] = NXT(3);
8223 enc = xmlDetectCharEncoding(start, 4);
8224 if (enc != XML_CHAR_ENCODING_NONE) {
8225 xmlSwitchEncoding(ctxt, enc);
8226 }
8227 break;
8228 }
Owen Taylor3473f882001-02-23 17:55:21 +00008229
8230 cur = ctxt->input->cur[0];
8231 next = ctxt->input->cur[1];
8232 if (cur == 0) {
8233 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8234 ctxt->sax->setDocumentLocator(ctxt->userData,
8235 &xmlDefaultSAXLocator);
8236 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8238 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8239 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008240 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008241 ctxt->instate = XML_PARSER_EOF;
8242#ifdef DEBUG_PUSH
8243 xmlGenericError(xmlGenericErrorContext,
8244 "PP: entering EOF\n");
8245#endif
8246 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8247 ctxt->sax->endDocument(ctxt->userData);
8248 goto done;
8249 }
8250 if ((cur == '<') && (next == '?')) {
8251 /* PI or XML decl */
8252 if (avail < 5) return(ret);
8253 if ((!terminate) &&
8254 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8255 return(ret);
8256 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8257 ctxt->sax->setDocumentLocator(ctxt->userData,
8258 &xmlDefaultSAXLocator);
8259 if ((ctxt->input->cur[2] == 'x') &&
8260 (ctxt->input->cur[3] == 'm') &&
8261 (ctxt->input->cur[4] == 'l') &&
8262 (IS_BLANK(ctxt->input->cur[5]))) {
8263 ret += 5;
8264#ifdef DEBUG_PUSH
8265 xmlGenericError(xmlGenericErrorContext,
8266 "PP: Parsing XML Decl\n");
8267#endif
8268 xmlParseXMLDecl(ctxt);
8269 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8270 /*
8271 * The XML REC instructs us to stop parsing right
8272 * here
8273 */
8274 ctxt->instate = XML_PARSER_EOF;
8275 return(0);
8276 }
8277 ctxt->standalone = ctxt->input->standalone;
8278 if ((ctxt->encoding == NULL) &&
8279 (ctxt->input->encoding != NULL))
8280 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8281 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8282 (!ctxt->disableSAX))
8283 ctxt->sax->startDocument(ctxt->userData);
8284 ctxt->instate = XML_PARSER_MISC;
8285#ifdef DEBUG_PUSH
8286 xmlGenericError(xmlGenericErrorContext,
8287 "PP: entering MISC\n");
8288#endif
8289 } else {
8290 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8291 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8292 (!ctxt->disableSAX))
8293 ctxt->sax->startDocument(ctxt->userData);
8294 ctxt->instate = XML_PARSER_MISC;
8295#ifdef DEBUG_PUSH
8296 xmlGenericError(xmlGenericErrorContext,
8297 "PP: entering MISC\n");
8298#endif
8299 }
8300 } else {
8301 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8302 ctxt->sax->setDocumentLocator(ctxt->userData,
8303 &xmlDefaultSAXLocator);
8304 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8305 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8306 (!ctxt->disableSAX))
8307 ctxt->sax->startDocument(ctxt->userData);
8308 ctxt->instate = XML_PARSER_MISC;
8309#ifdef DEBUG_PUSH
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: entering MISC\n");
8312#endif
8313 }
8314 break;
8315 case XML_PARSER_MISC:
8316 SKIP_BLANKS;
8317 if (ctxt->input->buf == NULL)
8318 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8319 else
8320 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8321 if (avail < 2)
8322 goto done;
8323 cur = ctxt->input->cur[0];
8324 next = ctxt->input->cur[1];
8325 if ((cur == '<') && (next == '?')) {
8326 if ((!terminate) &&
8327 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8328 goto done;
8329#ifdef DEBUG_PUSH
8330 xmlGenericError(xmlGenericErrorContext,
8331 "PP: Parsing PI\n");
8332#endif
8333 xmlParsePI(ctxt);
8334 } else if ((cur == '<') && (next == '!') &&
8335 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8336 if ((!terminate) &&
8337 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8338 goto done;
8339#ifdef DEBUG_PUSH
8340 xmlGenericError(xmlGenericErrorContext,
8341 "PP: Parsing Comment\n");
8342#endif
8343 xmlParseComment(ctxt);
8344 ctxt->instate = XML_PARSER_MISC;
8345 } else if ((cur == '<') && (next == '!') &&
8346 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8347 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8348 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8349 (ctxt->input->cur[8] == 'E')) {
8350 if ((!terminate) &&
8351 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8352 goto done;
8353#ifdef DEBUG_PUSH
8354 xmlGenericError(xmlGenericErrorContext,
8355 "PP: Parsing internal subset\n");
8356#endif
8357 ctxt->inSubset = 1;
8358 xmlParseDocTypeDecl(ctxt);
8359 if (RAW == '[') {
8360 ctxt->instate = XML_PARSER_DTD;
8361#ifdef DEBUG_PUSH
8362 xmlGenericError(xmlGenericErrorContext,
8363 "PP: entering DTD\n");
8364#endif
8365 } else {
8366 /*
8367 * Create and update the external subset.
8368 */
8369 ctxt->inSubset = 2;
8370 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8371 (ctxt->sax->externalSubset != NULL))
8372 ctxt->sax->externalSubset(ctxt->userData,
8373 ctxt->intSubName, ctxt->extSubSystem,
8374 ctxt->extSubURI);
8375 ctxt->inSubset = 0;
8376 ctxt->instate = XML_PARSER_PROLOG;
8377#ifdef DEBUG_PUSH
8378 xmlGenericError(xmlGenericErrorContext,
8379 "PP: entering PROLOG\n");
8380#endif
8381 }
8382 } else if ((cur == '<') && (next == '!') &&
8383 (avail < 9)) {
8384 goto done;
8385 } else {
8386 ctxt->instate = XML_PARSER_START_TAG;
8387#ifdef DEBUG_PUSH
8388 xmlGenericError(xmlGenericErrorContext,
8389 "PP: entering START_TAG\n");
8390#endif
8391 }
8392 break;
8393 case XML_PARSER_IGNORE:
8394 xmlGenericError(xmlGenericErrorContext,
8395 "PP: internal error, state == IGNORE");
8396 ctxt->instate = XML_PARSER_DTD;
8397#ifdef DEBUG_PUSH
8398 xmlGenericError(xmlGenericErrorContext,
8399 "PP: entering DTD\n");
8400#endif
8401 break;
8402 case XML_PARSER_PROLOG:
8403 SKIP_BLANKS;
8404 if (ctxt->input->buf == NULL)
8405 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8406 else
8407 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8408 if (avail < 2)
8409 goto done;
8410 cur = ctxt->input->cur[0];
8411 next = ctxt->input->cur[1];
8412 if ((cur == '<') && (next == '?')) {
8413 if ((!terminate) &&
8414 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8415 goto done;
8416#ifdef DEBUG_PUSH
8417 xmlGenericError(xmlGenericErrorContext,
8418 "PP: Parsing PI\n");
8419#endif
8420 xmlParsePI(ctxt);
8421 } else if ((cur == '<') && (next == '!') &&
8422 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8423 if ((!terminate) &&
8424 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8425 goto done;
8426#ifdef DEBUG_PUSH
8427 xmlGenericError(xmlGenericErrorContext,
8428 "PP: Parsing Comment\n");
8429#endif
8430 xmlParseComment(ctxt);
8431 ctxt->instate = XML_PARSER_PROLOG;
8432 } else if ((cur == '<') && (next == '!') &&
8433 (avail < 4)) {
8434 goto done;
8435 } else {
8436 ctxt->instate = XML_PARSER_START_TAG;
8437#ifdef DEBUG_PUSH
8438 xmlGenericError(xmlGenericErrorContext,
8439 "PP: entering START_TAG\n");
8440#endif
8441 }
8442 break;
8443 case XML_PARSER_EPILOG:
8444 SKIP_BLANKS;
8445 if (ctxt->input->buf == NULL)
8446 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8447 else
8448 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8449 if (avail < 2)
8450 goto done;
8451 cur = ctxt->input->cur[0];
8452 next = ctxt->input->cur[1];
8453 if ((cur == '<') && (next == '?')) {
8454 if ((!terminate) &&
8455 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8456 goto done;
8457#ifdef DEBUG_PUSH
8458 xmlGenericError(xmlGenericErrorContext,
8459 "PP: Parsing PI\n");
8460#endif
8461 xmlParsePI(ctxt);
8462 ctxt->instate = XML_PARSER_EPILOG;
8463 } else if ((cur == '<') && (next == '!') &&
8464 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8465 if ((!terminate) &&
8466 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8467 goto done;
8468#ifdef DEBUG_PUSH
8469 xmlGenericError(xmlGenericErrorContext,
8470 "PP: Parsing Comment\n");
8471#endif
8472 xmlParseComment(ctxt);
8473 ctxt->instate = XML_PARSER_EPILOG;
8474 } else if ((cur == '<') && (next == '!') &&
8475 (avail < 4)) {
8476 goto done;
8477 } else {
8478 ctxt->errNo = XML_ERR_DOCUMENT_END;
8479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8480 ctxt->sax->error(ctxt->userData,
8481 "Extra content at the end of the document\n");
8482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008484 ctxt->instate = XML_PARSER_EOF;
8485#ifdef DEBUG_PUSH
8486 xmlGenericError(xmlGenericErrorContext,
8487 "PP: entering EOF\n");
8488#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008489 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008490 ctxt->sax->endDocument(ctxt->userData);
8491 goto done;
8492 }
8493 break;
8494 case XML_PARSER_START_TAG: {
8495 xmlChar *name, *oldname;
8496
8497 if ((avail < 2) && (ctxt->inputNr == 1))
8498 goto done;
8499 cur = ctxt->input->cur[0];
8500 if (cur != '<') {
8501 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8503 ctxt->sax->error(ctxt->userData,
8504 "Start tag expect, '<' not found\n");
8505 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008506 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008507 ctxt->instate = XML_PARSER_EOF;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering EOF\n");
8511#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008512 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008513 ctxt->sax->endDocument(ctxt->userData);
8514 goto done;
8515 }
8516 if ((!terminate) &&
8517 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8518 goto done;
8519 if (ctxt->spaceNr == 0)
8520 spacePush(ctxt, -1);
8521 else
8522 spacePush(ctxt, *ctxt->space);
8523 name = xmlParseStartTag(ctxt);
8524 if (name == NULL) {
8525 spacePop(ctxt);
8526 ctxt->instate = XML_PARSER_EOF;
8527#ifdef DEBUG_PUSH
8528 xmlGenericError(xmlGenericErrorContext,
8529 "PP: entering EOF\n");
8530#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008531 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008532 ctxt->sax->endDocument(ctxt->userData);
8533 goto done;
8534 }
8535 namePush(ctxt, xmlStrdup(name));
8536
8537 /*
8538 * [ VC: Root Element Type ]
8539 * The Name in the document type declaration must match
8540 * the element type of the root element.
8541 */
8542 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8543 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8544 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8545
8546 /*
8547 * Check for an Empty Element.
8548 */
8549 if ((RAW == '/') && (NXT(1) == '>')) {
8550 SKIP(2);
8551 if ((ctxt->sax != NULL) &&
8552 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8553 ctxt->sax->endElement(ctxt->userData, name);
8554 xmlFree(name);
8555 oldname = namePop(ctxt);
8556 spacePop(ctxt);
8557 if (oldname != NULL) {
8558#ifdef DEBUG_STACK
8559 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8560#endif
8561 xmlFree(oldname);
8562 }
8563 if (ctxt->name == NULL) {
8564 ctxt->instate = XML_PARSER_EPILOG;
8565#ifdef DEBUG_PUSH
8566 xmlGenericError(xmlGenericErrorContext,
8567 "PP: entering EPILOG\n");
8568#endif
8569 } else {
8570 ctxt->instate = XML_PARSER_CONTENT;
8571#ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext,
8573 "PP: entering CONTENT\n");
8574#endif
8575 }
8576 break;
8577 }
8578 if (RAW == '>') {
8579 NEXT;
8580 } else {
8581 ctxt->errNo = XML_ERR_GT_REQUIRED;
8582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8583 ctxt->sax->error(ctxt->userData,
8584 "Couldn't find end of Start Tag %s\n",
8585 name);
8586 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008587 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008588
8589 /*
8590 * end of parsing of this node.
8591 */
8592 nodePop(ctxt);
8593 oldname = namePop(ctxt);
8594 spacePop(ctxt);
8595 if (oldname != NULL) {
8596#ifdef DEBUG_STACK
8597 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8598#endif
8599 xmlFree(oldname);
8600 }
8601 }
8602 xmlFree(name);
8603 ctxt->instate = XML_PARSER_CONTENT;
8604#ifdef DEBUG_PUSH
8605 xmlGenericError(xmlGenericErrorContext,
8606 "PP: entering CONTENT\n");
8607#endif
8608 break;
8609 }
8610 case XML_PARSER_CONTENT: {
8611 const xmlChar *test;
8612 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008613 if ((avail < 2) && (ctxt->inputNr == 1))
8614 goto done;
8615 cur = ctxt->input->cur[0];
8616 next = ctxt->input->cur[1];
8617
8618 test = CUR_PTR;
8619 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008620 if ((cur == '<') && (next == '?')) {
8621 if ((!terminate) &&
8622 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8623 goto done;
8624#ifdef DEBUG_PUSH
8625 xmlGenericError(xmlGenericErrorContext,
8626 "PP: Parsing PI\n");
8627#endif
8628 xmlParsePI(ctxt);
8629 } else if ((cur == '<') && (next == '!') &&
8630 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8631 if ((!terminate) &&
8632 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8633 goto done;
8634#ifdef DEBUG_PUSH
8635 xmlGenericError(xmlGenericErrorContext,
8636 "PP: Parsing Comment\n");
8637#endif
8638 xmlParseComment(ctxt);
8639 ctxt->instate = XML_PARSER_CONTENT;
8640 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8641 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8642 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8643 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8644 (ctxt->input->cur[8] == '[')) {
8645 SKIP(9);
8646 ctxt->instate = XML_PARSER_CDATA_SECTION;
8647#ifdef DEBUG_PUSH
8648 xmlGenericError(xmlGenericErrorContext,
8649 "PP: entering CDATA_SECTION\n");
8650#endif
8651 break;
8652 } else if ((cur == '<') && (next == '!') &&
8653 (avail < 9)) {
8654 goto done;
8655 } else if ((cur == '<') && (next == '/')) {
8656 ctxt->instate = XML_PARSER_END_TAG;
8657#ifdef DEBUG_PUSH
8658 xmlGenericError(xmlGenericErrorContext,
8659 "PP: entering END_TAG\n");
8660#endif
8661 break;
8662 } else if (cur == '<') {
8663 ctxt->instate = XML_PARSER_START_TAG;
8664#ifdef DEBUG_PUSH
8665 xmlGenericError(xmlGenericErrorContext,
8666 "PP: entering START_TAG\n");
8667#endif
8668 break;
8669 } else if (cur == '&') {
8670 if ((!terminate) &&
8671 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8672 goto done;
8673#ifdef DEBUG_PUSH
8674 xmlGenericError(xmlGenericErrorContext,
8675 "PP: Parsing Reference\n");
8676#endif
8677 xmlParseReference(ctxt);
8678 } else {
8679 /* TODO Avoid the extra copy, handle directly !!! */
8680 /*
8681 * Goal of the following test is:
8682 * - minimize calls to the SAX 'character' callback
8683 * when they are mergeable
8684 * - handle an problem for isBlank when we only parse
8685 * a sequence of blank chars and the next one is
8686 * not available to check against '<' presence.
8687 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008688 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008689 * of the parser.
8690 */
8691 if ((ctxt->inputNr == 1) &&
8692 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8693 if ((!terminate) &&
8694 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8695 goto done;
8696 }
8697 ctxt->checkIndex = 0;
8698#ifdef DEBUG_PUSH
8699 xmlGenericError(xmlGenericErrorContext,
8700 "PP: Parsing char data\n");
8701#endif
8702 xmlParseCharData(ctxt, 0);
8703 }
8704 /*
8705 * Pop-up of finished entities.
8706 */
8707 while ((RAW == 0) && (ctxt->inputNr > 1))
8708 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008709 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008710 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8712 ctxt->sax->error(ctxt->userData,
8713 "detected an error in element content\n");
8714 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008715 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008716 ctxt->instate = XML_PARSER_EOF;
8717 break;
8718 }
8719 break;
8720 }
8721 case XML_PARSER_CDATA_SECTION: {
8722 /*
8723 * The Push mode need to have the SAX callback for
8724 * cdataBlock merge back contiguous callbacks.
8725 */
8726 int base;
8727
8728 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8729 if (base < 0) {
8730 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8731 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8732 if (ctxt->sax->cdataBlock != NULL)
8733 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8734 XML_PARSER_BIG_BUFFER_SIZE);
8735 }
8736 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8737 ctxt->checkIndex = 0;
8738 }
8739 goto done;
8740 } else {
8741 if ((ctxt->sax != NULL) && (base > 0) &&
8742 (!ctxt->disableSAX)) {
8743 if (ctxt->sax->cdataBlock != NULL)
8744 ctxt->sax->cdataBlock(ctxt->userData,
8745 ctxt->input->cur, base);
8746 }
8747 SKIP(base + 3);
8748 ctxt->checkIndex = 0;
8749 ctxt->instate = XML_PARSER_CONTENT;
8750#ifdef DEBUG_PUSH
8751 xmlGenericError(xmlGenericErrorContext,
8752 "PP: entering CONTENT\n");
8753#endif
8754 }
8755 break;
8756 }
8757 case XML_PARSER_END_TAG:
8758 if (avail < 2)
8759 goto done;
8760 if ((!terminate) &&
8761 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8762 goto done;
8763 xmlParseEndTag(ctxt);
8764 if (ctxt->name == NULL) {
8765 ctxt->instate = XML_PARSER_EPILOG;
8766#ifdef DEBUG_PUSH
8767 xmlGenericError(xmlGenericErrorContext,
8768 "PP: entering EPILOG\n");
8769#endif
8770 } else {
8771 ctxt->instate = XML_PARSER_CONTENT;
8772#ifdef DEBUG_PUSH
8773 xmlGenericError(xmlGenericErrorContext,
8774 "PP: entering CONTENT\n");
8775#endif
8776 }
8777 break;
8778 case XML_PARSER_DTD: {
8779 /*
8780 * Sorry but progressive parsing of the internal subset
8781 * is not expected to be supported. We first check that
8782 * the full content of the internal subset is available and
8783 * the parsing is launched only at that point.
8784 * Internal subset ends up with "']' S? '>'" in an unescaped
8785 * section and not in a ']]>' sequence which are conditional
8786 * sections (whoever argued to keep that crap in XML deserve
8787 * a place in hell !).
8788 */
8789 int base, i;
8790 xmlChar *buf;
8791 xmlChar quote = 0;
8792
8793 base = ctxt->input->cur - ctxt->input->base;
8794 if (base < 0) return(0);
8795 if (ctxt->checkIndex > base)
8796 base = ctxt->checkIndex;
8797 buf = ctxt->input->buf->buffer->content;
8798 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8799 base++) {
8800 if (quote != 0) {
8801 if (buf[base] == quote)
8802 quote = 0;
8803 continue;
8804 }
8805 if (buf[base] == '"') {
8806 quote = '"';
8807 continue;
8808 }
8809 if (buf[base] == '\'') {
8810 quote = '\'';
8811 continue;
8812 }
8813 if (buf[base] == ']') {
8814 if ((unsigned int) base +1 >=
8815 ctxt->input->buf->buffer->use)
8816 break;
8817 if (buf[base + 1] == ']') {
8818 /* conditional crap, skip both ']' ! */
8819 base++;
8820 continue;
8821 }
8822 for (i = 0;
8823 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8824 i++) {
8825 if (buf[base + i] == '>')
8826 goto found_end_int_subset;
8827 }
8828 break;
8829 }
8830 }
8831 /*
8832 * We didn't found the end of the Internal subset
8833 */
8834 if (quote == 0)
8835 ctxt->checkIndex = base;
8836#ifdef DEBUG_PUSH
8837 if (next == 0)
8838 xmlGenericError(xmlGenericErrorContext,
8839 "PP: lookup of int subset end filed\n");
8840#endif
8841 goto done;
8842
8843found_end_int_subset:
8844 xmlParseInternalSubset(ctxt);
8845 ctxt->inSubset = 2;
8846 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8847 (ctxt->sax->externalSubset != NULL))
8848 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8849 ctxt->extSubSystem, ctxt->extSubURI);
8850 ctxt->inSubset = 0;
8851 ctxt->instate = XML_PARSER_PROLOG;
8852 ctxt->checkIndex = 0;
8853#ifdef DEBUG_PUSH
8854 xmlGenericError(xmlGenericErrorContext,
8855 "PP: entering PROLOG\n");
8856#endif
8857 break;
8858 }
8859 case XML_PARSER_COMMENT:
8860 xmlGenericError(xmlGenericErrorContext,
8861 "PP: internal error, state == COMMENT\n");
8862 ctxt->instate = XML_PARSER_CONTENT;
8863#ifdef DEBUG_PUSH
8864 xmlGenericError(xmlGenericErrorContext,
8865 "PP: entering CONTENT\n");
8866#endif
8867 break;
8868 case XML_PARSER_PI:
8869 xmlGenericError(xmlGenericErrorContext,
8870 "PP: internal error, state == PI\n");
8871 ctxt->instate = XML_PARSER_CONTENT;
8872#ifdef DEBUG_PUSH
8873 xmlGenericError(xmlGenericErrorContext,
8874 "PP: entering CONTENT\n");
8875#endif
8876 break;
8877 case XML_PARSER_ENTITY_DECL:
8878 xmlGenericError(xmlGenericErrorContext,
8879 "PP: internal error, state == ENTITY_DECL\n");
8880 ctxt->instate = XML_PARSER_DTD;
8881#ifdef DEBUG_PUSH
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: entering DTD\n");
8884#endif
8885 break;
8886 case XML_PARSER_ENTITY_VALUE:
8887 xmlGenericError(xmlGenericErrorContext,
8888 "PP: internal error, state == ENTITY_VALUE\n");
8889 ctxt->instate = XML_PARSER_CONTENT;
8890#ifdef DEBUG_PUSH
8891 xmlGenericError(xmlGenericErrorContext,
8892 "PP: entering DTD\n");
8893#endif
8894 break;
8895 case XML_PARSER_ATTRIBUTE_VALUE:
8896 xmlGenericError(xmlGenericErrorContext,
8897 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8898 ctxt->instate = XML_PARSER_START_TAG;
8899#ifdef DEBUG_PUSH
8900 xmlGenericError(xmlGenericErrorContext,
8901 "PP: entering START_TAG\n");
8902#endif
8903 break;
8904 case XML_PARSER_SYSTEM_LITERAL:
8905 xmlGenericError(xmlGenericErrorContext,
8906 "PP: internal error, state == SYSTEM_LITERAL\n");
8907 ctxt->instate = XML_PARSER_START_TAG;
8908#ifdef DEBUG_PUSH
8909 xmlGenericError(xmlGenericErrorContext,
8910 "PP: entering START_TAG\n");
8911#endif
8912 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008913 case XML_PARSER_PUBLIC_LITERAL:
8914 xmlGenericError(xmlGenericErrorContext,
8915 "PP: internal error, state == PUBLIC_LITERAL\n");
8916 ctxt->instate = XML_PARSER_START_TAG;
8917#ifdef DEBUG_PUSH
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: entering START_TAG\n");
8920#endif
8921 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008922 }
8923 }
8924done:
8925#ifdef DEBUG_PUSH
8926 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8927#endif
8928 return(ret);
8929}
8930
8931/**
Owen Taylor3473f882001-02-23 17:55:21 +00008932 * xmlParseChunk:
8933 * @ctxt: an XML parser context
8934 * @chunk: an char array
8935 * @size: the size in byte of the chunk
8936 * @terminate: last chunk indicator
8937 *
8938 * Parse a Chunk of memory
8939 *
8940 * Returns zero if no error, the xmlParserErrors otherwise.
8941 */
8942int
8943xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8944 int terminate) {
8945 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8946 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8947 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8948 int cur = ctxt->input->cur - ctxt->input->base;
8949
8950 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8951 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8952 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008953 ctxt->input->end =
8954 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008955#ifdef DEBUG_PUSH
8956 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8957#endif
8958
8959 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8960 xmlParseTryOrFinish(ctxt, terminate);
8961 } else if (ctxt->instate != XML_PARSER_EOF) {
8962 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8963 xmlParserInputBufferPtr in = ctxt->input->buf;
8964 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8965 (in->raw != NULL)) {
8966 int nbchars;
8967
8968 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8969 if (nbchars < 0) {
8970 xmlGenericError(xmlGenericErrorContext,
8971 "xmlParseChunk: encoder error\n");
8972 return(XML_ERR_INVALID_ENCODING);
8973 }
8974 }
8975 }
8976 }
8977 xmlParseTryOrFinish(ctxt, terminate);
8978 if (terminate) {
8979 /*
8980 * Check for termination
8981 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008982 int avail = 0;
8983 if (ctxt->input->buf == NULL)
8984 avail = ctxt->input->length -
8985 (ctxt->input->cur - ctxt->input->base);
8986 else
8987 avail = ctxt->input->buf->buffer->use -
8988 (ctxt->input->cur - ctxt->input->base);
8989
Owen Taylor3473f882001-02-23 17:55:21 +00008990 if ((ctxt->instate != XML_PARSER_EOF) &&
8991 (ctxt->instate != XML_PARSER_EPILOG)) {
8992 ctxt->errNo = XML_ERR_DOCUMENT_END;
8993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8994 ctxt->sax->error(ctxt->userData,
8995 "Extra content at the end of the document\n");
8996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008998 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00008999 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9000 ctxt->errNo = XML_ERR_DOCUMENT_END;
9001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9002 ctxt->sax->error(ctxt->userData,
9003 "Extra content at the end of the document\n");
9004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009006
9007 }
Owen Taylor3473f882001-02-23 17:55:21 +00009008 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009009 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009010 ctxt->sax->endDocument(ctxt->userData);
9011 }
9012 ctxt->instate = XML_PARSER_EOF;
9013 }
9014 return((xmlParserErrors) ctxt->errNo);
9015}
9016
9017/************************************************************************
9018 * *
9019 * I/O front end functions to the parser *
9020 * *
9021 ************************************************************************/
9022
9023/**
9024 * xmlStopParser:
9025 * @ctxt: an XML parser context
9026 *
9027 * Blocks further parser processing
9028 */
9029void
9030xmlStopParser(xmlParserCtxtPtr ctxt) {
9031 ctxt->instate = XML_PARSER_EOF;
9032 if (ctxt->input != NULL)
9033 ctxt->input->cur = BAD_CAST"";
9034}
9035
9036/**
9037 * xmlCreatePushParserCtxt:
9038 * @sax: a SAX handler
9039 * @user_data: The user data returned on SAX callbacks
9040 * @chunk: a pointer to an array of chars
9041 * @size: number of chars in the array
9042 * @filename: an optional file name or URI
9043 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009044 * Create a parser context for using the XML parser in push mode.
9045 * If @buffer and @size are non-NULL, the data is used to detect
9046 * the encoding. The remaining characters will be parsed so they
9047 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009048 * To allow content encoding detection, @size should be >= 4
9049 * The value of @filename is used for fetching external entities
9050 * and error/warning reports.
9051 *
9052 * Returns the new parser context or NULL
9053 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009054
Owen Taylor3473f882001-02-23 17:55:21 +00009055xmlParserCtxtPtr
9056xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9057 const char *chunk, int size, const char *filename) {
9058 xmlParserCtxtPtr ctxt;
9059 xmlParserInputPtr inputStream;
9060 xmlParserInputBufferPtr buf;
9061 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9062
9063 /*
9064 * plug some encoding conversion routines
9065 */
9066 if ((chunk != NULL) && (size >= 4))
9067 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9068
9069 buf = xmlAllocParserInputBuffer(enc);
9070 if (buf == NULL) return(NULL);
9071
9072 ctxt = xmlNewParserCtxt();
9073 if (ctxt == NULL) {
9074 xmlFree(buf);
9075 return(NULL);
9076 }
9077 if (sax != NULL) {
9078 if (ctxt->sax != &xmlDefaultSAXHandler)
9079 xmlFree(ctxt->sax);
9080 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9081 if (ctxt->sax == NULL) {
9082 xmlFree(buf);
9083 xmlFree(ctxt);
9084 return(NULL);
9085 }
9086 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9087 if (user_data != NULL)
9088 ctxt->userData = user_data;
9089 }
9090 if (filename == NULL) {
9091 ctxt->directory = NULL;
9092 } else {
9093 ctxt->directory = xmlParserGetDirectory(filename);
9094 }
9095
9096 inputStream = xmlNewInputStream(ctxt);
9097 if (inputStream == NULL) {
9098 xmlFreeParserCtxt(ctxt);
9099 return(NULL);
9100 }
9101
9102 if (filename == NULL)
9103 inputStream->filename = NULL;
9104 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009105 inputStream->filename = (char *)
9106 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009107 inputStream->buf = buf;
9108 inputStream->base = inputStream->buf->buffer->content;
9109 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009110 inputStream->end =
9111 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009112
9113 inputPush(ctxt, inputStream);
9114
9115 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9116 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009117 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9118 int cur = ctxt->input->cur - ctxt->input->base;
9119
Owen Taylor3473f882001-02-23 17:55:21 +00009120 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009121
9122 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9123 ctxt->input->cur = ctxt->input->base + cur;
9124 ctxt->input->end =
9125 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009126#ifdef DEBUG_PUSH
9127 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9128#endif
9129 }
9130
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009131 if (enc != XML_CHAR_ENCODING_NONE) {
9132 xmlSwitchEncoding(ctxt, enc);
9133 }
9134
Owen Taylor3473f882001-02-23 17:55:21 +00009135 return(ctxt);
9136}
9137
9138/**
9139 * xmlCreateIOParserCtxt:
9140 * @sax: a SAX handler
9141 * @user_data: The user data returned on SAX callbacks
9142 * @ioread: an I/O read function
9143 * @ioclose: an I/O close function
9144 * @ioctx: an I/O handler
9145 * @enc: the charset encoding if known
9146 *
9147 * Create a parser context for using the XML parser with an existing
9148 * I/O stream
9149 *
9150 * Returns the new parser context or NULL
9151 */
9152xmlParserCtxtPtr
9153xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9154 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9155 void *ioctx, xmlCharEncoding enc) {
9156 xmlParserCtxtPtr ctxt;
9157 xmlParserInputPtr inputStream;
9158 xmlParserInputBufferPtr buf;
9159
9160 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9161 if (buf == NULL) return(NULL);
9162
9163 ctxt = xmlNewParserCtxt();
9164 if (ctxt == NULL) {
9165 xmlFree(buf);
9166 return(NULL);
9167 }
9168 if (sax != NULL) {
9169 if (ctxt->sax != &xmlDefaultSAXHandler)
9170 xmlFree(ctxt->sax);
9171 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9172 if (ctxt->sax == NULL) {
9173 xmlFree(buf);
9174 xmlFree(ctxt);
9175 return(NULL);
9176 }
9177 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9178 if (user_data != NULL)
9179 ctxt->userData = user_data;
9180 }
9181
9182 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9183 if (inputStream == NULL) {
9184 xmlFreeParserCtxt(ctxt);
9185 return(NULL);
9186 }
9187 inputPush(ctxt, inputStream);
9188
9189 return(ctxt);
9190}
9191
9192/************************************************************************
9193 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009194 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009195 * *
9196 ************************************************************************/
9197
9198/**
9199 * xmlIOParseDTD:
9200 * @sax: the SAX handler block or NULL
9201 * @input: an Input Buffer
9202 * @enc: the charset encoding if known
9203 *
9204 * Load and parse a DTD
9205 *
9206 * Returns the resulting xmlDtdPtr or NULL in case of error.
9207 * @input will be freed at parsing end.
9208 */
9209
9210xmlDtdPtr
9211xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9212 xmlCharEncoding enc) {
9213 xmlDtdPtr ret = NULL;
9214 xmlParserCtxtPtr ctxt;
9215 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009216 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009217
9218 if (input == NULL)
9219 return(NULL);
9220
9221 ctxt = xmlNewParserCtxt();
9222 if (ctxt == NULL) {
9223 return(NULL);
9224 }
9225
9226 /*
9227 * Set-up the SAX context
9228 */
9229 if (sax != NULL) {
9230 if (ctxt->sax != NULL)
9231 xmlFree(ctxt->sax);
9232 ctxt->sax = sax;
9233 ctxt->userData = NULL;
9234 }
9235
9236 /*
9237 * generate a parser input from the I/O handler
9238 */
9239
9240 pinput = xmlNewIOInputStream(ctxt, input, enc);
9241 if (pinput == NULL) {
9242 if (sax != NULL) ctxt->sax = NULL;
9243 xmlFreeParserCtxt(ctxt);
9244 return(NULL);
9245 }
9246
9247 /*
9248 * plug some encoding conversion routines here.
9249 */
9250 xmlPushInput(ctxt, pinput);
9251
9252 pinput->filename = NULL;
9253 pinput->line = 1;
9254 pinput->col = 1;
9255 pinput->base = ctxt->input->cur;
9256 pinput->cur = ctxt->input->cur;
9257 pinput->free = NULL;
9258
9259 /*
9260 * let's parse that entity knowing it's an external subset.
9261 */
9262 ctxt->inSubset = 2;
9263 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9264 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9265 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009266
9267 if (enc == XML_CHAR_ENCODING_NONE) {
9268 /*
9269 * Get the 4 first bytes and decode the charset
9270 * if enc != XML_CHAR_ENCODING_NONE
9271 * plug some encoding conversion routines.
9272 */
9273 start[0] = RAW;
9274 start[1] = NXT(1);
9275 start[2] = NXT(2);
9276 start[3] = NXT(3);
9277 enc = xmlDetectCharEncoding(start, 4);
9278 if (enc != XML_CHAR_ENCODING_NONE) {
9279 xmlSwitchEncoding(ctxt, enc);
9280 }
9281 }
9282
Owen Taylor3473f882001-02-23 17:55:21 +00009283 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9284
9285 if (ctxt->myDoc != NULL) {
9286 if (ctxt->wellFormed) {
9287 ret = ctxt->myDoc->extSubset;
9288 ctxt->myDoc->extSubset = NULL;
9289 } else {
9290 ret = NULL;
9291 }
9292 xmlFreeDoc(ctxt->myDoc);
9293 ctxt->myDoc = NULL;
9294 }
9295 if (sax != NULL) ctxt->sax = NULL;
9296 xmlFreeParserCtxt(ctxt);
9297
9298 return(ret);
9299}
9300
9301/**
9302 * xmlSAXParseDTD:
9303 * @sax: the SAX handler block
9304 * @ExternalID: a NAME* containing the External ID of the DTD
9305 * @SystemID: a NAME* containing the URL to the DTD
9306 *
9307 * Load and parse an external subset.
9308 *
9309 * Returns the resulting xmlDtdPtr or NULL in case of error.
9310 */
9311
9312xmlDtdPtr
9313xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9314 const xmlChar *SystemID) {
9315 xmlDtdPtr ret = NULL;
9316 xmlParserCtxtPtr ctxt;
9317 xmlParserInputPtr input = NULL;
9318 xmlCharEncoding enc;
9319
9320 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9321
9322 ctxt = xmlNewParserCtxt();
9323 if (ctxt == NULL) {
9324 return(NULL);
9325 }
9326
9327 /*
9328 * Set-up the SAX context
9329 */
9330 if (sax != NULL) {
9331 if (ctxt->sax != NULL)
9332 xmlFree(ctxt->sax);
9333 ctxt->sax = sax;
9334 ctxt->userData = NULL;
9335 }
9336
9337 /*
9338 * Ask the Entity resolver to load the damn thing
9339 */
9340
9341 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9342 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9343 if (input == NULL) {
9344 if (sax != NULL) ctxt->sax = NULL;
9345 xmlFreeParserCtxt(ctxt);
9346 return(NULL);
9347 }
9348
9349 /*
9350 * plug some encoding conversion routines here.
9351 */
9352 xmlPushInput(ctxt, input);
9353 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9354 xmlSwitchEncoding(ctxt, enc);
9355
9356 if (input->filename == NULL)
9357 input->filename = (char *) xmlStrdup(SystemID);
9358 input->line = 1;
9359 input->col = 1;
9360 input->base = ctxt->input->cur;
9361 input->cur = ctxt->input->cur;
9362 input->free = NULL;
9363
9364 /*
9365 * let's parse that entity knowing it's an external subset.
9366 */
9367 ctxt->inSubset = 2;
9368 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9369 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9370 ExternalID, SystemID);
9371 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9372
9373 if (ctxt->myDoc != NULL) {
9374 if (ctxt->wellFormed) {
9375 ret = ctxt->myDoc->extSubset;
9376 ctxt->myDoc->extSubset = NULL;
9377 } else {
9378 ret = NULL;
9379 }
9380 xmlFreeDoc(ctxt->myDoc);
9381 ctxt->myDoc = NULL;
9382 }
9383 if (sax != NULL) ctxt->sax = NULL;
9384 xmlFreeParserCtxt(ctxt);
9385
9386 return(ret);
9387}
9388
9389/**
9390 * xmlParseDTD:
9391 * @ExternalID: a NAME* containing the External ID of the DTD
9392 * @SystemID: a NAME* containing the URL to the DTD
9393 *
9394 * Load and parse an external subset.
9395 *
9396 * Returns the resulting xmlDtdPtr or NULL in case of error.
9397 */
9398
9399xmlDtdPtr
9400xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9401 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9402}
9403
9404/************************************************************************
9405 * *
9406 * Front ends when parsing an Entity *
9407 * *
9408 ************************************************************************/
9409
9410/**
Owen Taylor3473f882001-02-23 17:55:21 +00009411 * xmlParseCtxtExternalEntity:
9412 * @ctx: the existing parsing context
9413 * @URL: the URL for the entity to load
9414 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009415 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009416 *
9417 * Parse an external general entity within an existing parsing context
9418 * An external general parsed entity is well-formed if it matches the
9419 * production labeled extParsedEnt.
9420 *
9421 * [78] extParsedEnt ::= TextDecl? content
9422 *
9423 * Returns 0 if the entity is well formed, -1 in case of args problem and
9424 * the parser error code otherwise
9425 */
9426
9427int
9428xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009429 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009430 xmlParserCtxtPtr ctxt;
9431 xmlDocPtr newDoc;
9432 xmlSAXHandlerPtr oldsax = NULL;
9433 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009434 xmlChar start[4];
9435 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009436
9437 if (ctx->depth > 40) {
9438 return(XML_ERR_ENTITY_LOOP);
9439 }
9440
Daniel Veillardcda96922001-08-21 10:56:31 +00009441 if (lst != NULL)
9442 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009443 if ((URL == NULL) && (ID == NULL))
9444 return(-1);
9445 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9446 return(-1);
9447
9448
9449 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9450 if (ctxt == NULL) return(-1);
9451 ctxt->userData = ctxt;
9452 oldsax = ctxt->sax;
9453 ctxt->sax = ctx->sax;
9454 newDoc = xmlNewDoc(BAD_CAST "1.0");
9455 if (newDoc == NULL) {
9456 xmlFreeParserCtxt(ctxt);
9457 return(-1);
9458 }
9459 if (ctx->myDoc != NULL) {
9460 newDoc->intSubset = ctx->myDoc->intSubset;
9461 newDoc->extSubset = ctx->myDoc->extSubset;
9462 }
9463 if (ctx->myDoc->URL != NULL) {
9464 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9465 }
9466 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9467 if (newDoc->children == NULL) {
9468 ctxt->sax = oldsax;
9469 xmlFreeParserCtxt(ctxt);
9470 newDoc->intSubset = NULL;
9471 newDoc->extSubset = NULL;
9472 xmlFreeDoc(newDoc);
9473 return(-1);
9474 }
9475 nodePush(ctxt, newDoc->children);
9476 if (ctx->myDoc == NULL) {
9477 ctxt->myDoc = newDoc;
9478 } else {
9479 ctxt->myDoc = ctx->myDoc;
9480 newDoc->children->doc = ctx->myDoc;
9481 }
9482
Daniel Veillard87a764e2001-06-20 17:41:10 +00009483 /*
9484 * Get the 4 first bytes and decode the charset
9485 * if enc != XML_CHAR_ENCODING_NONE
9486 * plug some encoding conversion routines.
9487 */
9488 GROW
9489 start[0] = RAW;
9490 start[1] = NXT(1);
9491 start[2] = NXT(2);
9492 start[3] = NXT(3);
9493 enc = xmlDetectCharEncoding(start, 4);
9494 if (enc != XML_CHAR_ENCODING_NONE) {
9495 xmlSwitchEncoding(ctxt, enc);
9496 }
9497
Owen Taylor3473f882001-02-23 17:55:21 +00009498 /*
9499 * Parse a possible text declaration first
9500 */
Owen Taylor3473f882001-02-23 17:55:21 +00009501 if ((RAW == '<') && (NXT(1) == '?') &&
9502 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9503 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9504 xmlParseTextDecl(ctxt);
9505 }
9506
9507 /*
9508 * Doing validity checking on chunk doesn't make sense
9509 */
9510 ctxt->instate = XML_PARSER_CONTENT;
9511 ctxt->validate = ctx->validate;
9512 ctxt->loadsubset = ctx->loadsubset;
9513 ctxt->depth = ctx->depth + 1;
9514 ctxt->replaceEntities = ctx->replaceEntities;
9515 if (ctxt->validate) {
9516 ctxt->vctxt.error = ctx->vctxt.error;
9517 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009518 } else {
9519 ctxt->vctxt.error = NULL;
9520 ctxt->vctxt.warning = NULL;
9521 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009522 ctxt->vctxt.nodeTab = NULL;
9523 ctxt->vctxt.nodeNr = 0;
9524 ctxt->vctxt.nodeMax = 0;
9525 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009526
9527 xmlParseContent(ctxt);
9528
9529 if ((RAW == '<') && (NXT(1) == '/')) {
9530 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9532 ctxt->sax->error(ctxt->userData,
9533 "chunk is not well balanced\n");
9534 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009535 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009536 } else if (RAW != 0) {
9537 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9539 ctxt->sax->error(ctxt->userData,
9540 "extra content at the end of well balanced chunk\n");
9541 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009543 }
9544 if (ctxt->node != newDoc->children) {
9545 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9547 ctxt->sax->error(ctxt->userData,
9548 "chunk is not well balanced\n");
9549 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009550 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009551 }
9552
9553 if (!ctxt->wellFormed) {
9554 if (ctxt->errNo == 0)
9555 ret = 1;
9556 else
9557 ret = ctxt->errNo;
9558 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009559 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009560 xmlNodePtr cur;
9561
9562 /*
9563 * Return the newly created nodeset after unlinking it from
9564 * they pseudo parent.
9565 */
9566 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009567 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009568 while (cur != NULL) {
9569 cur->parent = NULL;
9570 cur = cur->next;
9571 }
9572 newDoc->children->children = NULL;
9573 }
9574 ret = 0;
9575 }
9576 ctxt->sax = oldsax;
9577 xmlFreeParserCtxt(ctxt);
9578 newDoc->intSubset = NULL;
9579 newDoc->extSubset = NULL;
9580 xmlFreeDoc(newDoc);
9581
9582 return(ret);
9583}
9584
9585/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009586 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009587 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009588 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009589 * @sax: the SAX handler bloc (possibly NULL)
9590 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9591 * @depth: Used for loop detection, use 0
9592 * @URL: the URL for the entity to load
9593 * @ID: the System ID for the entity to load
9594 * @list: the return value for the set of parsed nodes
9595 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009596 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009597 *
9598 * Returns 0 if the entity is well formed, -1 in case of args problem and
9599 * the parser error code otherwise
9600 */
9601
Daniel Veillard257d9102001-05-08 10:41:44 +00009602static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009603xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9604 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009605 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009606 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009607 xmlParserCtxtPtr ctxt;
9608 xmlDocPtr newDoc;
9609 xmlSAXHandlerPtr oldsax = NULL;
9610 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009611 xmlChar start[4];
9612 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009613
9614 if (depth > 40) {
9615 return(XML_ERR_ENTITY_LOOP);
9616 }
9617
9618
9619
9620 if (list != NULL)
9621 *list = NULL;
9622 if ((URL == NULL) && (ID == NULL))
9623 return(-1);
9624 if (doc == NULL) /* @@ relax but check for dereferences */
9625 return(-1);
9626
9627
9628 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9629 if (ctxt == NULL) return(-1);
9630 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009631 if (oldctxt != NULL) {
9632 ctxt->_private = oldctxt->_private;
9633 ctxt->loadsubset = oldctxt->loadsubset;
9634 ctxt->validate = oldctxt->validate;
9635 ctxt->external = oldctxt->external;
9636 } else {
9637 /*
9638 * Doing validity checking on chunk without context
9639 * doesn't make sense
9640 */
9641 ctxt->_private = NULL;
9642 ctxt->validate = 0;
9643 ctxt->external = 2;
9644 ctxt->loadsubset = 0;
9645 }
Owen Taylor3473f882001-02-23 17:55:21 +00009646 if (sax != NULL) {
9647 oldsax = ctxt->sax;
9648 ctxt->sax = sax;
9649 if (user_data != NULL)
9650 ctxt->userData = user_data;
9651 }
9652 newDoc = xmlNewDoc(BAD_CAST "1.0");
9653 if (newDoc == NULL) {
9654 xmlFreeParserCtxt(ctxt);
9655 return(-1);
9656 }
9657 if (doc != NULL) {
9658 newDoc->intSubset = doc->intSubset;
9659 newDoc->extSubset = doc->extSubset;
9660 }
9661 if (doc->URL != NULL) {
9662 newDoc->URL = xmlStrdup(doc->URL);
9663 }
9664 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9665 if (newDoc->children == NULL) {
9666 if (sax != NULL)
9667 ctxt->sax = oldsax;
9668 xmlFreeParserCtxt(ctxt);
9669 newDoc->intSubset = NULL;
9670 newDoc->extSubset = NULL;
9671 xmlFreeDoc(newDoc);
9672 return(-1);
9673 }
9674 nodePush(ctxt, newDoc->children);
9675 if (doc == NULL) {
9676 ctxt->myDoc = newDoc;
9677 } else {
9678 ctxt->myDoc = doc;
9679 newDoc->children->doc = doc;
9680 }
9681
Daniel Veillard87a764e2001-06-20 17:41:10 +00009682 /*
9683 * Get the 4 first bytes and decode the charset
9684 * if enc != XML_CHAR_ENCODING_NONE
9685 * plug some encoding conversion routines.
9686 */
9687 GROW;
9688 start[0] = RAW;
9689 start[1] = NXT(1);
9690 start[2] = NXT(2);
9691 start[3] = NXT(3);
9692 enc = xmlDetectCharEncoding(start, 4);
9693 if (enc != XML_CHAR_ENCODING_NONE) {
9694 xmlSwitchEncoding(ctxt, enc);
9695 }
9696
Owen Taylor3473f882001-02-23 17:55:21 +00009697 /*
9698 * Parse a possible text declaration first
9699 */
Owen Taylor3473f882001-02-23 17:55:21 +00009700 if ((RAW == '<') && (NXT(1) == '?') &&
9701 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9702 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9703 xmlParseTextDecl(ctxt);
9704 }
9705
Owen Taylor3473f882001-02-23 17:55:21 +00009706 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009707 ctxt->depth = depth;
9708
9709 xmlParseContent(ctxt);
9710
Daniel Veillard561b7f82002-03-20 21:55:57 +00009711 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009712 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9714 ctxt->sax->error(ctxt->userData,
9715 "chunk is not well balanced\n");
9716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009718 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009719 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9721 ctxt->sax->error(ctxt->userData,
9722 "extra content at the end of well balanced chunk\n");
9723 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009724 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009725 }
9726 if (ctxt->node != newDoc->children) {
9727 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9728 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9729 ctxt->sax->error(ctxt->userData,
9730 "chunk is not well balanced\n");
9731 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009732 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009733 }
9734
9735 if (!ctxt->wellFormed) {
9736 if (ctxt->errNo == 0)
9737 ret = 1;
9738 else
9739 ret = ctxt->errNo;
9740 } else {
9741 if (list != NULL) {
9742 xmlNodePtr cur;
9743
9744 /*
9745 * Return the newly created nodeset after unlinking it from
9746 * they pseudo parent.
9747 */
9748 cur = newDoc->children->children;
9749 *list = cur;
9750 while (cur != NULL) {
9751 cur->parent = NULL;
9752 cur = cur->next;
9753 }
9754 newDoc->children->children = NULL;
9755 }
9756 ret = 0;
9757 }
9758 if (sax != NULL)
9759 ctxt->sax = oldsax;
9760 xmlFreeParserCtxt(ctxt);
9761 newDoc->intSubset = NULL;
9762 newDoc->extSubset = NULL;
9763 xmlFreeDoc(newDoc);
9764
9765 return(ret);
9766}
9767
9768/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009769 * xmlParseExternalEntity:
9770 * @doc: the document the chunk pertains to
9771 * @sax: the SAX handler bloc (possibly NULL)
9772 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9773 * @depth: Used for loop detection, use 0
9774 * @URL: the URL for the entity to load
9775 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009776 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009777 *
9778 * Parse an external general entity
9779 * An external general parsed entity is well-formed if it matches the
9780 * production labeled extParsedEnt.
9781 *
9782 * [78] extParsedEnt ::= TextDecl? content
9783 *
9784 * Returns 0 if the entity is well formed, -1 in case of args problem and
9785 * the parser error code otherwise
9786 */
9787
9788int
9789xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009790 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009791 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009792 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009793}
9794
9795/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009796 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009797 * @doc: the document the chunk pertains to
9798 * @sax: the SAX handler bloc (possibly NULL)
9799 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9800 * @depth: Used for loop detection, use 0
9801 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009802 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009803 *
9804 * Parse a well-balanced chunk of an XML document
9805 * called by the parser
9806 * The allowed sequence for the Well Balanced Chunk is the one defined by
9807 * the content production in the XML grammar:
9808 *
9809 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9810 *
9811 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9812 * the parser error code otherwise
9813 */
9814
9815int
9816xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009817 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009818 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9819 depth, string, lst, 0 );
9820}
9821
9822/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009823 * xmlParseBalancedChunkMemoryInternal:
9824 * @oldctxt: the existing parsing context
9825 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9826 * @user_data: the user data field for the parser context
9827 * @lst: the return value for the set of parsed nodes
9828 *
9829 *
9830 * Parse a well-balanced chunk of an XML document
9831 * called by the parser
9832 * The allowed sequence for the Well Balanced Chunk is the one defined by
9833 * the content production in the XML grammar:
9834 *
9835 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9836 *
9837 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9838 * the parser error code otherwise
9839 *
9840 * In case recover is set to 1, the nodelist will not be empty even if
9841 * the parsed chunk is not well balanced.
9842 */
9843static int
9844xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9845 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9846 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009847 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009848 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009849 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009850 int size;
9851 int ret = 0;
9852
9853 if (oldctxt->depth > 40) {
9854 return(XML_ERR_ENTITY_LOOP);
9855 }
9856
9857
9858 if (lst != NULL)
9859 *lst = NULL;
9860 if (string == NULL)
9861 return(-1);
9862
9863 size = xmlStrlen(string);
9864
9865 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9866 if (ctxt == NULL) return(-1);
9867 if (user_data != NULL)
9868 ctxt->userData = user_data;
9869 else
9870 ctxt->userData = ctxt;
9871
9872 oldsax = ctxt->sax;
9873 ctxt->sax = oldctxt->sax;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009874 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009875 newDoc = xmlNewDoc(BAD_CAST "1.0");
9876 if (newDoc == NULL) {
9877 ctxt->sax = oldsax;
9878 xmlFreeParserCtxt(ctxt);
9879 return(-1);
9880 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009881 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009882 } else {
9883 ctxt->myDoc = oldctxt->myDoc;
9884 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009885 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009886 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009887 BAD_CAST "pseudoroot", NULL);
9888 if (ctxt->myDoc->children == NULL) {
9889 ctxt->sax = oldsax;
9890 xmlFreeParserCtxt(ctxt);
9891 if (newDoc != NULL)
9892 xmlFreeDoc(newDoc);
9893 return(-1);
9894 }
9895 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009896 ctxt->instate = XML_PARSER_CONTENT;
9897 ctxt->depth = oldctxt->depth + 1;
9898
9899 /*
9900 * Doing validity checking on chunk doesn't make sense
9901 */
9902 ctxt->validate = 0;
9903 ctxt->loadsubset = oldctxt->loadsubset;
9904
Daniel Veillard68e9e742002-11-16 15:35:11 +00009905 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009906 if ((RAW == '<') && (NXT(1) == '/')) {
9907 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9909 ctxt->sax->error(ctxt->userData,
9910 "chunk is not well balanced\n");
9911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009913 } else if (RAW != 0) {
9914 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9916 ctxt->sax->error(ctxt->userData,
9917 "extra content at the end of well balanced chunk\n");
9918 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009919 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009920 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009921 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009922 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9924 ctxt->sax->error(ctxt->userData,
9925 "chunk is not well balanced\n");
9926 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009927 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009928 }
9929
9930 if (!ctxt->wellFormed) {
9931 if (ctxt->errNo == 0)
9932 ret = 1;
9933 else
9934 ret = ctxt->errNo;
9935 } else {
9936 ret = 0;
9937 }
9938
9939 if ((lst != NULL) && (ret == 0)) {
9940 xmlNodePtr cur;
9941
9942 /*
9943 * Return the newly created nodeset after unlinking it from
9944 * they pseudo parent.
9945 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009946 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009947 *lst = cur;
9948 while (cur != NULL) {
9949 cur->parent = NULL;
9950 cur = cur->next;
9951 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009952 ctxt->myDoc->children->children = NULL;
9953 }
9954 if (ctxt->myDoc != NULL) {
9955 xmlFreeNode(ctxt->myDoc->children);
9956 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009957 }
9958
9959 ctxt->sax = oldsax;
9960 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009961 if (newDoc != NULL)
9962 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009963
9964 return(ret);
9965}
9966
9967/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009968 * xmlParseBalancedChunkMemoryRecover:
9969 * @doc: the document the chunk pertains to
9970 * @sax: the SAX handler bloc (possibly NULL)
9971 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9972 * @depth: Used for loop detection, use 0
9973 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9974 * @lst: the return value for the set of parsed nodes
9975 * @recover: return nodes even if the data is broken (use 0)
9976 *
9977 *
9978 * Parse a well-balanced chunk of an XML document
9979 * called by the parser
9980 * The allowed sequence for the Well Balanced Chunk is the one defined by
9981 * the content production in the XML grammar:
9982 *
9983 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9984 *
9985 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9986 * the parser error code otherwise
9987 *
9988 * In case recover is set to 1, the nodelist will not be empty even if
9989 * the parsed chunk is not well balanced.
9990 */
9991int
9992xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9993 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
9994 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +00009995 xmlParserCtxtPtr ctxt;
9996 xmlDocPtr newDoc;
9997 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +00009998 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +00009999 int size;
10000 int ret = 0;
10001
10002 if (depth > 40) {
10003 return(XML_ERR_ENTITY_LOOP);
10004 }
10005
10006
Daniel Veillardcda96922001-08-21 10:56:31 +000010007 if (lst != NULL)
10008 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010009 if (string == NULL)
10010 return(-1);
10011
10012 size = xmlStrlen(string);
10013
10014 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10015 if (ctxt == NULL) return(-1);
10016 ctxt->userData = ctxt;
10017 if (sax != NULL) {
10018 oldsax = ctxt->sax;
10019 ctxt->sax = sax;
10020 if (user_data != NULL)
10021 ctxt->userData = user_data;
10022 }
10023 newDoc = xmlNewDoc(BAD_CAST "1.0");
10024 if (newDoc == NULL) {
10025 xmlFreeParserCtxt(ctxt);
10026 return(-1);
10027 }
10028 if (doc != NULL) {
10029 newDoc->intSubset = doc->intSubset;
10030 newDoc->extSubset = doc->extSubset;
10031 }
10032 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10033 if (newDoc->children == NULL) {
10034 if (sax != NULL)
10035 ctxt->sax = oldsax;
10036 xmlFreeParserCtxt(ctxt);
10037 newDoc->intSubset = NULL;
10038 newDoc->extSubset = NULL;
10039 xmlFreeDoc(newDoc);
10040 return(-1);
10041 }
10042 nodePush(ctxt, newDoc->children);
10043 if (doc == NULL) {
10044 ctxt->myDoc = newDoc;
10045 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010046 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010047 newDoc->children->doc = doc;
10048 }
10049 ctxt->instate = XML_PARSER_CONTENT;
10050 ctxt->depth = depth;
10051
10052 /*
10053 * Doing validity checking on chunk doesn't make sense
10054 */
10055 ctxt->validate = 0;
10056 ctxt->loadsubset = 0;
10057
Daniel Veillardb39bc392002-10-26 19:29:51 +000010058 if ( doc != NULL ){
10059 content = doc->children;
10060 doc->children = NULL;
10061 xmlParseContent(ctxt);
10062 doc->children = content;
10063 }
10064 else {
10065 xmlParseContent(ctxt);
10066 }
Owen Taylor3473f882001-02-23 17:55:21 +000010067 if ((RAW == '<') && (NXT(1) == '/')) {
10068 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10070 ctxt->sax->error(ctxt->userData,
10071 "chunk is not well balanced\n");
10072 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010073 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010074 } else if (RAW != 0) {
10075 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10077 ctxt->sax->error(ctxt->userData,
10078 "extra content at the end of well balanced chunk\n");
10079 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010080 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010081 }
10082 if (ctxt->node != newDoc->children) {
10083 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10085 ctxt->sax->error(ctxt->userData,
10086 "chunk is not well balanced\n");
10087 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010088 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010089 }
10090
10091 if (!ctxt->wellFormed) {
10092 if (ctxt->errNo == 0)
10093 ret = 1;
10094 else
10095 ret = ctxt->errNo;
10096 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010097 ret = 0;
10098 }
10099
10100 if (lst != NULL && (ret == 0 || recover == 1)) {
10101 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010102
10103 /*
10104 * Return the newly created nodeset after unlinking it from
10105 * they pseudo parent.
10106 */
10107 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010108 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010109 while (cur != NULL) {
10110 cur->parent = NULL;
10111 cur = cur->next;
10112 }
10113 newDoc->children->children = NULL;
10114 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010115
Owen Taylor3473f882001-02-23 17:55:21 +000010116 if (sax != NULL)
10117 ctxt->sax = oldsax;
10118 xmlFreeParserCtxt(ctxt);
10119 newDoc->intSubset = NULL;
10120 newDoc->extSubset = NULL;
10121 xmlFreeDoc(newDoc);
10122
10123 return(ret);
10124}
10125
10126/**
10127 * xmlSAXParseEntity:
10128 * @sax: the SAX handler block
10129 * @filename: the filename
10130 *
10131 * parse an XML external entity out of context and build a tree.
10132 * It use the given SAX function block to handle the parsing callback.
10133 * If sax is NULL, fallback to the default DOM tree building routines.
10134 *
10135 * [78] extParsedEnt ::= TextDecl? content
10136 *
10137 * This correspond to a "Well Balanced" chunk
10138 *
10139 * Returns the resulting document tree
10140 */
10141
10142xmlDocPtr
10143xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10144 xmlDocPtr ret;
10145 xmlParserCtxtPtr ctxt;
10146 char *directory = NULL;
10147
10148 ctxt = xmlCreateFileParserCtxt(filename);
10149 if (ctxt == NULL) {
10150 return(NULL);
10151 }
10152 if (sax != NULL) {
10153 if (ctxt->sax != NULL)
10154 xmlFree(ctxt->sax);
10155 ctxt->sax = sax;
10156 ctxt->userData = NULL;
10157 }
10158
10159 if ((ctxt->directory == NULL) && (directory == NULL))
10160 directory = xmlParserGetDirectory(filename);
10161
10162 xmlParseExtParsedEnt(ctxt);
10163
10164 if (ctxt->wellFormed)
10165 ret = ctxt->myDoc;
10166 else {
10167 ret = NULL;
10168 xmlFreeDoc(ctxt->myDoc);
10169 ctxt->myDoc = NULL;
10170 }
10171 if (sax != NULL)
10172 ctxt->sax = NULL;
10173 xmlFreeParserCtxt(ctxt);
10174
10175 return(ret);
10176}
10177
10178/**
10179 * xmlParseEntity:
10180 * @filename: the filename
10181 *
10182 * parse an XML external entity out of context and build a tree.
10183 *
10184 * [78] extParsedEnt ::= TextDecl? content
10185 *
10186 * This correspond to a "Well Balanced" chunk
10187 *
10188 * Returns the resulting document tree
10189 */
10190
10191xmlDocPtr
10192xmlParseEntity(const char *filename) {
10193 return(xmlSAXParseEntity(NULL, filename));
10194}
10195
10196/**
10197 * xmlCreateEntityParserCtxt:
10198 * @URL: the entity URL
10199 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010200 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010201 *
10202 * Create a parser context for an external entity
10203 * Automatic support for ZLIB/Compress compressed document is provided
10204 * by default if found at compile-time.
10205 *
10206 * Returns the new parser context or NULL
10207 */
10208xmlParserCtxtPtr
10209xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10210 const xmlChar *base) {
10211 xmlParserCtxtPtr ctxt;
10212 xmlParserInputPtr inputStream;
10213 char *directory = NULL;
10214 xmlChar *uri;
10215
10216 ctxt = xmlNewParserCtxt();
10217 if (ctxt == NULL) {
10218 return(NULL);
10219 }
10220
10221 uri = xmlBuildURI(URL, base);
10222
10223 if (uri == NULL) {
10224 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10225 if (inputStream == NULL) {
10226 xmlFreeParserCtxt(ctxt);
10227 return(NULL);
10228 }
10229
10230 inputPush(ctxt, inputStream);
10231
10232 if ((ctxt->directory == NULL) && (directory == NULL))
10233 directory = xmlParserGetDirectory((char *)URL);
10234 if ((ctxt->directory == NULL) && (directory != NULL))
10235 ctxt->directory = directory;
10236 } else {
10237 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10238 if (inputStream == NULL) {
10239 xmlFree(uri);
10240 xmlFreeParserCtxt(ctxt);
10241 return(NULL);
10242 }
10243
10244 inputPush(ctxt, inputStream);
10245
10246 if ((ctxt->directory == NULL) && (directory == NULL))
10247 directory = xmlParserGetDirectory((char *)uri);
10248 if ((ctxt->directory == NULL) && (directory != NULL))
10249 ctxt->directory = directory;
10250 xmlFree(uri);
10251 }
10252
10253 return(ctxt);
10254}
10255
10256/************************************************************************
10257 * *
10258 * Front ends when parsing from a file *
10259 * *
10260 ************************************************************************/
10261
10262/**
10263 * xmlCreateFileParserCtxt:
10264 * @filename: the filename
10265 *
10266 * Create a parser context for a file content.
10267 * Automatic support for ZLIB/Compress compressed document is provided
10268 * by default if found at compile-time.
10269 *
10270 * Returns the new parser context or NULL
10271 */
10272xmlParserCtxtPtr
10273xmlCreateFileParserCtxt(const char *filename)
10274{
10275 xmlParserCtxtPtr ctxt;
10276 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010277 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010278 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010279
Owen Taylor3473f882001-02-23 17:55:21 +000010280 ctxt = xmlNewParserCtxt();
10281 if (ctxt == NULL) {
10282 if (xmlDefaultSAXHandler.error != NULL) {
10283 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10284 }
10285 return(NULL);
10286 }
10287
Daniel Veillardf4862f02002-09-10 11:13:43 +000010288 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10289 if (normalized == NULL) {
10290 xmlFreeParserCtxt(ctxt);
10291 return(NULL);
10292 }
10293 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010294 if (inputStream == NULL) {
10295 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010296 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010297 return(NULL);
10298 }
10299
Owen Taylor3473f882001-02-23 17:55:21 +000010300 inputPush(ctxt, inputStream);
10301 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010302 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010303 if ((ctxt->directory == NULL) && (directory != NULL))
10304 ctxt->directory = directory;
10305
Daniel Veillardf4862f02002-09-10 11:13:43 +000010306 xmlFree(normalized);
10307
Owen Taylor3473f882001-02-23 17:55:21 +000010308 return(ctxt);
10309}
10310
10311/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010312 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010313 * @sax: the SAX handler block
10314 * @filename: the filename
10315 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10316 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010317 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010318 *
10319 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10320 * compressed document is provided by default if found at compile-time.
10321 * It use the given SAX function block to handle the parsing callback.
10322 * If sax is NULL, fallback to the default DOM tree building routines.
10323 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010324 * User data (void *) is stored within the parser context in the
10325 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010326 *
Owen Taylor3473f882001-02-23 17:55:21 +000010327 * Returns the resulting document tree
10328 */
10329
10330xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010331xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10332 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010333 xmlDocPtr ret;
10334 xmlParserCtxtPtr ctxt;
10335 char *directory = NULL;
10336
Daniel Veillard635ef722001-10-29 11:48:19 +000010337 xmlInitParser();
10338
Owen Taylor3473f882001-02-23 17:55:21 +000010339 ctxt = xmlCreateFileParserCtxt(filename);
10340 if (ctxt == NULL) {
10341 return(NULL);
10342 }
10343 if (sax != NULL) {
10344 if (ctxt->sax != NULL)
10345 xmlFree(ctxt->sax);
10346 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010347 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010348 if (data!=NULL) {
10349 ctxt->_private=data;
10350 }
Owen Taylor3473f882001-02-23 17:55:21 +000010351
10352 if ((ctxt->directory == NULL) && (directory == NULL))
10353 directory = xmlParserGetDirectory(filename);
10354 if ((ctxt->directory == NULL) && (directory != NULL))
10355 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10356
Daniel Veillarddad3f682002-11-17 16:47:27 +000010357 ctxt->recovery = recovery;
10358
Owen Taylor3473f882001-02-23 17:55:21 +000010359 xmlParseDocument(ctxt);
10360
10361 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10362 else {
10363 ret = NULL;
10364 xmlFreeDoc(ctxt->myDoc);
10365 ctxt->myDoc = NULL;
10366 }
10367 if (sax != NULL)
10368 ctxt->sax = NULL;
10369 xmlFreeParserCtxt(ctxt);
10370
10371 return(ret);
10372}
10373
10374/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010375 * xmlSAXParseFile:
10376 * @sax: the SAX handler block
10377 * @filename: the filename
10378 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10379 * documents
10380 *
10381 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10382 * compressed document is provided by default if found at compile-time.
10383 * It use the given SAX function block to handle the parsing callback.
10384 * If sax is NULL, fallback to the default DOM tree building routines.
10385 *
10386 * Returns the resulting document tree
10387 */
10388
10389xmlDocPtr
10390xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10391 int recovery) {
10392 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10393}
10394
10395/**
Owen Taylor3473f882001-02-23 17:55:21 +000010396 * xmlRecoverDoc:
10397 * @cur: a pointer to an array of xmlChar
10398 *
10399 * parse an XML in-memory document and build a tree.
10400 * In the case the document is not Well Formed, a tree is built anyway
10401 *
10402 * Returns the resulting document tree
10403 */
10404
10405xmlDocPtr
10406xmlRecoverDoc(xmlChar *cur) {
10407 return(xmlSAXParseDoc(NULL, cur, 1));
10408}
10409
10410/**
10411 * xmlParseFile:
10412 * @filename: the filename
10413 *
10414 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10415 * compressed document is provided by default if found at compile-time.
10416 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010417 * Returns the resulting document tree if the file was wellformed,
10418 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010419 */
10420
10421xmlDocPtr
10422xmlParseFile(const char *filename) {
10423 return(xmlSAXParseFile(NULL, filename, 0));
10424}
10425
10426/**
10427 * xmlRecoverFile:
10428 * @filename: the filename
10429 *
10430 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10431 * compressed document is provided by default if found at compile-time.
10432 * In the case the document is not Well Formed, a tree is built anyway
10433 *
10434 * Returns the resulting document tree
10435 */
10436
10437xmlDocPtr
10438xmlRecoverFile(const char *filename) {
10439 return(xmlSAXParseFile(NULL, filename, 1));
10440}
10441
10442
10443/**
10444 * xmlSetupParserForBuffer:
10445 * @ctxt: an XML parser context
10446 * @buffer: a xmlChar * buffer
10447 * @filename: a file name
10448 *
10449 * Setup the parser context to parse a new buffer; Clears any prior
10450 * contents from the parser context. The buffer parameter must not be
10451 * NULL, but the filename parameter can be
10452 */
10453void
10454xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10455 const char* filename)
10456{
10457 xmlParserInputPtr input;
10458
10459 input = xmlNewInputStream(ctxt);
10460 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010461 xmlGenericError(xmlGenericErrorContext,
10462 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010463 xmlFree(ctxt);
10464 return;
10465 }
10466
10467 xmlClearParserCtxt(ctxt);
10468 if (filename != NULL)
10469 input->filename = xmlMemStrdup(filename);
10470 input->base = buffer;
10471 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010472 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010473 inputPush(ctxt, input);
10474}
10475
10476/**
10477 * xmlSAXUserParseFile:
10478 * @sax: a SAX handler
10479 * @user_data: The user data returned on SAX callbacks
10480 * @filename: a file name
10481 *
10482 * parse an XML file and call the given SAX handler routines.
10483 * Automatic support for ZLIB/Compress compressed document is provided
10484 *
10485 * Returns 0 in case of success or a error number otherwise
10486 */
10487int
10488xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10489 const char *filename) {
10490 int ret = 0;
10491 xmlParserCtxtPtr ctxt;
10492
10493 ctxt = xmlCreateFileParserCtxt(filename);
10494 if (ctxt == NULL) return -1;
10495 if (ctxt->sax != &xmlDefaultSAXHandler)
10496 xmlFree(ctxt->sax);
10497 ctxt->sax = sax;
10498 if (user_data != NULL)
10499 ctxt->userData = user_data;
10500
10501 xmlParseDocument(ctxt);
10502
10503 if (ctxt->wellFormed)
10504 ret = 0;
10505 else {
10506 if (ctxt->errNo != 0)
10507 ret = ctxt->errNo;
10508 else
10509 ret = -1;
10510 }
10511 if (sax != NULL)
10512 ctxt->sax = NULL;
10513 xmlFreeParserCtxt(ctxt);
10514
10515 return ret;
10516}
10517
10518/************************************************************************
10519 * *
10520 * Front ends when parsing from memory *
10521 * *
10522 ************************************************************************/
10523
10524/**
10525 * xmlCreateMemoryParserCtxt:
10526 * @buffer: a pointer to a char array
10527 * @size: the size of the array
10528 *
10529 * Create a parser context for an XML in-memory document.
10530 *
10531 * Returns the new parser context or NULL
10532 */
10533xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010534xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010535 xmlParserCtxtPtr ctxt;
10536 xmlParserInputPtr input;
10537 xmlParserInputBufferPtr buf;
10538
10539 if (buffer == NULL)
10540 return(NULL);
10541 if (size <= 0)
10542 return(NULL);
10543
10544 ctxt = xmlNewParserCtxt();
10545 if (ctxt == NULL)
10546 return(NULL);
10547
10548 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010549 if (buf == NULL) {
10550 xmlFreeParserCtxt(ctxt);
10551 return(NULL);
10552 }
Owen Taylor3473f882001-02-23 17:55:21 +000010553
10554 input = xmlNewInputStream(ctxt);
10555 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010556 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010557 xmlFreeParserCtxt(ctxt);
10558 return(NULL);
10559 }
10560
10561 input->filename = NULL;
10562 input->buf = buf;
10563 input->base = input->buf->buffer->content;
10564 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010565 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010566
10567 inputPush(ctxt, input);
10568 return(ctxt);
10569}
10570
10571/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010572 * xmlSAXParseMemoryWithData:
10573 * @sax: the SAX handler block
10574 * @buffer: an pointer to a char array
10575 * @size: the size of the array
10576 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10577 * documents
10578 * @data: the userdata
10579 *
10580 * parse an XML in-memory block and use the given SAX function block
10581 * to handle the parsing callback. If sax is NULL, fallback to the default
10582 * DOM tree building routines.
10583 *
10584 * User data (void *) is stored within the parser context in the
10585 * context's _private member, so it is available nearly everywhere in libxml
10586 *
10587 * Returns the resulting document tree
10588 */
10589
10590xmlDocPtr
10591xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10592 int size, int recovery, void *data) {
10593 xmlDocPtr ret;
10594 xmlParserCtxtPtr ctxt;
10595
10596 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10597 if (ctxt == NULL) return(NULL);
10598 if (sax != NULL) {
10599 if (ctxt->sax != NULL)
10600 xmlFree(ctxt->sax);
10601 ctxt->sax = sax;
10602 }
10603 if (data!=NULL) {
10604 ctxt->_private=data;
10605 }
10606
10607 xmlParseDocument(ctxt);
10608
10609 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10610 else {
10611 ret = NULL;
10612 xmlFreeDoc(ctxt->myDoc);
10613 ctxt->myDoc = NULL;
10614 }
10615 if (sax != NULL)
10616 ctxt->sax = NULL;
10617 xmlFreeParserCtxt(ctxt);
10618
10619 return(ret);
10620}
10621
10622/**
Owen Taylor3473f882001-02-23 17:55:21 +000010623 * xmlSAXParseMemory:
10624 * @sax: the SAX handler block
10625 * @buffer: an pointer to a char array
10626 * @size: the size of the array
10627 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10628 * documents
10629 *
10630 * parse an XML in-memory block and use the given SAX function block
10631 * to handle the parsing callback. If sax is NULL, fallback to the default
10632 * DOM tree building routines.
10633 *
10634 * Returns the resulting document tree
10635 */
10636xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010637xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10638 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010639 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010640}
10641
10642/**
10643 * xmlParseMemory:
10644 * @buffer: an pointer to a char array
10645 * @size: the size of the array
10646 *
10647 * parse an XML in-memory block and build a tree.
10648 *
10649 * Returns the resulting document tree
10650 */
10651
Daniel Veillard50822cb2001-07-26 20:05:51 +000010652xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010653 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10654}
10655
10656/**
10657 * xmlRecoverMemory:
10658 * @buffer: an pointer to a char array
10659 * @size: the size of the array
10660 *
10661 * parse an XML in-memory block and build a tree.
10662 * In the case the document is not Well Formed, a tree is built anyway
10663 *
10664 * Returns the resulting document tree
10665 */
10666
Daniel Veillard50822cb2001-07-26 20:05:51 +000010667xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010668 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10669}
10670
10671/**
10672 * xmlSAXUserParseMemory:
10673 * @sax: a SAX handler
10674 * @user_data: The user data returned on SAX callbacks
10675 * @buffer: an in-memory XML document input
10676 * @size: the length of the XML document in bytes
10677 *
10678 * A better SAX parsing routine.
10679 * parse an XML in-memory buffer and call the given SAX handler routines.
10680 *
10681 * Returns 0 in case of success or a error number otherwise
10682 */
10683int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010684 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010685 int ret = 0;
10686 xmlParserCtxtPtr ctxt;
10687 xmlSAXHandlerPtr oldsax = NULL;
10688
Daniel Veillard9e923512002-08-14 08:48:52 +000010689 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010690 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10691 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010692 oldsax = ctxt->sax;
10693 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010694 if (user_data != NULL)
10695 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010696
10697 xmlParseDocument(ctxt);
10698
10699 if (ctxt->wellFormed)
10700 ret = 0;
10701 else {
10702 if (ctxt->errNo != 0)
10703 ret = ctxt->errNo;
10704 else
10705 ret = -1;
10706 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010707 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010708 xmlFreeParserCtxt(ctxt);
10709
10710 return ret;
10711}
10712
10713/**
10714 * xmlCreateDocParserCtxt:
10715 * @cur: a pointer to an array of xmlChar
10716 *
10717 * Creates a parser context for an XML in-memory document.
10718 *
10719 * Returns the new parser context or NULL
10720 */
10721xmlParserCtxtPtr
10722xmlCreateDocParserCtxt(xmlChar *cur) {
10723 int len;
10724
10725 if (cur == NULL)
10726 return(NULL);
10727 len = xmlStrlen(cur);
10728 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10729}
10730
10731/**
10732 * xmlSAXParseDoc:
10733 * @sax: the SAX handler block
10734 * @cur: a pointer to an array of xmlChar
10735 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10736 * documents
10737 *
10738 * parse an XML in-memory document and build a tree.
10739 * It use the given SAX function block to handle the parsing callback.
10740 * If sax is NULL, fallback to the default DOM tree building routines.
10741 *
10742 * Returns the resulting document tree
10743 */
10744
10745xmlDocPtr
10746xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10747 xmlDocPtr ret;
10748 xmlParserCtxtPtr ctxt;
10749
10750 if (cur == NULL) return(NULL);
10751
10752
10753 ctxt = xmlCreateDocParserCtxt(cur);
10754 if (ctxt == NULL) return(NULL);
10755 if (sax != NULL) {
10756 ctxt->sax = sax;
10757 ctxt->userData = NULL;
10758 }
10759
10760 xmlParseDocument(ctxt);
10761 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10762 else {
10763 ret = NULL;
10764 xmlFreeDoc(ctxt->myDoc);
10765 ctxt->myDoc = NULL;
10766 }
10767 if (sax != NULL)
10768 ctxt->sax = NULL;
10769 xmlFreeParserCtxt(ctxt);
10770
10771 return(ret);
10772}
10773
10774/**
10775 * xmlParseDoc:
10776 * @cur: a pointer to an array of xmlChar
10777 *
10778 * parse an XML in-memory document and build a tree.
10779 *
10780 * Returns the resulting document tree
10781 */
10782
10783xmlDocPtr
10784xmlParseDoc(xmlChar *cur) {
10785 return(xmlSAXParseDoc(NULL, cur, 0));
10786}
10787
Daniel Veillard8107a222002-01-13 14:10:10 +000010788/************************************************************************
10789 * *
10790 * Specific function to keep track of entities references *
10791 * and used by the XSLT debugger *
10792 * *
10793 ************************************************************************/
10794
10795static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10796
10797/**
10798 * xmlAddEntityReference:
10799 * @ent : A valid entity
10800 * @firstNode : A valid first node for children of entity
10801 * @lastNode : A valid last node of children entity
10802 *
10803 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10804 */
10805static void
10806xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10807 xmlNodePtr lastNode)
10808{
10809 if (xmlEntityRefFunc != NULL) {
10810 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10811 }
10812}
10813
10814
10815/**
10816 * xmlSetEntityReferenceFunc:
10817 * @func : A valid function
10818 *
10819 * Set the function to call call back when a xml reference has been made
10820 */
10821void
10822xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10823{
10824 xmlEntityRefFunc = func;
10825}
Owen Taylor3473f882001-02-23 17:55:21 +000010826
10827/************************************************************************
10828 * *
10829 * Miscellaneous *
10830 * *
10831 ************************************************************************/
10832
10833#ifdef LIBXML_XPATH_ENABLED
10834#include <libxml/xpath.h>
10835#endif
10836
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010837extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010838static int xmlParserInitialized = 0;
10839
10840/**
10841 * xmlInitParser:
10842 *
10843 * Initialization function for the XML parser.
10844 * This is not reentrant. Call once before processing in case of
10845 * use in multithreaded programs.
10846 */
10847
10848void
10849xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010850 if (xmlParserInitialized != 0)
10851 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010852
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010853 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10854 (xmlGenericError == NULL))
10855 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010856 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010857 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010858 xmlInitCharEncodingHandlers();
10859 xmlInitializePredefinedEntities();
10860 xmlDefaultSAXHandlerInit();
10861 xmlRegisterDefaultInputCallbacks();
10862 xmlRegisterDefaultOutputCallbacks();
10863#ifdef LIBXML_HTML_ENABLED
10864 htmlInitAutoClose();
10865 htmlDefaultSAXHandlerInit();
10866#endif
10867#ifdef LIBXML_XPATH_ENABLED
10868 xmlXPathInit();
10869#endif
10870 xmlParserInitialized = 1;
10871}
10872
10873/**
10874 * xmlCleanupParser:
10875 *
10876 * Cleanup function for the XML parser. It tries to reclaim all
10877 * parsing related global memory allocated for the parser processing.
10878 * It doesn't deallocate any document related memory. Calling this
10879 * function should not prevent reusing the parser.
10880 */
10881
10882void
10883xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010884 xmlCleanupCharEncodingHandlers();
10885 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010886#ifdef LIBXML_CATALOG_ENABLED
10887 xmlCatalogCleanup();
10888#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010889 xmlCleanupThreads();
10890 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010891}