blob: 16daa80e5801ef9f169b2986f2c88ce0f72d0cab [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets that are allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
303/*
304 * Macros for accessing the content. Those should be used only by the parser,
305 * and not exported.
306 *
307 * Dirty macros, i.e. one often need to make assumption on the context to
308 * use them
309 *
310 * CUR_PTR return the current pointer to the xmlChar to be parsed.
311 * To be used with extreme caution since operations consuming
312 * characters may move the input buffer to a different location !
313 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
314 * This should be used internally by the parser
315 * only to compare to ASCII values otherwise it would break when
316 * running with UTF-8 encoding.
317 * RAW same as CUR but in the input buffer, bypass any token
318 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
 *           only to compare against ASCII based substrings.
321 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
322 * strings within the parser.
323 *
324 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
325 *
326 * NEXT Skip to the next character, this does the proper decoding
327 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000328 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000329 * CUR_CHAR(l) returns the current unicode character (int), set l
330 * to the number of xmlChars used for the encoding [0-5].
331 * CUR_SCHAR same but operate on a string instead of the context
332 * COPY_BUF copy the current unicode char to the target buffer, increment
333 * the index
334 * GROW, SHRINK handling of input buffers
335 */
336
/* RAW and CUR both read the xmlChar at the current input position. */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* NXT(val) reads the xmlChar located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR is the current input pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val) advances the input pointer and the character counter by val,
 * hands a '%' found at the new position to xmlParserHandlePEReference(),
 * and pops the input when it is exhausted and cannot be grown.
 * Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* SKIP_BLANKS and NEXT are shorthands for calls on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 steps over exactly one xmlChar and asks for more input when the
 * new position holds a 0. It expands to a brace block, not a statement.
 */
#define NEXT1 {								\
    ctxt->input->cur++;							\
    ctxt->nbChars++;							\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  }

/*
 * NEXTL(l) updates line/column from the xmlChar being left, advances the
 * input pointer by l, and hands a '%' found at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*ctxt->input->cur == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else {								\
        ctxt->input->col++;						\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

/* Both macros take l by name and pass its address to the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF appends character v to buffer b at index i, advancing i: a
 * single xmlChar store when l is 1, xmlCopyCharMultiByte() otherwise.
 * It expands to an unbraced if/else.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1)								\
        b[i++] = (xmlChar) v;						\
    else								\
        i += xmlCopyCharMultiByte(&b[i],v)
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer to the new size.
 * If the reallocation fails, an error is reported, the previous storage
 * is released (the enclosing function is about to return, so nothing
 * else could free it) and the *enclosing function* returns NULL.
 * It can therefore only be expanded inside functions returning a pointer
 * that own both `buffer` and `buffer##_size`.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (buffer##_grown == NULL) {                                       \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001772 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1773 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001774 ctxt->sax->error(ctxt->userData,
1775 "Name %s is not XML Namespace compliant\n",
1776 name);
1777 }
1778 }
1779 cur++;
1780
Owen Taylor3473f882001-02-23 17:55:21 +00001781 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1782 buf[len++] = c;
1783 c = *cur++;
1784 }
1785 if (len >= max) {
1786 /*
1787 * Okay someone managed to make a huge name, so he's ready to pay
1788 * for the processing speed.
1789 */
1790 max = len * 2;
1791
1792 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1793 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001794 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1795 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001796 ctxt->sax->error(ctxt->userData,
1797 "xmlSplitQName: out of memory\n");
1798 return(NULL);
1799 }
1800 memcpy(buffer, buf, len);
1801 while (c != 0) { /* tested bigname2.xml */
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001807 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1808 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001809 ctxt->sax->error(ctxt->userData,
1810 "xmlSplitQName: out of memory\n");
1811 return(NULL);
1812 }
1813 }
1814 buffer[len++] = c;
1815 c = *cur++;
1816 }
1817 buffer[len] = 0;
1818 }
1819
1820 if (buffer == NULL)
1821 ret = xmlStrndup(buf, len);
1822 else {
1823 ret = buffer;
1824 }
1825 }
1826
1827 return(ret);
1828}
1829
1830/************************************************************************
1831 * *
1832 * The parser itself *
1833 * Relates to http://www.w3.org/TR/REC-xml *
1834 * *
1835 ************************************************************************/
1836
Daniel Veillard76d66f42001-05-16 21:05:17 +00001837static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001838/**
1839 * xmlParseName:
1840 * @ctxt: an XML parser context
1841 *
1842 * parse an XML name.
1843 *
1844 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1845 * CombiningChar | Extender
1846 *
1847 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1848 *
1849 * [6] Names ::= Name (S Name)*
1850 *
1851 * Returns the Name parsed or NULL
1852 */
1853
1854xmlChar *
1855xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001856 const xmlChar *in;
1857 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001858 int count = 0;
1859
1860 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001861
1862 /*
1863 * Accelerator for simple ASCII names
1864 */
1865 in = ctxt->input->cur;
1866 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1867 ((*in >= 0x41) && (*in <= 0x5A)) ||
1868 (*in == '_') || (*in == ':')) {
1869 in++;
1870 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1871 ((*in >= 0x41) && (*in <= 0x5A)) ||
1872 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 (*in == '_') || (*in == '-') ||
1874 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001875 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001876 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001877 count = in - ctxt->input->cur;
1878 ret = xmlStrndup(ctxt->input->cur, count);
1879 ctxt->input->cur = in;
1880 return(ret);
1881 }
1882 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001883 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001884}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001885
Daniel Veillard46de64e2002-05-29 08:21:33 +00001886/**
1887 * xmlParseNameAndCompare:
1888 * @ctxt: an XML parser context
1889 *
1890 * parse an XML name and compares for match
1891 * (specialized for endtag parsing)
1892 *
1893 *
1894 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1895 * and the name for mismatch
1896 */
1897
Daniel Veillardf4862f02002-09-10 11:13:43 +00001898static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001899xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1900 const xmlChar *cmp = other;
1901 const xmlChar *in;
1902 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001903
1904 GROW;
1905
1906 in = ctxt->input->cur;
1907 while (*in != 0 && *in == *cmp) {
1908 ++in;
1909 ++cmp;
1910 }
1911 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1912 /* success */
1913 ctxt->input->cur = in;
1914 return (xmlChar*) 1;
1915 }
1916 /* failure (or end of input buffer), check with full function */
1917 ret = xmlParseName (ctxt);
1918 if (ret != 0 && xmlStrEqual (ret, other)) {
1919 xmlFree (ret);
1920 return (xmlChar*) 1;
1921 }
1922 return ret;
1923}
1924
Daniel Veillard76d66f42001-05-16 21:05:17 +00001925static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001926xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1927 xmlChar buf[XML_MAX_NAMELEN + 5];
1928 int len = 0, l;
1929 int c;
1930 int count = 0;
1931
1932 /*
1933 * Handler for more complex cases
1934 */
1935 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001936 c = CUR_CHAR(l);
1937 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1938 (!IS_LETTER(c) && (c != '_') &&
1939 (c != ':'))) {
1940 return(NULL);
1941 }
1942
1943 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1944 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1945 (c == '.') || (c == '-') ||
1946 (c == '_') || (c == ':') ||
1947 (IS_COMBINING(c)) ||
1948 (IS_EXTENDER(c)))) {
1949 if (count++ > 100) {
1950 count = 0;
1951 GROW;
1952 }
1953 COPY_BUF(l,buf,len,c);
1954 NEXTL(l);
1955 c = CUR_CHAR(l);
1956 if (len >= XML_MAX_NAMELEN) {
1957 /*
1958 * Okay someone managed to make a huge name, so he's ready to pay
1959 * for the processing speed.
1960 */
1961 xmlChar *buffer;
1962 int max = len * 2;
1963
1964 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1965 if (buffer == NULL) {
1966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1967 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001968 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001969 return(NULL);
1970 }
1971 memcpy(buffer, buf, len);
1972 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1973 (c == '.') || (c == '-') ||
1974 (c == '_') || (c == ':') ||
1975 (IS_COMBINING(c)) ||
1976 (IS_EXTENDER(c))) {
1977 if (count++ > 100) {
1978 count = 0;
1979 GROW;
1980 }
1981 if (len + 10 > max) {
1982 max *= 2;
1983 buffer = (xmlChar *) xmlRealloc(buffer,
1984 max * sizeof(xmlChar));
1985 if (buffer == NULL) {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001988 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001989 return(NULL);
1990 }
1991 }
1992 COPY_BUF(l,buffer,len,c);
1993 NEXTL(l);
1994 c = CUR_CHAR(l);
1995 }
1996 buffer[len] = 0;
1997 return(buffer);
1998 }
1999 }
2000 return(xmlStrndup(buf, len));
2001}
2002
2003/**
2004 * xmlParseStringName:
2005 * @ctxt: an XML parser context
2006 * @str: a pointer to the string pointer (IN/OUT)
2007 *
2008 * parse an XML name.
2009 *
2010 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2011 * CombiningChar | Extender
2012 *
2013 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2014 *
2015 * [6] Names ::= Name (S Name)*
2016 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002017 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002018 * is updated to the current location in the string.
2019 */
2020
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002021static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002022xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2023 xmlChar buf[XML_MAX_NAMELEN + 5];
2024 const xmlChar *cur = *str;
2025 int len = 0, l;
2026 int c;
2027
2028 c = CUR_SCHAR(cur, l);
2029 if (!IS_LETTER(c) && (c != '_') &&
2030 (c != ':')) {
2031 return(NULL);
2032 }
2033
2034 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2035 (c == '.') || (c == '-') ||
2036 (c == '_') || (c == ':') ||
2037 (IS_COMBINING(c)) ||
2038 (IS_EXTENDER(c))) {
2039 COPY_BUF(l,buf,len,c);
2040 cur += l;
2041 c = CUR_SCHAR(cur, l);
2042 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2043 /*
2044 * Okay someone managed to make a huge name, so he's ready to pay
2045 * for the processing speed.
2046 */
2047 xmlChar *buffer;
2048 int max = len * 2;
2049
2050 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2051 if (buffer == NULL) {
2052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2053 ctxt->sax->error(ctxt->userData,
2054 "xmlParseStringName: out of memory\n");
2055 return(NULL);
2056 }
2057 memcpy(buffer, buf, len);
2058 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2059 (c == '.') || (c == '-') ||
2060 (c == '_') || (c == ':') ||
2061 (IS_COMBINING(c)) ||
2062 (IS_EXTENDER(c))) {
2063 if (len + 10 > max) {
2064 max *= 2;
2065 buffer = (xmlChar *) xmlRealloc(buffer,
2066 max * sizeof(xmlChar));
2067 if (buffer == NULL) {
2068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2069 ctxt->sax->error(ctxt->userData,
2070 "xmlParseStringName: out of memory\n");
2071 return(NULL);
2072 }
2073 }
2074 COPY_BUF(l,buffer,len,c);
2075 cur += l;
2076 c = CUR_SCHAR(cur, l);
2077 }
2078 buffer[len] = 0;
2079 *str = cur;
2080 return(buffer);
2081 }
2082 }
2083 *str = cur;
2084 return(xmlStrndup(buf, len));
2085}
2086
2087/**
2088 * xmlParseNmtoken:
2089 * @ctxt: an XML parser context
2090 *
2091 * parse an XML Nmtoken.
2092 *
2093 * [7] Nmtoken ::= (NameChar)+
2094 *
2095 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2096 *
2097 * Returns the Nmtoken parsed or NULL
2098 */
2099
2100xmlChar *
2101xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2102 xmlChar buf[XML_MAX_NAMELEN + 5];
2103 int len = 0, l;
2104 int c;
2105 int count = 0;
2106
2107 GROW;
2108 c = CUR_CHAR(l);
2109
2110 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2111 (c == '.') || (c == '-') ||
2112 (c == '_') || (c == ':') ||
2113 (IS_COMBINING(c)) ||
2114 (IS_EXTENDER(c))) {
2115 if (count++ > 100) {
2116 count = 0;
2117 GROW;
2118 }
2119 COPY_BUF(l,buf,len,c);
2120 NEXTL(l);
2121 c = CUR_CHAR(l);
2122 if (len >= XML_MAX_NAMELEN) {
2123 /*
2124 * Okay someone managed to make a huge token, so he's ready to pay
2125 * for the processing speed.
2126 */
2127 xmlChar *buffer;
2128 int max = len * 2;
2129
2130 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2131 if (buffer == NULL) {
2132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2133 ctxt->sax->error(ctxt->userData,
2134 "xmlParseNmtoken: out of memory\n");
2135 return(NULL);
2136 }
2137 memcpy(buffer, buf, len);
2138 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2139 (c == '.') || (c == '-') ||
2140 (c == '_') || (c == ':') ||
2141 (IS_COMBINING(c)) ||
2142 (IS_EXTENDER(c))) {
2143 if (count++ > 100) {
2144 count = 0;
2145 GROW;
2146 }
2147 if (len + 10 > max) {
2148 max *= 2;
2149 buffer = (xmlChar *) xmlRealloc(buffer,
2150 max * sizeof(xmlChar));
2151 if (buffer == NULL) {
2152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002154 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002155 return(NULL);
2156 }
2157 }
2158 COPY_BUF(l,buffer,len,c);
2159 NEXTL(l);
2160 c = CUR_CHAR(l);
2161 }
2162 buffer[len] = 0;
2163 return(buffer);
2164 }
2165 }
2166 if (len == 0)
2167 return(NULL);
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
2172 * xmlParseEntityValue:
2173 * @ctxt: an XML parser context
2174 * @orig: if non-NULL store a copy of the original entity value
2175 *
2176 * parse a value for ENTITY declarations
2177 *
2178 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2179 * "'" ([^%&'] | PEReference | Reference)* "'"
2180 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183
2184xmlChar *
2185xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2186 xmlChar *buf = NULL;
2187 int len = 0;
2188 int size = XML_PARSER_BUFFER_SIZE;
2189 int c, l;
2190 xmlChar stop;
2191 xmlChar *ret = NULL;
2192 const xmlChar *cur = NULL;
2193 xmlParserInputPtr input;
2194
2195 if (RAW == '"') stop = '"';
2196 else if (RAW == '\'') stop = '\'';
2197 else {
2198 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 return(NULL);
2204 }
2205 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2206 if (buf == NULL) {
2207 xmlGenericError(xmlGenericErrorContext,
2208 "malloc of %d byte failed\n", size);
2209 return(NULL);
2210 }
2211
2212 /*
2213 * The content of the entity definition is copied in a buffer.
2214 */
2215
2216 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2217 input = ctxt->input;
2218 GROW;
2219 NEXT;
2220 c = CUR_CHAR(l);
2221 /*
2222 * NOTE: 4.4.5 Included in Literal
2223 * When a parameter entity reference appears in a literal entity
2224 * value, ... a single or double quote character in the replacement
2225 * text is always treated as a normal data character and will not
2226 * terminate the literal.
2227 * In practice it means we stop the loop only when back at parsing
2228 * the initial entity and the quote is found
2229 */
2230 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2231 (ctxt->input != input))) {
2232 if (len + 5 >= size) {
2233 size *= 2;
2234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2235 if (buf == NULL) {
2236 xmlGenericError(xmlGenericErrorContext,
2237 "realloc of %d byte failed\n", size);
2238 return(NULL);
2239 }
2240 }
2241 COPY_BUF(l,buf,len,c);
2242 NEXTL(l);
2243 /*
2244 * Pop-up of finished entities.
2245 */
2246 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2247 xmlPopInput(ctxt);
2248
2249 GROW;
2250 c = CUR_CHAR(l);
2251 if (c == 0) {
2252 GROW;
2253 c = CUR_CHAR(l);
2254 }
2255 }
2256 buf[len] = 0;
2257
2258 /*
2259 * Raise problem w.r.t. '&' and '%' being used in non-entities
2260 * reference constructs. Note Charref will be handled in
2261 * xmlStringDecodeEntities()
2262 */
2263 cur = buf;
2264 while (*cur != 0) { /* non input consuming */
2265 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2266 xmlChar *name;
2267 xmlChar tmp = *cur;
2268
2269 cur++;
2270 name = xmlParseStringName(ctxt, &cur);
2271 if ((name == NULL) || (*cur != ';')) {
2272 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2274 ctxt->sax->error(ctxt->userData,
2275 "EntityValue: '%c' forbidden except for entities references\n",
2276 tmp);
2277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002279 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002280 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2281 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002282 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData,
2285 "EntityValue: PEReferences forbidden in internal subset\n",
2286 tmp);
2287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002289 }
2290 if (name != NULL)
2291 xmlFree(name);
2292 }
2293 cur++;
2294 }
2295
2296 /*
2297 * Then PEReference entities are substituted.
2298 */
2299 if (c != stop) {
2300 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2302 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2303 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002304 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002305 xmlFree(buf);
2306 } else {
2307 NEXT;
2308 /*
2309 * NOTE: 4.4.7 Bypassed
2310 * When a general entity reference appears in the EntityValue in
2311 * an entity declaration, it is bypassed and left as is.
2312 * so XML_SUBSTITUTE_REF is not set here.
2313 */
2314 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2315 0, 0, 0);
2316 if (orig != NULL)
2317 *orig = buf;
2318 else
2319 xmlFree(buf);
2320 }
2321
2322 return(ret);
2323}
2324
2325/**
2326 * xmlParseAttValue:
2327 * @ctxt: an XML parser context
2328 *
2329 * parse a value for an attribute
2330 * Note: the parser won't do substitution of entities here, this
2331 * will be handled later in xmlStringGetNodeList
2332 *
2333 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2334 * "'" ([^<&'] | Reference)* "'"
2335 *
2336 * 3.3.3 Attribute-Value Normalization:
2337 * Before the value of an attribute is passed to the application or
2338 * checked for validity, the XML processor must normalize it as follows:
2339 * - a character reference is processed by appending the referenced
2340 * character to the attribute value
2341 * - an entity reference is processed by recursively processing the
2342 * replacement text of the entity
2343 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2344 * appending #x20 to the normalized value, except that only a single
2345 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2346 * parsed entity or the literal entity value of an internal parsed entity
2347 * - other characters are processed by appending them to the normalized value
2348 * If the declared value is not CDATA, then the XML processor must further
2349 * process the normalized attribute value by discarding any leading and
2350 * trailing space (#x20) characters, and by replacing sequences of space
2351 * (#x20) characters by a single space (#x20) character.
2352 * All attributes for which no declaration has been read should be treated
2353 * by a non-validating parser as if declared CDATA.
2354 *
2355 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2356 */
2357
2358xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002359xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2360
2361xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002362xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2363 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002364 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002365 xmlChar *ret = NULL;
2366 SHRINK;
2367 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002368 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002369 if (*in != '"' && *in != '\'') {
2370 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2372 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2373 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002374 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002375 return(NULL);
2376 }
2377 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2378 limit = *in;
2379 ++in;
2380
2381 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2382 *in != '&' && *in != '<'
2383 ) {
2384 ++in;
2385 }
2386 if (*in != limit) {
2387 return xmlParseAttValueComplex(ctxt);
2388 }
2389 ++in;
2390 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2391 CUR_PTR = in;
2392 return ret;
2393}
2394
Daniel Veillard01c13b52002-12-10 15:19:08 +00002395/**
2396 * xmlParseAttValueComplex:
2397 * @ctxt: an XML parser context
2398 *
2399 * parse a value for an attribute, this is the fallback function
2400 * of xmlParseAttValue() when the attribute parsing requires handling
2401 * of non-ASCII characters.
2402 *
2403 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2404 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002405xmlChar *
2406xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2407 xmlChar limit = 0;
2408 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002409 int len = 0;
2410 int buf_size = 0;
2411 int c, l;
2412 xmlChar *current = NULL;
2413 xmlEntityPtr ent;
2414
2415
2416 SHRINK;
2417 if (NXT(0) == '"') {
2418 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2419 limit = '"';
2420 NEXT;
2421 } else if (NXT(0) == '\'') {
2422 limit = '\'';
2423 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2424 NEXT;
2425 } else {
2426 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 return(NULL);
2432 }
2433
2434 /*
2435 * allocate a translation buffer.
2436 */
2437 buf_size = XML_PARSER_BUFFER_SIZE;
2438 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2439 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002440 xmlGenericError(xmlGenericErrorContext,
2441 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(NULL);
2443 }
2444
2445 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002446 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 */
2448 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 while ((NXT(0) != limit) && /* checked */
2450 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002452 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002453 if (NXT(1) == '#') {
2454 int val = xmlParseCharRef(ctxt);
2455 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002456 if (ctxt->replaceEntities) {
2457 if (len > buf_size - 10) {
2458 growBuffer(buf);
2459 }
2460 buf[len++] = '&';
2461 } else {
2462 /*
2463 * The reparsing will be done in xmlStringGetNodeList()
2464 * called by the attribute() function in SAX.c
2465 */
2466 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002467
Daniel Veillard319a7422001-09-11 09:27:09 +00002468 if (len > buf_size - 10) {
2469 growBuffer(buf);
2470 }
2471 current = &buffer[0];
2472 while (*current != 0) { /* non input consuming */
2473 buf[len++] = *current++;
2474 }
Owen Taylor3473f882001-02-23 17:55:21 +00002475 }
2476 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002477 if (len > buf_size - 10) {
2478 growBuffer(buf);
2479 }
Owen Taylor3473f882001-02-23 17:55:21 +00002480 len += xmlCopyChar(0, &buf[len], val);
2481 }
2482 } else {
2483 ent = xmlParseEntityRef(ctxt);
2484 if ((ent != NULL) &&
2485 (ctxt->replaceEntities != 0)) {
2486 xmlChar *rep;
2487
2488 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2489 rep = xmlStringDecodeEntities(ctxt, ent->content,
2490 XML_SUBSTITUTE_REF, 0, 0, 0);
2491 if (rep != NULL) {
2492 current = rep;
2493 while (*current != 0) { /* non input consuming */
2494 buf[len++] = *current++;
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 }
2499 xmlFree(rep);
2500 }
2501 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002502 if (len > buf_size - 10) {
2503 growBuffer(buf);
2504 }
Owen Taylor3473f882001-02-23 17:55:21 +00002505 if (ent->content != NULL)
2506 buf[len++] = ent->content[0];
2507 }
2508 } else if (ent != NULL) {
2509 int i = xmlStrlen(ent->name);
2510 const xmlChar *cur = ent->name;
2511
2512 /*
2513 * This may look absurd but is needed to detect
2514 * entities problems
2515 */
2516 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2517 (ent->content != NULL)) {
2518 xmlChar *rep;
2519 rep = xmlStringDecodeEntities(ctxt, ent->content,
2520 XML_SUBSTITUTE_REF, 0, 0, 0);
2521 if (rep != NULL)
2522 xmlFree(rep);
2523 }
2524
2525 /*
2526 * Just output the reference
2527 */
2528 buf[len++] = '&';
2529 if (len > buf_size - i - 10) {
2530 growBuffer(buf);
2531 }
2532 for (;i > 0;i--)
2533 buf[len++] = *cur++;
2534 buf[len++] = ';';
2535 }
2536 }
2537 } else {
2538 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2539 COPY_BUF(l,buf,len,0x20);
2540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
2543 } else {
2544 COPY_BUF(l,buf,len,c);
2545 if (len > buf_size - 10) {
2546 growBuffer(buf);
2547 }
2548 }
2549 NEXTL(l);
2550 }
2551 GROW;
2552 c = CUR_CHAR(l);
2553 }
2554 buf[len++] = 0;
2555 if (RAW == '<') {
2556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2558 ctxt->sax->error(ctxt->userData,
2559 "Unescaped '<' not allowed in attributes values\n");
2560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 } else if (RAW != limit) {
2563 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2565 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002568 } else
2569 NEXT;
2570 return(buf);
2571}
2572
2573/**
2574 * xmlParseSystemLiteral:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML Literal
2578 *
2579 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2580 *
2581 * Returns the SystemLiteral parsed or NULL
2582 */
2583
2584xmlChar *
2585xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2586 xmlChar *buf = NULL;
2587 int len = 0;
2588 int size = XML_PARSER_BUFFER_SIZE;
2589 int cur, l;
2590 xmlChar stop;
2591 int state = ctxt->instate;
2592 int count = 0;
2593
2594 SHRINK;
2595 if (RAW == '"') {
2596 NEXT;
2597 stop = '"';
2598 } else if (RAW == '\'') {
2599 NEXT;
2600 stop = '\'';
2601 } else {
2602 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2604 ctxt->sax->error(ctxt->userData,
2605 "SystemLiteral \" or ' expected\n");
2606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
2609 }
2610
2611 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2612 if (buf == NULL) {
2613 xmlGenericError(xmlGenericErrorContext,
2614 "malloc of %d byte failed\n", size);
2615 return(NULL);
2616 }
2617 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2618 cur = CUR_CHAR(l);
2619 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2620 if (len + 5 >= size) {
2621 size *= 2;
2622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2623 if (buf == NULL) {
2624 xmlGenericError(xmlGenericErrorContext,
2625 "realloc of %d byte failed\n", size);
2626 ctxt->instate = (xmlParserInputState) state;
2627 return(NULL);
2628 }
2629 }
2630 count++;
2631 if (count > 50) {
2632 GROW;
2633 count = 0;
2634 }
2635 COPY_BUF(l,buf,len,cur);
2636 NEXTL(l);
2637 cur = CUR_CHAR(l);
2638 if (cur == 0) {
2639 GROW;
2640 SHRINK;
2641 cur = CUR_CHAR(l);
2642 }
2643 }
2644 buf[len] = 0;
2645 ctxt->instate = (xmlParserInputState) state;
2646 if (!IS_CHAR(cur)) {
2647 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2650 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002651 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
2653 NEXT;
2654 }
2655 return(buf);
2656}
2657
2658/**
2659 * xmlParsePubidLiteral:
2660 * @ctxt: an XML parser context
2661 *
2662 * parse an XML public literal
2663 *
2664 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2665 *
2666 * Returns the PubidLiteral parsed or NULL.
2667 */
2668
2669xmlChar *
2670xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2671 xmlChar *buf = NULL;
2672 int len = 0;
2673 int size = XML_PARSER_BUFFER_SIZE;
2674 xmlChar cur;
2675 xmlChar stop;
2676 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002677 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
2679 SHRINK;
2680 if (RAW == '"') {
2681 NEXT;
2682 stop = '"';
2683 } else if (RAW == '\'') {
2684 NEXT;
2685 stop = '\'';
2686 } else {
2687 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2689 ctxt->sax->error(ctxt->userData,
2690 "SystemLiteral \" or ' expected\n");
2691 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002692 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(NULL);
2694 }
2695 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2696 if (buf == NULL) {
2697 xmlGenericError(xmlGenericErrorContext,
2698 "malloc of %d byte failed\n", size);
2699 return(NULL);
2700 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002701 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002702 cur = CUR;
2703 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2704 if (len + 1 >= size) {
2705 size *= 2;
2706 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2707 if (buf == NULL) {
2708 xmlGenericError(xmlGenericErrorContext,
2709 "realloc of %d byte failed\n", size);
2710 return(NULL);
2711 }
2712 }
2713 buf[len++] = cur;
2714 count++;
2715 if (count > 50) {
2716 GROW;
2717 count = 0;
2718 }
2719 NEXT;
2720 cur = CUR;
2721 if (cur == 0) {
2722 GROW;
2723 SHRINK;
2724 cur = CUR;
2725 }
2726 }
2727 buf[len] = 0;
2728 if (cur != stop) {
2729 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else {
2735 NEXT;
2736 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002737 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 return(buf);
2739}
2740
Daniel Veillard48b2f892001-02-25 16:11:03 +00002741void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002742/**
2743 * xmlParseCharData:
2744 * @ctxt: an XML parser context
2745 * @cdata: int indicating whether we are within a CDATA section
2746 *
2747 * parse a CharData section.
2748 * if we are within a CDATA section ']]>' marks an end of section.
2749 *
2750 * The right angle bracket (>) may be represented using the string "&gt;",
2751 * and must, for compatibility, be escaped using "&gt;" or a character
2752 * reference when it appears in the string "]]>" in content, when that
2753 * string is not marking the end of a CDATA section.
2754 *
2755 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2756 */
2757
2758void
2759xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002760 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002762 int line = ctxt->input->line;
2763 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002764
2765 SHRINK;
2766 GROW;
2767 /*
2768 * Accelerated common case where input don't need to be
2769 * modified before passing it to the handler.
2770 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002771 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002772 in = ctxt->input->cur;
2773 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002774get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2776 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002780 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002781 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002782 ctxt->input->line++;
2783 in++;
2784 }
2785 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002786 }
2787 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002788 if ((in[1] == ']') && (in[2] == '>')) {
2789 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData,
2792 "Sequence ']]>' not allowed in content\n");
2793 ctxt->input->cur = in;
2794 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002796 return;
2797 }
2798 in++;
2799 goto get_more;
2800 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002801 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002802 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002803 if (IS_BLANK(*ctxt->input->cur)) {
2804 const xmlChar *tmp = ctxt->input->cur;
2805 ctxt->input->cur = in;
2806 if (areBlanks(ctxt, tmp, nbchar)) {
2807 if (ctxt->sax->ignorableWhitespace != NULL)
2808 ctxt->sax->ignorableWhitespace(ctxt->userData,
2809 tmp, nbchar);
2810 } else {
2811 if (ctxt->sax->characters != NULL)
2812 ctxt->sax->characters(ctxt->userData,
2813 tmp, nbchar);
2814 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002815 line = ctxt->input->line;
2816 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002817 } else {
2818 if (ctxt->sax->characters != NULL)
2819 ctxt->sax->characters(ctxt->userData,
2820 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002821 line = ctxt->input->line;
2822 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002823 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002824 }
2825 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002826 if (*in == 0xD) {
2827 in++;
2828 if (*in == 0xA) {
2829 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 ctxt->input->line++;
2832 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002833 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002834 in--;
2835 }
2836 if (*in == '<') {
2837 return;
2838 }
2839 if (*in == '&') {
2840 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002841 }
2842 SHRINK;
2843 GROW;
2844 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002845 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002846 nbchar = 0;
2847 }
Daniel Veillard50582112001-03-26 22:52:16 +00002848 ctxt->input->line = line;
2849 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002850 xmlParseCharDataComplex(ctxt, cdata);
2851}
2852
/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * Parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862void
2863xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002864 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2865 int nbchar = 0;
2866 int cur, l;
2867 int count = 0;
2868
2869 SHRINK;
2870 GROW;
2871 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002872 while ((cur != '<') && /* checked */
2873 (cur != '&') &&
2874 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if ((cur == ']') && (NXT(1) == ']') &&
2876 (NXT(2) == '>')) {
2877 if (cdata) break;
2878 else {
2879 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882 "Sequence ']]>' not allowed in content\n");
2883 /* Should this be relaxed ??? I see a "must here */
2884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
2888 COPY_BUF(l,buf,nbchar,cur);
2889 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2890 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2894 if (areBlanks(ctxt, buf, nbchar)) {
2895 if (ctxt->sax->ignorableWhitespace != NULL)
2896 ctxt->sax->ignorableWhitespace(ctxt->userData,
2897 buf, nbchar);
2898 } else {
2899 if (ctxt->sax->characters != NULL)
2900 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2901 }
2902 }
2903 nbchar = 0;
2904 }
2905 count++;
2906 if (count > 50) {
2907 GROW;
2908 count = 0;
2909 }
2910 NEXTL(l);
2911 cur = CUR_CHAR(l);
2912 }
2913 if (nbchar != 0) {
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
2917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2918 if (areBlanks(ctxt, buf, nbchar)) {
2919 if (ctxt->sax->ignorableWhitespace != NULL)
2920 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2921 } else {
2922 if (ctxt->sax->characters != NULL)
2923 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2924 }
2925 }
2926 }
2927}
2928
2929/**
2930 * xmlParseExternalID:
2931 * @ctxt: an XML parser context
2932 * @publicID: a xmlChar** receiving PubidLiteral
2933 * @strict: indicate whether we should restrict parsing to only
2934 * production [75], see NOTE below
2935 *
2936 * Parse an External ID or a Public ID
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002939 * 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2942 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2943 *
2944 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2945 *
2946 * Returns the function returns SystemLiteral and in the second
2947 * case publicID receives PubidLiteral, is strict is off
2948 * it is possible to return NULL and have publicID set.
2949 */
2950
2951xmlChar *
2952xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2953 xmlChar *URI = NULL;
2954
2955 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002956
2957 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2959 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2960 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2961 SKIP(6);
2962 if (!IS_BLANK(CUR)) {
2963 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2965 ctxt->sax->error(ctxt->userData,
2966 "Space required after 'SYSTEM'\n");
2967 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002968 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 }
2970 SKIP_BLANKS;
2971 URI = xmlParseSystemLiteral(ctxt);
2972 if (URI == NULL) {
2973 ctxt->errNo = XML_ERR_URI_REQUIRED;
2974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2975 ctxt->sax->error(ctxt->userData,
2976 "xmlParseExternalID: SYSTEM, no URI\n");
2977 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002978 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002979 }
2980 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2981 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2982 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2983 SKIP(6);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after 'PUBLIC'\n");
2989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 SKIP_BLANKS;
2993 *publicID = xmlParsePubidLiteral(ctxt);
2994 if (*publicID == NULL) {
2995 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2997 ctxt->sax->error(ctxt->userData,
2998 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 if (strict) {
3003 /*
3004 * We don't handle [83] so "S SystemLiteral" is required.
3005 */
3006 if (!IS_BLANK(CUR)) {
3007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3009 ctxt->sax->error(ctxt->userData,
3010 "Space required after the Public Identifier\n");
3011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003013 }
3014 } else {
3015 /*
3016 * We handle [83] so we return immediately, if
3017 * "S SystemLiteral" is not detected. From a purely parsing
3018 * point of view that's a nice mess.
3019 */
3020 const xmlChar *ptr;
3021 GROW;
3022
3023 ptr = CUR_PTR;
3024 if (!IS_BLANK(*ptr)) return(NULL);
3025
3026 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3027 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3028 }
3029 SKIP_BLANKS;
3030 URI = xmlParseSystemLiteral(ctxt);
3031 if (URI == NULL) {
3032 ctxt->errNo = XML_ERR_URI_REQUIRED;
3033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3034 ctxt->sax->error(ctxt->userData,
3035 "xmlParseExternalID: PUBLIC, no URI\n");
3036 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003037 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 }
3040 return(URI);
3041}
3042
3043/**
3044 * xmlParseComment:
3045 * @ctxt: an XML parser context
3046 *
3047 * Skip an XML (SGML) comment <!-- .... -->
3048 * The spec says that "For compatibility, the string "--" (double-hyphen)
3049 * must not occur within comments. "
3050 *
3051 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3052 */
3053void
3054xmlParseComment(xmlParserCtxtPtr ctxt) {
3055 xmlChar *buf = NULL;
3056 int len;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int q, ql;
3059 int r, rl;
3060 int cur, l;
3061 xmlParserInputState state;
3062 xmlParserInputPtr input = ctxt->input;
3063 int count = 0;
3064
3065 /*
3066 * Check that there is a comment right here.
3067 */
3068 if ((RAW != '<') || (NXT(1) != '!') ||
3069 (NXT(2) != '-') || (NXT(3) != '-')) return;
3070
3071 state = ctxt->instate;
3072 ctxt->instate = XML_PARSER_COMMENT;
3073 SHRINK;
3074 SKIP(4);
3075 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3076 if (buf == NULL) {
3077 xmlGenericError(xmlGenericErrorContext,
3078 "malloc of %d byte failed\n", size);
3079 ctxt->instate = state;
3080 return;
3081 }
3082 q = CUR_CHAR(ql);
3083 NEXTL(ql);
3084 r = CUR_CHAR(rl);
3085 NEXTL(rl);
3086 cur = CUR_CHAR(l);
3087 len = 0;
3088 while (IS_CHAR(cur) && /* checked */
3089 ((cur != '>') ||
3090 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003091 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003092 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3094 ctxt->sax->error(ctxt->userData,
3095 "Comment must not contain '--' (double-hyphen)`\n");
3096 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003097 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003098 }
3099 if (len + 5 >= size) {
3100 size *= 2;
3101 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (buf == NULL) {
3103 xmlGenericError(xmlGenericErrorContext,
3104 "realloc of %d byte failed\n", size);
3105 ctxt->instate = state;
3106 return;
3107 }
3108 }
3109 COPY_BUF(ql,buf,len,q);
3110 q = r;
3111 ql = rl;
3112 r = cur;
3113 rl = l;
3114
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (!IS_CHAR(cur)) {
3130 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment not terminated \n<!--%.50s\n", buf);
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 xmlFree(buf);
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142"Comment doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003144 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 NEXT;
3147 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3148 (!ctxt->disableSAX))
3149 ctxt->sax->comment(ctxt->userData, buf);
3150 xmlFree(buf);
3151 }
3152 ctxt->instate = state;
3153}
3154
3155/**
3156 * xmlParsePITarget:
3157 * @ctxt: an XML parser context
3158 *
3159 * parse the name of a PI
3160 *
3161 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3162 *
3163 * Returns the PITarget name or NULL
3164 */
3165
3166xmlChar *
3167xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3168 xmlChar *name;
3169
3170 name = xmlParseName(ctxt);
3171 if ((name != NULL) &&
3172 ((name[0] == 'x') || (name[0] == 'X')) &&
3173 ((name[1] == 'm') || (name[1] == 'M')) &&
3174 ((name[2] == 'l') || (name[2] == 'L'))) {
3175 int i;
3176 if ((name[0] == 'x') && (name[1] == 'm') &&
3177 (name[2] == 'l') && (name[3] == 0)) {
3178 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3180 ctxt->sax->error(ctxt->userData,
3181 "XML declaration allowed only at the start of the document\n");
3182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 return(name);
3185 } else if (name[3] == 0) {
3186 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3188 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3189 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003190 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(name);
3192 }
3193 for (i = 0;;i++) {
3194 if (xmlW3CPIs[i] == NULL) break;
3195 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3196 return(name);
3197 }
3198 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3199 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3200 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003201 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003202 }
3203 }
3204 return(name);
3205}
3206
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected value is: blanks, the word catalog, blanks, '=', blanks,
 * a quoted URL, blanks. A value where '=' does not follow the keyword
 * is ignored silently, any other deviation raises a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* leading blanks, then the "catalog" keyword */
    for (p = catalog; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;

    /* optional blanks, then '=' */
    for (p += 7; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;

    /* optional blanks, then the opening quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the URL extends up to the matching quote */
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParsePI:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML Processing Instruction.
3275 *
3276 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3277 *
3278 * The processing is transfered to SAX once parsed.
3279 */
3280
3281void
3282xmlParsePI(xmlParserCtxtPtr ctxt) {
3283 xmlChar *buf = NULL;
3284 int len = 0;
3285 int size = XML_PARSER_BUFFER_SIZE;
3286 int cur, l;
3287 xmlChar *target;
3288 xmlParserInputState state;
3289 int count = 0;
3290
3291 if ((RAW == '<') && (NXT(1) == '?')) {
3292 xmlParserInputPtr input = ctxt->input;
3293 state = ctxt->instate;
3294 ctxt->instate = XML_PARSER_PI;
3295 /*
3296 * this is a Processing Instruction.
3297 */
3298 SKIP(2);
3299 SHRINK;
3300
3301 /*
3302 * Parse the target name and check for special support like
3303 * namespace.
3304 */
3305 target = xmlParsePITarget(ctxt);
3306 if (target != NULL) {
3307 if ((RAW == '?') && (NXT(1) == '>')) {
3308 if (input != ctxt->input) {
3309 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3311 ctxt->sax->error(ctxt->userData,
3312 "PI declaration doesn't start and stop in the same entity\n");
3313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003315 }
3316 SKIP(2);
3317
3318 /*
3319 * SAX: PI detected.
3320 */
3321 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3322 (ctxt->sax->processingInstruction != NULL))
3323 ctxt->sax->processingInstruction(ctxt->userData,
3324 target, NULL);
3325 ctxt->instate = state;
3326 xmlFree(target);
3327 return;
3328 }
3329 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3330 if (buf == NULL) {
3331 xmlGenericError(xmlGenericErrorContext,
3332 "malloc of %d byte failed\n", size);
3333 ctxt->instate = state;
3334 return;
3335 }
3336 cur = CUR;
3337 if (!IS_BLANK(cur)) {
3338 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3340 ctxt->sax->error(ctxt->userData,
3341 "xmlParsePI: PI %s space expected\n", target);
3342 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 }
3345 SKIP_BLANKS;
3346 cur = CUR_CHAR(l);
3347 while (IS_CHAR(cur) && /* checked */
3348 ((cur != '?') || (NXT(1) != '>'))) {
3349 if (len + 5 >= size) {
3350 size *= 2;
3351 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3352 if (buf == NULL) {
3353 xmlGenericError(xmlGenericErrorContext,
3354 "realloc of %d byte failed\n", size);
3355 ctxt->instate = state;
3356 return;
3357 }
3358 }
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 COPY_BUF(l,buf,len,cur);
3365 NEXTL(l);
3366 cur = CUR_CHAR(l);
3367 if (cur == 0) {
3368 SHRINK;
3369 GROW;
3370 cur = CUR_CHAR(l);
3371 }
3372 }
3373 buf[len] = 0;
3374 if (cur != '?') {
3375 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData,
3378 "xmlParsePI: PI %s never end ...\n", target);
3379 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003381 } else {
3382 if (input != ctxt->input) {
3383 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "PI declaration doesn't start and stop in the same entity\n");
3387 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003388 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003389 }
3390 SKIP(2);
3391
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003392#ifdef LIBXML_CATALOG_ENABLED
3393 if (((state == XML_PARSER_MISC) ||
3394 (state == XML_PARSER_START)) &&
3395 (xmlStrEqual(target, XML_CATALOG_PI))) {
3396 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3397 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3398 (allow == XML_CATA_ALLOW_ALL))
3399 xmlParseCatalogPI(ctxt, buf);
3400 }
3401#endif
3402
3403
Owen Taylor3473f882001-02-23 17:55:21 +00003404 /*
3405 * SAX: PI detected.
3406 */
3407 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3408 (ctxt->sax->processingInstruction != NULL))
3409 ctxt->sax->processingInstruction(ctxt->userData,
3410 target, buf);
3411 }
3412 xmlFree(buf);
3413 xmlFree(target);
3414 } else {
3415 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3417 ctxt->sax->error(ctxt->userData,
3418 "xmlParsePI : no target name\n");
3419 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003420 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 ctxt->instate = state;
3423 }
3424}
3425
3426/**
3427 * xmlParseNotationDecl:
3428 * @ctxt: an XML parser context
3429 *
3430 * parse a notation declaration
3431 *
3432 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3433 *
3434 * Hence there is actually 3 choices:
3435 * 'PUBLIC' S PubidLiteral
3436 * 'PUBLIC' S PubidLiteral S SystemLiteral
3437 * and 'SYSTEM' S SystemLiteral
3438 *
3439 * See the NOTE on xmlParseExternalID().
3440 */
3441
3442void
3443xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3444 xmlChar *name;
3445 xmlChar *Pubid;
3446 xmlChar *Systemid;
3447
3448 if ((RAW == '<') && (NXT(1) == '!') &&
3449 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3450 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3451 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3452 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3453 xmlParserInputPtr input = ctxt->input;
3454 SHRINK;
3455 SKIP(10);
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after '<!NOTATION'\n");
3461 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003462 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return;
3464 }
3465 SKIP_BLANKS;
3466
Daniel Veillard76d66f42001-05-16 21:05:17 +00003467 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 if (name == NULL) {
3469 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3471 ctxt->sax->error(ctxt->userData,
3472 "NOTATION: Name expected here\n");
3473 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return;
3476 }
3477 if (!IS_BLANK(CUR)) {
3478 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3480 ctxt->sax->error(ctxt->userData,
3481 "Space required after the NOTATION name'\n");
3482 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003483 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return;
3485 }
3486 SKIP_BLANKS;
3487
3488 /*
3489 * Parse the IDs.
3490 */
3491 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3492 SKIP_BLANKS;
3493
3494 if (RAW == '>') {
3495 if (input != ctxt->input) {
3496 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499"Notation declaration doesn't start and stop in the same entity\n");
3500 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003501 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003502 }
3503 NEXT;
3504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3505 (ctxt->sax->notationDecl != NULL))
3506 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3507 } else {
3508 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3510 ctxt->sax->error(ctxt->userData,
3511 "'>' required to close NOTATION declaration\n");
3512 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003513 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 }
3515 xmlFree(name);
3516 if (Systemid != NULL) xmlFree(Systemid);
3517 if (Pubid != NULL) xmlFree(Pubid);
3518 }
3519}
3520
3521/**
3522 * xmlParseEntityDecl:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse <!ENTITY declarations
3526 *
3527 * [70] EntityDecl ::= GEDecl | PEDecl
3528 *
3529 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3530 *
3531 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3532 *
3533 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3534 *
3535 * [74] PEDef ::= EntityValue | ExternalID
3536 *
3537 * [76] NDataDecl ::= S 'NDATA' S Name
3538 *
3539 * [ VC: Notation Declared ]
3540 * The Name must match the declared name of a notation.
3541 */
3542
3543void
3544xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3545 xmlChar *name = NULL;
3546 xmlChar *value = NULL;
3547 xmlChar *URI = NULL, *literal = NULL;
3548 xmlChar *ndata = NULL;
3549 int isParameter = 0;
3550 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003551 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003552
3553 GROW;
3554 if ((RAW == '<') && (NXT(1) == '!') &&
3555 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3556 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3557 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3558 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003559 SHRINK;
3560 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003561 skipped = SKIP_BLANKS;
3562 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3565 ctxt->sax->error(ctxt->userData,
3566 "Space required after '<!ENTITY'\n");
3567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
Owen Taylor3473f882001-02-23 17:55:21 +00003570
3571 if (RAW == '%') {
3572 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003573 skipped = SKIP_BLANKS;
3574 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003575 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3577 ctxt->sax->error(ctxt->userData,
3578 "Space required after '%'\n");
3579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
Owen Taylor3473f882001-02-23 17:55:21 +00003582 isParameter = 1;
3583 }
3584
Daniel Veillard76d66f42001-05-16 21:05:17 +00003585 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003586 if (name == NULL) {
3587 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3589 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003592 return;
3593 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003594 skipped = SKIP_BLANKS;
3595 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3598 ctxt->sax->error(ctxt->userData,
3599 "Space required after the entity name\n");
3600 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003601 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
Owen Taylor3473f882001-02-23 17:55:21 +00003603
Daniel Veillardf5582f12002-06-11 10:08:16 +00003604 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 /*
3606 * handle the various case of definitions...
3607 */
3608 if (isParameter) {
3609 if ((RAW == '"') || (RAW == '\'')) {
3610 value = xmlParseEntityValue(ctxt, &orig);
3611 if (value) {
3612 if ((ctxt->sax != NULL) &&
3613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3614 ctxt->sax->entityDecl(ctxt->userData, name,
3615 XML_INTERNAL_PARAMETER_ENTITY,
3616 NULL, NULL, value);
3617 }
3618 } else {
3619 URI = xmlParseExternalID(ctxt, &literal, 1);
3620 if ((URI == NULL) && (literal == NULL)) {
3621 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3623 ctxt->sax->error(ctxt->userData,
3624 "Entity value required\n");
3625 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003626 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
3628 if (URI) {
3629 xmlURIPtr uri;
3630
3631 uri = xmlParseURI((const char *) URI);
3632 if (uri == NULL) {
3633 ctxt->errNo = XML_ERR_INVALID_URI;
3634 if ((ctxt->sax != NULL) &&
3635 (!ctxt->disableSAX) &&
3636 (ctxt->sax->error != NULL))
3637 ctxt->sax->error(ctxt->userData,
3638 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003639 /*
3640 * This really ought to be a well formedness error
3641 * but the XML Core WG decided otherwise c.f. issue
3642 * E26 of the XML erratas.
3643 */
Owen Taylor3473f882001-02-23 17:55:21 +00003644 } else {
3645 if (uri->fragment != NULL) {
3646 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3647 if ((ctxt->sax != NULL) &&
3648 (!ctxt->disableSAX) &&
3649 (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003652 /*
3653 * Okay this is foolish to block those but not
3654 * invalid URIs.
3655 */
Owen Taylor3473f882001-02-23 17:55:21 +00003656 ctxt->wellFormed = 0;
3657 } else {
3658 if ((ctxt->sax != NULL) &&
3659 (!ctxt->disableSAX) &&
3660 (ctxt->sax->entityDecl != NULL))
3661 ctxt->sax->entityDecl(ctxt->userData, name,
3662 XML_EXTERNAL_PARAMETER_ENTITY,
3663 literal, URI, NULL);
3664 }
3665 xmlFreeURI(uri);
3666 }
3667 }
3668 }
3669 } else {
3670 if ((RAW == '"') || (RAW == '\'')) {
3671 value = xmlParseEntityValue(ctxt, &orig);
3672 if ((ctxt->sax != NULL) &&
3673 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3674 ctxt->sax->entityDecl(ctxt->userData, name,
3675 XML_INTERNAL_GENERAL_ENTITY,
3676 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003677 /*
3678 * For expat compatibility in SAX mode.
3679 */
3680 if ((ctxt->myDoc == NULL) ||
3681 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3682 if (ctxt->myDoc == NULL) {
3683 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3684 }
3685 if (ctxt->myDoc->intSubset == NULL)
3686 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3687 BAD_CAST "fake", NULL, NULL);
3688
3689 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3690 NULL, NULL, value);
3691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 } else {
3693 URI = xmlParseExternalID(ctxt, &literal, 1);
3694 if ((URI == NULL) && (literal == NULL)) {
3695 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3697 ctxt->sax->error(ctxt->userData,
3698 "Entity value required\n");
3699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003701 }
3702 if (URI) {
3703 xmlURIPtr uri;
3704
3705 uri = xmlParseURI((const char *)URI);
3706 if (uri == NULL) {
3707 ctxt->errNo = XML_ERR_INVALID_URI;
3708 if ((ctxt->sax != NULL) &&
3709 (!ctxt->disableSAX) &&
3710 (ctxt->sax->error != NULL))
3711 ctxt->sax->error(ctxt->userData,
3712 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003713 /*
3714 * This really ought to be a well formedness error
3715 * but the XML Core WG decided otherwise c.f. issue
3716 * E26 of the XML erratas.
3717 */
Owen Taylor3473f882001-02-23 17:55:21 +00003718 } else {
3719 if (uri->fragment != NULL) {
3720 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3721 if ((ctxt->sax != NULL) &&
3722 (!ctxt->disableSAX) &&
3723 (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003726 /*
3727 * Okay this is foolish to block those but not
3728 * invalid URIs.
3729 */
Owen Taylor3473f882001-02-23 17:55:21 +00003730 ctxt->wellFormed = 0;
3731 }
3732 xmlFreeURI(uri);
3733 }
3734 }
3735 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3736 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738 ctxt->sax->error(ctxt->userData,
3739 "Space required before 'NDATA'\n");
3740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 if ((RAW == 'N') && (NXT(1) == 'D') &&
3745 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3746 (NXT(4) == 'A')) {
3747 SKIP(5);
3748 if (!IS_BLANK(CUR)) {
3749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3751 ctxt->sax->error(ctxt->userData,
3752 "Space required after 'NDATA'\n");
3753 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003754 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003757 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3759 (ctxt->sax->unparsedEntityDecl != NULL))
3760 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3761 literal, URI, ndata);
3762 } else {
3763 if ((ctxt->sax != NULL) &&
3764 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3765 ctxt->sax->entityDecl(ctxt->userData, name,
3766 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3767 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003768 /*
3769 * For expat compatibility in SAX mode.
3770 * assuming the entity repalcement was asked for
3771 */
3772 if ((ctxt->replaceEntities != 0) &&
3773 ((ctxt->myDoc == NULL) ||
3774 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3775 if (ctxt->myDoc == NULL) {
3776 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3777 }
3778
3779 if (ctxt->myDoc->intSubset == NULL)
3780 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3781 BAD_CAST "fake", NULL, NULL);
3782 entityDecl(ctxt, name,
3783 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3784 literal, URI, NULL);
3785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 }
3787 }
3788 }
3789 SKIP_BLANKS;
3790 if (RAW != '>') {
3791 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3793 ctxt->sax->error(ctxt->userData,
3794 "xmlParseEntityDecl: entity %s not terminated\n", name);
3795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 } else {
3798 if (input != ctxt->input) {
3799 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801 ctxt->sax->error(ctxt->userData,
3802"Entity declaration doesn't start and stop in the same entity\n");
3803 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003805 }
3806 NEXT;
3807 }
3808 if (orig != NULL) {
3809 /*
3810 * Ugly mechanism to save the raw entity value.
3811 */
3812 xmlEntityPtr cur = NULL;
3813
3814 if (isParameter) {
3815 if ((ctxt->sax != NULL) &&
3816 (ctxt->sax->getParameterEntity != NULL))
3817 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3818 } else {
3819 if ((ctxt->sax != NULL) &&
3820 (ctxt->sax->getEntity != NULL))
3821 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003822 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3823 cur = getEntity(ctxt, name);
3824 }
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (cur != NULL) {
3827 if (cur->orig != NULL)
3828 xmlFree(orig);
3829 else
3830 cur->orig = orig;
3831 } else
3832 xmlFree(orig);
3833 }
3834 if (name != NULL) xmlFree(name);
3835 if (value != NULL) xmlFree(value);
3836 if (URI != NULL) xmlFree(URI);
3837 if (literal != NULL) xmlFree(literal);
3838 if (ndata != NULL) xmlFree(ndata);
3839 }
3840}
3841
3842/**
3843 * xmlParseDefaultDecl:
3844 * @ctxt: an XML parser context
3845 * @value: Receive a possible fixed default value for the attribute
3846 *
3847 * Parse an attribute default declaration
3848 *
3849 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3850 *
3851 * [ VC: Required Attribute ]
3852 * if the default declaration is the keyword #REQUIRED, then the
3853 * attribute must be specified for all elements of the type in the
3854 * attribute-list declaration.
3855 *
3856 * [ VC: Attribute Default Legal ]
3857 * The declared default value must meet the lexical constraints of
3858 * the declared attribute type c.f. xmlValidateAttributeDecl()
3859 *
3860 * [ VC: Fixed Attribute Default ]
3861 * if an attribute has a default value declared with the #FIXED
3862 * keyword, instances of that attribute must match the default value.
3863 *
3864 * [ WFC: No < in Attribute Values ]
3865 * handled in xmlParseAttValue()
3866 *
3867 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3868 * or XML_ATTRIBUTE_FIXED.
3869 */
3870
3871int
3872xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3873 int val;
3874 xmlChar *ret;
3875
3876 *value = NULL;
3877 if ((RAW == '#') && (NXT(1) == 'R') &&
3878 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3879 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3880 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3881 (NXT(8) == 'D')) {
3882 SKIP(9);
3883 return(XML_ATTRIBUTE_REQUIRED);
3884 }
3885 if ((RAW == '#') && (NXT(1) == 'I') &&
3886 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3887 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3888 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3889 SKIP(8);
3890 return(XML_ATTRIBUTE_IMPLIED);
3891 }
3892 val = XML_ATTRIBUTE_NONE;
3893 if ((RAW == '#') && (NXT(1) == 'F') &&
3894 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3895 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3896 SKIP(6);
3897 val = XML_ATTRIBUTE_FIXED;
3898 if (!IS_BLANK(CUR)) {
3899 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3901 ctxt->sax->error(ctxt->userData,
3902 "Space required after '#FIXED'\n");
3903 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003904 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 SKIP_BLANKS;
3907 }
3908 ret = xmlParseAttValue(ctxt);
3909 ctxt->instate = XML_PARSER_DTD;
3910 if (ret == NULL) {
3911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3912 ctxt->sax->error(ctxt->userData,
3913 "Attribute default value declaration error\n");
3914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else
3917 *value = ret;
3918 return(val);
3919}
3920
3921/**
3922 * xmlParseNotationType:
3923 * @ctxt: an XML parser context
3924 *
3925 * parse an Notation attribute type.
3926 *
3927 * Note: the leading 'NOTATION' S part has already being parsed...
3928 *
3929 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3930 *
3931 * [ VC: Notation Attributes ]
3932 * Values of this type must match one of the notation names included
3933 * in the declaration; all notation names in the declaration must be declared.
3934 *
3935 * Returns: the notation attribute tree built while parsing
3936 */
3937
3938xmlEnumerationPtr
3939xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3940 xmlChar *name;
3941 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3942
3943 if (RAW != '(') {
3944 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "'(' required to start 'NOTATION'\n");
3948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(NULL);
3951 }
3952 SHRINK;
3953 do {
3954 NEXT;
3955 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003956 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (name == NULL) {
3958 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3960 ctxt->sax->error(ctxt->userData,
3961 "Name expected in NOTATION declaration\n");
3962 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003963 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return(ret);
3965 }
3966 cur = xmlCreateEnumeration(name);
3967 xmlFree(name);
3968 if (cur == NULL) return(ret);
3969 if (last == NULL) ret = last = cur;
3970 else {
3971 last->next = cur;
3972 last = cur;
3973 }
3974 SKIP_BLANKS;
3975 } while (RAW == '|');
3976 if (RAW != ')') {
3977 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3979 ctxt->sax->error(ctxt->userData,
3980 "')' required to finish NOTATION declaration\n");
3981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if ((last != NULL) && (last != ret))
3984 xmlFreeEnumeration(last);
3985 return(ret);
3986 }
3987 NEXT;
3988 return(ret);
3989}
3990
3991/**
3992 * xmlParseEnumerationType:
3993 * @ctxt: an XML parser context
3994 *
3995 * parse an Enumeration attribute type.
3996 *
3997 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3998 *
3999 * [ VC: Enumeration ]
4000 * Values of this type must match one of the Nmtoken tokens in
4001 * the declaration
4002 *
4003 * Returns: the enumeration attribute tree built while parsing
4004 */
4005
4006xmlEnumerationPtr
4007xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4008 xmlChar *name;
4009 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4010
4011 if (RAW != '(') {
4012 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4014 ctxt->sax->error(ctxt->userData,
4015 "'(' required to start ATTLIST enumeration\n");
4016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004018 return(NULL);
4019 }
4020 SHRINK;
4021 do {
4022 NEXT;
4023 SKIP_BLANKS;
4024 name = xmlParseNmtoken(ctxt);
4025 if (name == NULL) {
4026 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4028 ctxt->sax->error(ctxt->userData,
4029 "NmToken expected in ATTLIST enumeration\n");
4030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 return(ret);
4033 }
4034 cur = xmlCreateEnumeration(name);
4035 xmlFree(name);
4036 if (cur == NULL) return(ret);
4037 if (last == NULL) ret = last = cur;
4038 else {
4039 last->next = cur;
4040 last = cur;
4041 }
4042 SKIP_BLANKS;
4043 } while (RAW == '|');
4044 if (RAW != ')') {
4045 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "')' required to finish ATTLIST enumeration\n");
4049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004051 return(ret);
4052 }
4053 NEXT;
4054 return(ret);
4055}
4056
4057/**
4058 * xmlParseEnumeratedType:
4059 * @ctxt: an XML parser context
4060 * @tree: the enumeration tree built while parsing
4061 *
4062 * parse an Enumerated attribute type.
4063 *
4064 * [57] EnumeratedType ::= NotationType | Enumeration
4065 *
4066 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4067 *
4068 *
4069 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4070 */
4071
4072int
4073xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4074 if ((RAW == 'N') && (NXT(1) == 'O') &&
4075 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4076 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4077 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4078 SKIP(8);
4079 if (!IS_BLANK(CUR)) {
4080 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4082 ctxt->sax->error(ctxt->userData,
4083 "Space required after 'NOTATION'\n");
4084 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 return(0);
4087 }
4088 SKIP_BLANKS;
4089 *tree = xmlParseNotationType(ctxt);
4090 if (*tree == NULL) return(0);
4091 return(XML_ATTRIBUTE_NOTATION);
4092 }
4093 *tree = xmlParseEnumerationType(ctxt);
4094 if (*tree == NULL) return(0);
4095 return(XML_ATTRIBUTE_ENUMERATION);
4096}
4097
4098/**
4099 * xmlParseAttributeType:
4100 * @ctxt: an XML parser context
4101 * @tree: the enumeration tree built while parsing
4102 *
4103 * parse the Attribute list def for an element
4104 *
4105 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4106 *
4107 * [55] StringType ::= 'CDATA'
4108 *
4109 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4110 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4111 *
4112 * Validity constraints for attribute values syntax are checked in
4113 * xmlValidateAttributeValue()
4114 *
4115 * [ VC: ID ]
4116 * Values of type ID must match the Name production. A name must not
4117 * appear more than once in an XML document as a value of this type;
4118 * i.e., ID values must uniquely identify the elements which bear them.
4119 *
4120 * [ VC: One ID per Element Type ]
4121 * No element type may have more than one ID attribute specified.
4122 *
4123 * [ VC: ID Attribute Default ]
4124 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4125 *
4126 * [ VC: IDREF ]
4127 * Values of type IDREF must match the Name production, and values
4128 * of type IDREFS must match Names; each IDREF Name must match the value
4129 * of an ID attribute on some element in the XML document; i.e. IDREF
4130 * values must match the value of some ID attribute.
4131 *
4132 * [ VC: Entity Name ]
4133 * Values of type ENTITY must match the Name production, values
4134 * of type ENTITIES must match Names; each Entity Name must match the
4135 * name of an unparsed entity declared in the DTD.
4136 *
4137 * [ VC: Name Token ]
4138 * Values of type NMTOKEN must match the Nmtoken production; values
4139 * of type NMTOKENS must match Nmtokens.
4140 *
4141 * Returns the attribute type
4142 */
4143int
4144xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4145 SHRINK;
4146 if ((RAW == 'C') && (NXT(1) == 'D') &&
4147 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4148 (NXT(4) == 'A')) {
4149 SKIP(5);
4150 return(XML_ATTRIBUTE_CDATA);
4151 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4152 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4153 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4154 SKIP(6);
4155 return(XML_ATTRIBUTE_IDREFS);
4156 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4157 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4158 (NXT(4) == 'F')) {
4159 SKIP(5);
4160 return(XML_ATTRIBUTE_IDREF);
4161 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4162 SKIP(2);
4163 return(XML_ATTRIBUTE_ID);
4164 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4165 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4166 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4167 SKIP(6);
4168 return(XML_ATTRIBUTE_ENTITY);
4169 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4170 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4171 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4172 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4173 SKIP(8);
4174 return(XML_ATTRIBUTE_ENTITIES);
4175 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4176 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4177 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4178 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4179 SKIP(8);
4180 return(XML_ATTRIBUTE_NMTOKENS);
4181 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4182 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4183 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4184 (NXT(6) == 'N')) {
4185 SKIP(7);
4186 return(XML_ATTRIBUTE_NMTOKEN);
4187 }
4188 return(xmlParseEnumeratedType(ctxt, tree));
4189}
4190
4191/**
4192 * xmlParseAttributeListDecl:
4193 * @ctxt: an XML parser context
4194 *
4195 * : parse the Attribute list def for an element
4196 *
4197 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4198 *
4199 * [53] AttDef ::= S Name S AttType S DefaultDecl
4200 *
4201 */
4202void
4203xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4204 xmlChar *elemName;
4205 xmlChar *attrName;
4206 xmlEnumerationPtr tree;
4207
4208 if ((RAW == '<') && (NXT(1) == '!') &&
4209 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4210 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4211 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4212 (NXT(8) == 'T')) {
4213 xmlParserInputPtr input = ctxt->input;
4214
4215 SKIP(9);
4216 if (!IS_BLANK(CUR)) {
4217 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4219 ctxt->sax->error(ctxt->userData,
4220 "Space required after '<!ATTLIST'\n");
4221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004225 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (elemName == NULL) {
4227 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4229 ctxt->sax->error(ctxt->userData,
4230 "ATTLIST: no name for Element\n");
4231 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004232 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 return;
4234 }
4235 SKIP_BLANKS;
4236 GROW;
4237 while (RAW != '>') {
4238 const xmlChar *check = CUR_PTR;
4239 int type;
4240 int def;
4241 xmlChar *defaultValue = NULL;
4242
4243 GROW;
4244 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (attrName == NULL) {
4247 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "ATTLIST: no name for Attribute\n");
4251 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004252 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 break;
4254 }
4255 GROW;
4256 if (!IS_BLANK(CUR)) {
4257 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4259 ctxt->sax->error(ctxt->userData,
4260 "Space required after the attribute name\n");
4261 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004262 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004263 if (attrName != NULL)
4264 xmlFree(attrName);
4265 if (defaultValue != NULL)
4266 xmlFree(defaultValue);
4267 break;
4268 }
4269 SKIP_BLANKS;
4270
4271 type = xmlParseAttributeType(ctxt, &tree);
4272 if (type <= 0) {
4273 if (attrName != NULL)
4274 xmlFree(attrName);
4275 if (defaultValue != NULL)
4276 xmlFree(defaultValue);
4277 break;
4278 }
4279
4280 GROW;
4281 if (!IS_BLANK(CUR)) {
4282 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285 "Space required after the attribute type\n");
4286 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004287 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (attrName != NULL)
4289 xmlFree(attrName);
4290 if (defaultValue != NULL)
4291 xmlFree(defaultValue);
4292 if (tree != NULL)
4293 xmlFreeEnumeration(tree);
4294 break;
4295 }
4296 SKIP_BLANKS;
4297
4298 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4299 if (def <= 0) {
4300 if (attrName != NULL)
4301 xmlFree(attrName);
4302 if (defaultValue != NULL)
4303 xmlFree(defaultValue);
4304 if (tree != NULL)
4305 xmlFreeEnumeration(tree);
4306 break;
4307 }
4308
4309 GROW;
4310 if (RAW != '>') {
4311 if (!IS_BLANK(CUR)) {
4312 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "Space required after the attribute default value\n");
4316 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004317 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 if (attrName != NULL)
4319 xmlFree(attrName);
4320 if (defaultValue != NULL)
4321 xmlFree(defaultValue);
4322 if (tree != NULL)
4323 xmlFreeEnumeration(tree);
4324 break;
4325 }
4326 SKIP_BLANKS;
4327 }
4328 if (check == CUR_PTR) {
4329 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "xmlParseAttributeListDecl: detected internal error\n");
4333 if (attrName != NULL)
4334 xmlFree(attrName);
4335 if (defaultValue != NULL)
4336 xmlFree(defaultValue);
4337 if (tree != NULL)
4338 xmlFreeEnumeration(tree);
4339 break;
4340 }
4341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4342 (ctxt->sax->attributeDecl != NULL))
4343 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4344 type, def, defaultValue, tree);
4345 if (attrName != NULL)
4346 xmlFree(attrName);
4347 if (defaultValue != NULL)
4348 xmlFree(defaultValue);
4349 GROW;
4350 }
4351 if (RAW == '>') {
4352 if (input != ctxt->input) {
4353 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4355 ctxt->sax->error(ctxt->userData,
4356"Attribute list declaration doesn't start and stop in the same entity\n");
4357 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004358 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 }
4360 NEXT;
4361 }
4362
4363 xmlFree(elemName);
4364 }
4365}
4366
4367/**
4368 * xmlParseElementMixedContentDecl:
4369 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004370 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004371 *
4372 * parse the declaration for a Mixed Element content
4373 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4374 *
4375 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4376 * '(' S? '#PCDATA' S? ')'
4377 *
4378 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4379 *
4380 * [ VC: No Duplicate Types ]
4381 * The same name must not appear more than once in a single
4382 * mixed-content declaration.
4383 *
4384 * returns: the list of the xmlElementContentPtr describing the element choices
4385 */
4386xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004387xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 xmlElementContentPtr ret = NULL, cur = NULL, n;
4389 xmlChar *elem = NULL;
4390
4391 GROW;
4392 if ((RAW == '#') && (NXT(1) == 'P') &&
4393 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4394 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4395 (NXT(6) == 'A')) {
4396 SKIP(7);
4397 SKIP_BLANKS;
4398 SHRINK;
4399 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004400 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4401 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4402 if (ctxt->vctxt.error != NULL)
4403 ctxt->vctxt.error(ctxt->vctxt.userData,
4404"Element content declaration doesn't start and stop in the same entity\n");
4405 ctxt->valid = 0;
4406 }
Owen Taylor3473f882001-02-23 17:55:21 +00004407 NEXT;
4408 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4409 if (RAW == '*') {
4410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4411 NEXT;
4412 }
4413 return(ret);
4414 }
4415 if ((RAW == '(') || (RAW == '|')) {
4416 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4417 if (ret == NULL) return(NULL);
4418 }
4419 while (RAW == '|') {
4420 NEXT;
4421 if (elem == NULL) {
4422 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4423 if (ret == NULL) return(NULL);
4424 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004425 if (cur != NULL)
4426 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004427 cur = ret;
4428 } else {
4429 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4430 if (n == NULL) return(NULL);
4431 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004432 if (n->c1 != NULL)
4433 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004435 if (n != NULL)
4436 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 cur = n;
4438 xmlFree(elem);
4439 }
4440 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004441 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 if (elem == NULL) {
4443 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4445 ctxt->sax->error(ctxt->userData,
4446 "xmlParseElementMixedContentDecl : Name expected\n");
4447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004449 xmlFreeElementContent(cur);
4450 return(NULL);
4451 }
4452 SKIP_BLANKS;
4453 GROW;
4454 }
4455 if ((RAW == ')') && (NXT(1) == '*')) {
4456 if (elem != NULL) {
4457 cur->c2 = xmlNewElementContent(elem,
4458 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004459 if (cur->c2 != NULL)
4460 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 xmlFree(elem);
4462 }
4463 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004464 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4465 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4466 if (ctxt->vctxt.error != NULL)
4467 ctxt->vctxt.error(ctxt->vctxt.userData,
4468"Element content declaration doesn't start and stop in the same entity\n");
4469 ctxt->valid = 0;
4470 }
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(2);
4472 } else {
4473 if (elem != NULL) xmlFree(elem);
4474 xmlFreeElementContent(ret);
4475 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4477 ctxt->sax->error(ctxt->userData,
4478 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(NULL);
4482 }
4483
4484 } else {
4485 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
4488 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4489 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004490 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 return(ret);
4493}
4494
4495/**
4496 * xmlParseElementChildrenContentDecl:
4497 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004498 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004499 *
4500 * parse the declaration for an element-children content model
4501 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4502 *
4503 *
4504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4505 *
4506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4507 *
4508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4509 *
4510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4511 *
4512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4513 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004514 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004515 * opening or closing parentheses in a choice, seq, or Mixed
4516 * construct is contained in the replacement text for a parameter
4517 * entity, both must be contained in the same replacement text. For
4518 * interoperability, if a parameter-entity reference appears in a
4519 * choice, seq, or Mixed construct, its replacement text should not
4520 * be empty, and neither the first nor last non-blank character of
4521 * the replacement text should be a connector (| or ,).
4522 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004523 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004524 * hierarchy.
4525 */
4526xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004527xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004528(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004529 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4530 xmlChar *elem;
4531 xmlChar type = 0;
4532
4533 SKIP_BLANKS;
4534 GROW;
4535 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004536 xmlParserInputPtr input = ctxt->input;
4537
Owen Taylor3473f882001-02-23 17:55:21 +00004538 /* Recurse on first child */
4539 NEXT;
4540 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004541 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004542 SKIP_BLANKS;
4543 GROW;
4544 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004545 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004546 if (elem == NULL) {
4547 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(NULL);
4554 }
4555 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4556 GROW;
4557 if (RAW == '?') {
4558 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4559 NEXT;
4560 } else if (RAW == '*') {
4561 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4562 NEXT;
4563 } else if (RAW == '+') {
4564 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4565 NEXT;
4566 } else {
4567 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4568 }
4569 xmlFree(elem);
4570 GROW;
4571 }
4572 SKIP_BLANKS;
4573 SHRINK;
4574 while (RAW != ')') {
4575 /*
4576 * Each loop we parse one separator and one element.
4577 */
4578 if (RAW == ',') {
4579 if (type == 0) type = CUR;
4580
4581 /*
4582 * Detect "Name | Name , Name" error
4583 */
4584 else if (type != CUR) {
4585 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4589 type);
4590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004592 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004593 xmlFreeElementContent(last);
4594 if (ret != NULL)
4595 xmlFreeElementContent(ret);
4596 return(NULL);
4597 }
4598 NEXT;
4599
4600 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4601 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004602 if ((last != NULL) && (last != ret))
4603 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004604 xmlFreeElementContent(ret);
4605 return(NULL);
4606 }
4607 if (last == NULL) {
4608 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (ret != NULL)
4610 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 ret = cur = op;
4612 } else {
4613 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004614 if (op != NULL)
4615 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004617 if (last != NULL)
4618 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004619 cur =op;
4620 last = NULL;
4621 }
4622 } else if (RAW == '|') {
4623 if (type == 0) type = CUR;
4624
4625 /*
4626 * Detect "Name , Name | Name" error
4627 */
4628 else if (type != CUR) {
4629 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4631 ctxt->sax->error(ctxt->userData,
4632 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4633 type);
4634 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004636 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004637 xmlFreeElementContent(last);
4638 if (ret != NULL)
4639 xmlFreeElementContent(ret);
4640 return(NULL);
4641 }
4642 NEXT;
4643
4644 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4645 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004646 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlFreeElementContent(last);
4648 if (ret != NULL)
4649 xmlFreeElementContent(ret);
4650 return(NULL);
4651 }
4652 if (last == NULL) {
4653 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004654 if (ret != NULL)
4655 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 ret = cur = op;
4657 } else {
4658 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004659 if (op != NULL)
4660 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004662 if (last != NULL)
4663 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004664 cur =op;
4665 last = NULL;
4666 }
4667 } else {
4668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ret != NULL)
4675 xmlFreeElementContent(ret);
4676 return(NULL);
4677 }
4678 GROW;
4679 SKIP_BLANKS;
4680 GROW;
4681 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004682 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004683 /* Recurse on second child */
4684 NEXT;
4685 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004686 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 SKIP_BLANKS;
4688 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004689 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if (elem == NULL) {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 if (ret != NULL)
4698 xmlFreeElementContent(ret);
4699 return(NULL);
4700 }
4701 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4702 xmlFree(elem);
4703 if (RAW == '?') {
4704 last->ocur = XML_ELEMENT_CONTENT_OPT;
4705 NEXT;
4706 } else if (RAW == '*') {
4707 last->ocur = XML_ELEMENT_CONTENT_MULT;
4708 NEXT;
4709 } else if (RAW == '+') {
4710 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4711 NEXT;
4712 } else {
4713 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4714 }
4715 }
4716 SKIP_BLANKS;
4717 GROW;
4718 }
4719 if ((cur != NULL) && (last != NULL)) {
4720 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004721 if (last != NULL)
4722 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004724 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4725 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4726 if (ctxt->vctxt.error != NULL)
4727 ctxt->vctxt.error(ctxt->vctxt.userData,
4728"Element content declaration doesn't start and stop in the same entity\n");
4729 ctxt->valid = 0;
4730 }
Owen Taylor3473f882001-02-23 17:55:21 +00004731 NEXT;
4732 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004733 if (ret != NULL)
4734 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 NEXT;
4736 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004737 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004738 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004739 cur = ret;
4740 /*
4741 * Some normalization:
4742 * (a | b* | c?)* == (a | b | c)*
4743 */
4744 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4745 if ((cur->c1 != NULL) &&
4746 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4747 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4748 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4749 if ((cur->c2 != NULL) &&
4750 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4752 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 cur = cur->c2;
4754 }
4755 }
Owen Taylor3473f882001-02-23 17:55:21 +00004756 NEXT;
4757 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004758 if (ret != NULL) {
4759 int found = 0;
4760
Daniel Veillarde470df72001-04-18 21:41:07 +00004761 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004762 /*
4763 * Some normalization:
4764 * (a | b*)+ == (a | b)*
4765 * (a | b?)+ == (a | b)*
4766 */
4767 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4768 if ((cur->c1 != NULL) &&
4769 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4770 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4771 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4772 found = 1;
4773 }
4774 if ((cur->c2 != NULL) &&
4775 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4776 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4777 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4778 found = 1;
4779 }
4780 cur = cur->c2;
4781 }
4782 if (found)
4783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4784 }
Owen Taylor3473f882001-02-23 17:55:21 +00004785 NEXT;
4786 }
4787 return(ret);
4788}
4789
4790/**
4791 * xmlParseElementContentDecl:
4792 * @ctxt: an XML parser context
4793 * @name: the name of the element being defined.
4794 * @result: the Element Content pointer will be stored here if any
4795 *
4796 * parse the declaration for an Element content either Mixed or Children,
4797 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4798 *
4799 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4800 *
4801 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4802 */
4803
4804int
4805xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4806 xmlElementContentPtr *result) {
4807
4808 xmlElementContentPtr tree = NULL;
4809 xmlParserInputPtr input = ctxt->input;
4810 int res;
4811
4812 *result = NULL;
4813
4814 if (RAW != '(') {
4815 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004818 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(-1);
4822 }
4823 NEXT;
4824 GROW;
4825 SKIP_BLANKS;
4826 if ((RAW == '#') && (NXT(1) == 'P') &&
4827 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4828 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4829 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004830 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_MIXED;
4832 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004833 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004834 res = XML_ELEMENT_TYPE_ELEMENT;
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP_BLANKS;
4837 *result = tree;
4838 return(res);
4839}
4840
4841/**
4842 * xmlParseElementDecl:
4843 * @ctxt: an XML parser context
4844 *
4845 * parse an Element declaration.
4846 *
4847 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4848 *
4849 * [ VC: Unique Element Type Declaration ]
4850 * No element type may be declared more than once
4851 *
4852 * Returns the type of the element, or -1 in case of error
4853 */
4854int
4855xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4856 xmlChar *name;
4857 int ret = -1;
4858 xmlElementContentPtr content = NULL;
4859
4860 GROW;
4861 if ((RAW == '<') && (NXT(1) == '!') &&
4862 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4863 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4864 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4865 (NXT(8) == 'T')) {
4866 xmlParserInputPtr input = ctxt->input;
4867
4868 SKIP(9);
4869 if (!IS_BLANK(CUR)) {
4870 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4872 ctxt->sax->error(ctxt->userData,
4873 "Space required after 'ELEMENT'\n");
4874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004878 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (name == NULL) {
4880 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4882 ctxt->sax->error(ctxt->userData,
4883 "xmlParseElementDecl: no name for Element\n");
4884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 return(-1);
4887 }
4888 while ((RAW == 0) && (ctxt->inputNr > 1))
4889 xmlPopInput(ctxt);
4890 if (!IS_BLANK(CUR)) {
4891 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4893 ctxt->sax->error(ctxt->userData,
4894 "Space required after the element name\n");
4895 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004896 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 }
4898 SKIP_BLANKS;
4899 if ((RAW == 'E') && (NXT(1) == 'M') &&
4900 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4901 (NXT(4) == 'Y')) {
4902 SKIP(5);
4903 /*
4904 * Element must always be empty.
4905 */
4906 ret = XML_ELEMENT_TYPE_EMPTY;
4907 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4908 (NXT(2) == 'Y')) {
4909 SKIP(3);
4910 /*
4911 * Element is a generic container.
4912 */
4913 ret = XML_ELEMENT_TYPE_ANY;
4914 } else if (RAW == '(') {
4915 ret = xmlParseElementContentDecl(ctxt, name, &content);
4916 } else {
4917 /*
4918 * [ WFC: PEs in Internal Subset ] error handling.
4919 */
4920 if ((RAW == '%') && (ctxt->external == 0) &&
4921 (ctxt->inputNr == 1)) {
4922 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4924 ctxt->sax->error(ctxt->userData,
4925 "PEReference: forbidden within markup decl in internal subset\n");
4926 } else {
4927 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4931 }
4932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name != NULL) xmlFree(name);
4935 return(-1);
4936 }
4937
4938 SKIP_BLANKS;
4939 /*
4940 * Pop-up of finished entities.
4941 */
4942 while ((RAW == 0) && (ctxt->inputNr > 1))
4943 xmlPopInput(ctxt);
4944 SKIP_BLANKS;
4945
4946 if (RAW != '>') {
4947 ctxt->errNo = XML_ERR_GT_REQUIRED;
4948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4949 ctxt->sax->error(ctxt->userData,
4950 "xmlParseElementDecl: expected '>' at the end\n");
4951 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004952 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004953 } else {
4954 if (input != ctxt->input) {
4955 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4957 ctxt->sax->error(ctxt->userData,
4958"Element declaration doesn't start and stop in the same entity\n");
4959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962
4963 NEXT;
4964 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4965 (ctxt->sax->elementDecl != NULL))
4966 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4967 content);
4968 }
4969 if (content != NULL) {
4970 xmlFreeElementContent(content);
4971 }
4972 if (name != NULL) {
4973 xmlFree(name);
4974 }
4975 }
4976 return(ret);
4977}
4978
4979/**
Owen Taylor3473f882001-02-23 17:55:21 +00004980 * xmlParseConditionalSections
4981 * @ctxt: an XML parser context
4982 *
4983 * [61] conditionalSect ::= includeSect | ignoreSect
4984 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4985 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4986 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4987 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4988 */
4989
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004990static void
Owen Taylor3473f882001-02-23 17:55:21 +00004991xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4992 SKIP(3);
4993 SKIP_BLANKS;
4994 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4995 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4996 (NXT(6) == 'E')) {
4997 SKIP(7);
4998 SKIP_BLANKS;
4999 if (RAW != '[') {
5000 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5002 ctxt->sax->error(ctxt->userData,
5003 "XML conditional section '[' expected\n");
5004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 } else {
5007 NEXT;
5008 }
5009 if (xmlParserDebugEntities) {
5010 if ((ctxt->input != NULL) && (ctxt->input->filename))
5011 xmlGenericError(xmlGenericErrorContext,
5012 "%s(%d): ", ctxt->input->filename,
5013 ctxt->input->line);
5014 xmlGenericError(xmlGenericErrorContext,
5015 "Entering INCLUDE Conditional Section\n");
5016 }
5017
5018 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5019 (NXT(2) != '>'))) {
5020 const xmlChar *check = CUR_PTR;
5021 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5024 xmlParseConditionalSections(ctxt);
5025 } else if (IS_BLANK(CUR)) {
5026 NEXT;
5027 } else if (RAW == '%') {
5028 xmlParsePEReference(ctxt);
5029 } else
5030 xmlParseMarkupDecl(ctxt);
5031
5032 /*
5033 * Pop-up of finished entities.
5034 */
5035 while ((RAW == 0) && (ctxt->inputNr > 1))
5036 xmlPopInput(ctxt);
5037
Daniel Veillardfdc91562002-07-01 21:52:03 +00005038 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "Content error in the external subset\n");
5043 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005044 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 break;
5046 }
5047 }
5048 if (xmlParserDebugEntities) {
5049 if ((ctxt->input != NULL) && (ctxt->input->filename))
5050 xmlGenericError(xmlGenericErrorContext,
5051 "%s(%d): ", ctxt->input->filename,
5052 ctxt->input->line);
5053 xmlGenericError(xmlGenericErrorContext,
5054 "Leaving INCLUDE Conditional Section\n");
5055 }
5056
5057 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5058 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5059 int state;
5060 int instate;
5061 int depth = 0;
5062
5063 SKIP(6);
5064 SKIP_BLANKS;
5065 if (RAW != '[') {
5066 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5068 ctxt->sax->error(ctxt->userData,
5069 "XML conditional section '[' expected\n");
5070 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005071 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005072 } else {
5073 NEXT;
5074 }
5075 if (xmlParserDebugEntities) {
5076 if ((ctxt->input != NULL) && (ctxt->input->filename))
5077 xmlGenericError(xmlGenericErrorContext,
5078 "%s(%d): ", ctxt->input->filename,
5079 ctxt->input->line);
5080 xmlGenericError(xmlGenericErrorContext,
5081 "Entering IGNORE Conditional Section\n");
5082 }
5083
5084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005085 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * But disable SAX event generating DTD building in the meantime
5087 */
5088 state = ctxt->disableSAX;
5089 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005090 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 ctxt->instate = XML_PARSER_IGNORE;
5092
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005093 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5095 depth++;
5096 SKIP(3);
5097 continue;
5098 }
5099 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5100 if (--depth >= 0) SKIP(3);
5101 continue;
5102 }
5103 NEXT;
5104 continue;
5105 }
5106
5107 ctxt->disableSAX = state;
5108 ctxt->instate = instate;
5109
5110 if (xmlParserDebugEntities) {
5111 if ((ctxt->input != NULL) && (ctxt->input->filename))
5112 xmlGenericError(xmlGenericErrorContext,
5113 "%s(%d): ", ctxt->input->filename,
5114 ctxt->input->line);
5115 xmlGenericError(xmlGenericErrorContext,
5116 "Leaving IGNORE Conditional Section\n");
5117 }
5118
5119 } else {
5120 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005126 }
5127
5128 if (RAW == 0)
5129 SHRINK;
5130
5131 if (RAW == 0) {
5132 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5134 ctxt->sax->error(ctxt->userData,
5135 "XML conditional section not closed\n");
5136 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 } else {
5139 SKIP(3);
5140 }
5141}
5142
5143/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005144 * xmlParseMarkupDecl:
5145 * @ctxt: an XML parser context
5146 *
5147 * parse Markup declarations
5148 *
5149 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5150 * NotationDecl | PI | Comment
5151 *
5152 * [ VC: Proper Declaration/PE Nesting ]
5153 * Parameter-entity replacement text must be properly nested with
5154 * markup declarations. That is to say, if either the first character
5155 * or the last character of a markup declaration (markupdecl above) is
5156 * contained in the replacement text for a parameter-entity reference,
5157 * both must be contained in the same replacement text.
5158 *
5159 * [ WFC: PEs in Internal Subset ]
5160 * In the internal DTD subset, parameter-entity references can occur
5161 * only where markup declarations can occur, not within markup declarations.
5162 * (This does not apply to references that occur in external parameter
5163 * entities or to the external subset.)
5164 */
5165void
5166xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5167 GROW;
5168 xmlParseElementDecl(ctxt);
5169 xmlParseAttributeListDecl(ctxt);
5170 xmlParseEntityDecl(ctxt);
5171 xmlParseNotationDecl(ctxt);
5172 xmlParsePI(ctxt);
5173 xmlParseComment(ctxt);
5174 /*
5175 * This is only for internal subset. On external entities,
5176 * the replacement is done before parsing stage
5177 */
5178 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5179 xmlParsePEReference(ctxt);
5180
5181 /*
5182 * Conditional sections are allowed from entities included
5183 * by PE References in the internal subset.
5184 */
5185 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5186 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5187 xmlParseConditionalSections(ctxt);
5188 }
5189 }
5190
5191 ctxt->instate = XML_PARSER_DTD;
5192}
5193
5194/**
5195 * xmlParseTextDecl:
5196 * @ctxt: an XML parser context
5197 *
5198 * parse an XML declaration header for external entities
5199 *
5200 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5201 *
5202 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5203 */
5204
5205void
5206xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5207 xmlChar *version;
5208
5209 /*
5210 * We know that '<?xml' is here.
5211 */
5212 if ((RAW == '<') && (NXT(1) == '?') &&
5213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5215 SKIP(5);
5216 } else {
5217 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5219 ctxt->sax->error(ctxt->userData,
5220 "Text declaration '<?xml' required\n");
5221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005223
5224 return;
5225 }
5226
5227 if (!IS_BLANK(CUR)) {
5228 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Space needed after '<?xml'\n");
5232 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005233 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005234 }
5235 SKIP_BLANKS;
5236
5237 /*
5238 * We may have the VersionInfo here.
5239 */
5240 version = xmlParseVersionInfo(ctxt);
5241 if (version == NULL)
5242 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005243 else {
5244 if (!IS_BLANK(CUR)) {
5245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005250 }
5251 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 ctxt->input->version = version;
5253
5254 /*
5255 * We must have the encoding declaration
5256 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005257 xmlParseEncodingDecl(ctxt);
5258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5259 /*
5260 * The XML REC instructs us to stop parsing right here
5261 */
5262 return;
5263 }
5264
5265 SKIP_BLANKS;
5266 if ((RAW == '?') && (NXT(1) == '>')) {
5267 SKIP(2);
5268 } else if (RAW == '>') {
5269 /* Deprecated old WD ... */
5270 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "XML declaration must end-up with '?>'\n");
5274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005276 NEXT;
5277 } else {
5278 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "parsing XML declaration: '?>' expected\n");
5282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005284 MOVETO_ENDTAG(CUR_PTR);
5285 NEXT;
5286 }
5287}
5288
5289/**
Owen Taylor3473f882001-02-23 17:55:21 +00005290 * xmlParseExternalSubset:
5291 * @ctxt: an XML parser context
5292 * @ExternalID: the external identifier
5293 * @SystemID: the system identifier (or URL)
5294 *
5295 * parse Markup declarations from an external subset
5296 *
5297 * [30] extSubset ::= textDecl? extSubsetDecl
5298 *
5299 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5300 */
5301void
5302xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5303 const xmlChar *SystemID) {
5304 GROW;
5305 if ((RAW == '<') && (NXT(1) == '?') &&
5306 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5307 (NXT(4) == 'l')) {
5308 xmlParseTextDecl(ctxt);
5309 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5310 /*
5311 * The XML REC instructs us to stop parsing right here
5312 */
5313 ctxt->instate = XML_PARSER_EOF;
5314 return;
5315 }
5316 }
5317 if (ctxt->myDoc == NULL) {
5318 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5319 }
5320 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5321 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5322
5323 ctxt->instate = XML_PARSER_DTD;
5324 ctxt->external = 1;
5325 while (((RAW == '<') && (NXT(1) == '?')) ||
5326 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005327 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005328 const xmlChar *check = CUR_PTR;
5329 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005330
5331 GROW;
5332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5333 xmlParseConditionalSections(ctxt);
5334 } else if (IS_BLANK(CUR)) {
5335 NEXT;
5336 } else if (RAW == '%') {
5337 xmlParsePEReference(ctxt);
5338 } else
5339 xmlParseMarkupDecl(ctxt);
5340
5341 /*
5342 * Pop-up of finished entities.
5343 */
5344 while ((RAW == 0) && (ctxt->inputNr > 1))
5345 xmlPopInput(ctxt);
5346
Daniel Veillardfdc91562002-07-01 21:52:03 +00005347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005348 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "Content error in the external subset\n");
5352 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005353 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 break;
5355 }
5356 }
5357
5358 if (RAW != 0) {
5359 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "Extra content at the end of the document\n");
5363 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005364 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005365 }
5366
5367}
5368
5369/**
5370 * xmlParseReference:
5371 * @ctxt: an XML parser context
5372 *
5373 * parse and handle entity references in content, depending on the SAX
5374 * interface, this may end-up in a call to character() if this is a
5375 * CharRef, a predefined entity, if there is no reference() callback.
5376 * or if the parser was asked to switch to that mode.
5377 *
5378 * [67] Reference ::= EntityRef | CharRef
5379 */
5380void
5381xmlParseReference(xmlParserCtxtPtr ctxt) {
5382 xmlEntityPtr ent;
5383 xmlChar *val;
5384 if (RAW != '&') return;
5385
5386 if (NXT(1) == '#') {
5387 int i = 0;
5388 xmlChar out[10];
5389 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005390 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5393 /*
5394 * So we are using non-UTF-8 buffers
5395 * Check that the char fit on 8bits, if not
5396 * generate a CharRef.
5397 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005398 if (value <= 0xFF) {
5399 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 out[1] = 0;
5401 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5402 (!ctxt->disableSAX))
5403 ctxt->sax->characters(ctxt->userData, out, 1);
5404 } else {
5405 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005406 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005408 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5410 (!ctxt->disableSAX))
5411 ctxt->sax->reference(ctxt->userData, out);
5412 }
5413 } else {
5414 /*
5415 * Just encode the value in UTF-8
5416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005417 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 out[i] = 0;
5419 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5420 (!ctxt->disableSAX))
5421 ctxt->sax->characters(ctxt->userData, out, i);
5422 }
5423 } else {
5424 ent = xmlParseEntityRef(ctxt);
5425 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005426 if (!ctxt->wellFormed)
5427 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 if ((ent->name != NULL) &&
5429 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5430 xmlNodePtr list = NULL;
5431 int ret;
5432
5433
5434 /*
5435 * The first reference to the entity trigger a parsing phase
5436 * where the ent->children is filled with the result from
5437 * the parsing.
5438 */
5439 if (ent->children == NULL) {
5440 xmlChar *value;
5441 value = ent->content;
5442
5443 /*
5444 * Check that this entity is well formed
5445 */
5446 if ((value != NULL) &&
5447 (value[1] == 0) && (value[0] == '<') &&
5448 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5449 /*
5450 * DONE: get definite answer on this !!!
5451 * Lots of entity decls are used to declare a single
5452 * char
5453 * <!ENTITY lt "<">
5454 * Which seems to be valid since
5455 * 2.4: The ampersand character (&) and the left angle
5456 * bracket (<) may appear in their literal form only
5457 * when used ... They are also legal within the literal
5458 * entity value of an internal entity declaration;i
5459 * see "4.3.2 Well-Formed Parsed Entities".
5460 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5461 * Looking at the OASIS test suite and James Clark
5462 * tests, this is broken. However the XML REC uses
5463 * it. Is the XML REC not well-formed ????
5464 * This is a hack to avoid this problem
5465 *
5466 * ANSWER: since lt gt amp .. are already defined,
5467 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005468 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005469 * is lousy but acceptable.
5470 */
5471 list = xmlNewDocText(ctxt->myDoc, value);
5472 if (list != NULL) {
5473 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5474 (ent->children == NULL)) {
5475 ent->children = list;
5476 ent->last = list;
5477 list->parent = (xmlNodePtr) ent;
5478 } else {
5479 xmlFreeNodeList(list);
5480 }
5481 } else if (list != NULL) {
5482 xmlFreeNodeList(list);
5483 }
5484 } else {
5485 /*
5486 * 4.3.2: An internal general parsed entity is well-formed
5487 * if its replacement text matches the production labeled
5488 * content.
5489 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005490
5491 void *user_data;
5492 /*
5493 * This is a bit hackish but this seems the best
5494 * way to make sure both SAX and DOM entity support
5495 * behaves okay.
5496 */
5497 if (ctxt->userData == ctxt)
5498 user_data = NULL;
5499 else
5500 user_data = ctxt->userData;
5501
Owen Taylor3473f882001-02-23 17:55:21 +00005502 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5503 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005504 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5505 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005506 ctxt->depth--;
5507 } else if (ent->etype ==
5508 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5509 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005510 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005511 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005512 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005513 ctxt->depth--;
5514 } else {
5515 ret = -1;
5516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "Internal: invalid entity type\n");
5519 }
5520 if (ret == XML_ERR_ENTITY_LOOP) {
5521 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5523 ctxt->sax->error(ctxt->userData,
5524 "Detected entity reference loop\n");
5525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005527 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005528 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005529 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5530 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005531 (ent->children == NULL)) {
5532 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005533 if (ctxt->replaceEntities) {
5534 /*
5535 * Prune it directly in the generated document
5536 * except for single text nodes.
5537 */
5538 if ((list->type == XML_TEXT_NODE) &&
5539 (list->next == NULL)) {
5540 list->parent = (xmlNodePtr) ent;
5541 list = NULL;
5542 } else {
5543 while (list != NULL) {
5544 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005545 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005546 if (list->next == NULL)
5547 ent->last = list;
5548 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005549 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005550 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005551 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5552 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005553 }
5554 } else {
5555 while (list != NULL) {
5556 list->parent = (xmlNodePtr) ent;
5557 if (list->next == NULL)
5558 ent->last = list;
5559 list = list->next;
5560 }
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
5562 } else {
5563 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005564 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 } else if (ret > 0) {
5567 ctxt->errNo = ret;
5568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5569 ctxt->sax->error(ctxt->userData,
5570 "Entity value required\n");
5571 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005572 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005573 } else if (list != NULL) {
5574 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 }
5578 }
5579 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5580 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5581 /*
5582 * Create a node.
5583 */
5584 ctxt->sax->reference(ctxt->userData, ent->name);
5585 return;
5586 } else if (ctxt->replaceEntities) {
5587 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5588 /*
5589 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005591 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005592 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005593 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005594 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005595 cur = ent->children;
5596 while (cur != NULL) {
5597 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005598 if (firstChild == NULL){
5599 firstChild = new;
5600 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005601 xmlAddChild(ctxt->node, new);
5602 if (cur == ent->last)
5603 break;
5604 cur = cur->next;
5605 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5607 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005608 } else {
5609 /*
5610 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005611 * node with a possible previous text one which
5612 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005613 */
5614 if (ent->children->type == XML_TEXT_NODE)
5615 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5616 if ((ent->last != ent->children) &&
5617 (ent->last->type == XML_TEXT_NODE))
5618 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5619 xmlAddChildList(ctxt->node, ent->children);
5620 }
5621
Owen Taylor3473f882001-02-23 17:55:21 +00005622 /*
5623 * This is to avoid a nasty side effect, see
5624 * characters() in SAX.c
5625 */
5626 ctxt->nodemem = 0;
5627 ctxt->nodelen = 0;
5628 return;
5629 } else {
5630 /*
5631 * Probably running in SAX mode
5632 */
5633 xmlParserInputPtr input;
5634
5635 input = xmlNewEntityInputStream(ctxt, ent);
5636 xmlPushInput(ctxt, input);
5637 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5638 (RAW == '<') && (NXT(1) == '?') &&
5639 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5640 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5641 xmlParseTextDecl(ctxt);
5642 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5643 /*
5644 * The XML REC instructs us to stop parsing right here
5645 */
5646 ctxt->instate = XML_PARSER_EOF;
5647 return;
5648 }
5649 if (input->standalone == 1) {
5650 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5652 ctxt->sax->error(ctxt->userData,
5653 "external parsed entities cannot be standalone\n");
5654 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005655 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005656 }
5657 }
5658 return;
5659 }
5660 }
5661 } else {
5662 val = ent->content;
5663 if (val == NULL) return;
5664 /*
5665 * inline the entity.
5666 */
5667 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5668 (!ctxt->disableSAX))
5669 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5670 }
5671 }
5672}
5673
5674/**
5675 * xmlParseEntityRef:
5676 * @ctxt: an XML parser context
5677 *
5678 * parse ENTITY references declarations
5679 *
5680 * [68] EntityRef ::= '&' Name ';'
5681 *
5682 * [ WFC: Entity Declared ]
5683 * In a document without any DTD, a document with only an internal DTD
5684 * subset which contains no parameter entity references, or a document
5685 * with "standalone='yes'", the Name given in the entity reference
5686 * must match that in an entity declaration, except that well-formed
5687 * documents need not declare any of the following entities: amp, lt,
5688 * gt, apos, quot. The declaration of a parameter entity must precede
5689 * any reference to it. Similarly, the declaration of a general entity
5690 * must precede any reference to it which appears in a default value in an
5691 * attribute-list declaration. Note that if entities are declared in the
5692 * external subset or in external parameter entities, a non-validating
5693 * processor is not obligated to read and process their declarations;
5694 * for such documents, the rule that an entity must be declared is a
5695 * well-formedness constraint only if standalone='yes'.
5696 *
5697 * [ WFC: Parsed Entity ]
5698 * An entity reference must not contain the name of an unparsed entity
5699 *
5700 * Returns the xmlEntityPtr if found, or NULL otherwise.
5701 */
5702xmlEntityPtr
5703xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5704 xmlChar *name;
5705 xmlEntityPtr ent = NULL;
5706
5707 GROW;
5708
5709 if (RAW == '&') {
5710 NEXT;
5711 name = xmlParseName(ctxt);
5712 if (name == NULL) {
5713 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
5716 "xmlParseEntityRef: no name\n");
5717 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005718 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005719 } else {
5720 if (RAW == ';') {
5721 NEXT;
5722 /*
5723 * Ask first SAX for entity resolution, otherwise try the
5724 * predefined set.
5725 */
5726 if (ctxt->sax != NULL) {
5727 if (ctxt->sax->getEntity != NULL)
5728 ent = ctxt->sax->getEntity(ctxt->userData, name);
5729 if (ent == NULL)
5730 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5732 ent = getEntity(ctxt, name);
5733 }
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 /*
5736 * [ WFC: Entity Declared ]
5737 * In a document without any DTD, a document with only an
5738 * internal DTD subset which contains no parameter entity
5739 * references, or a document with "standalone='yes'", the
5740 * Name given in the entity reference must match that in an
5741 * entity declaration, except that well-formed documents
5742 * need not declare any of the following entities: amp, lt,
5743 * gt, apos, quot.
5744 * The declaration of a parameter entity must precede any
5745 * reference to it.
5746 * Similarly, the declaration of a general entity must
5747 * precede any reference to it which appears in a default
5748 * value in an attribute-list declaration. Note that if
5749 * entities are declared in the external subset or in
5750 * external parameter entities, a non-validating processor
5751 * is not obligated to read and process their declarations;
5752 * for such documents, the rule that an entity must be
5753 * declared is a well-formedness constraint only if
5754 * standalone='yes'.
5755 */
5756 if (ent == NULL) {
5757 if ((ctxt->standalone == 1) ||
5758 ((ctxt->hasExternalSubset == 0) &&
5759 (ctxt->hasPErefs == 0))) {
5760 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "Entity '%s' not defined\n", name);
5764 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005765 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005766 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005767 } else {
5768 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005770 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005771 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005772 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005773 }
5774 }
5775
5776 /*
5777 * [ WFC: Parsed Entity ]
5778 * An entity reference must not contain the name of an
5779 * unparsed entity
5780 */
5781 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5782 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5784 ctxt->sax->error(ctxt->userData,
5785 "Entity reference to unparsed entity %s\n", name);
5786 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005787 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005788 }
5789
5790 /*
5791 * [ WFC: No External Entity References ]
5792 * Attribute values cannot contain direct or indirect
5793 * entity references to external entities.
5794 */
5795 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5796 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5797 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799 ctxt->sax->error(ctxt->userData,
5800 "Attribute references external entity '%s'\n", name);
5801 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005802 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 /*
5805 * [ WFC: No < in Attribute Values ]
5806 * The replacement text of any entity referred to directly or
5807 * indirectly in an attribute value (other than "&lt;") must
5808 * not contain a <.
5809 */
5810 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5811 (ent != NULL) &&
5812 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5813 (ent->content != NULL) &&
5814 (xmlStrchr(ent->content, '<'))) {
5815 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5817 ctxt->sax->error(ctxt->userData,
5818 "'<' in entity '%s' is not allowed in attributes values\n", name);
5819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005821 }
5822
5823 /*
5824 * Internal check, no parameter entities here ...
5825 */
5826 else {
5827 switch (ent->etype) {
5828 case XML_INTERNAL_PARAMETER_ENTITY:
5829 case XML_EXTERNAL_PARAMETER_ENTITY:
5830 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5832 ctxt->sax->error(ctxt->userData,
5833 "Attempt to reference the parameter entity '%s'\n", name);
5834 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005835 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005836 break;
5837 default:
5838 break;
5839 }
5840 }
5841
5842 /*
5843 * [ WFC: No Recursion ]
5844 * A parsed entity must not contain a recursive reference
5845 * to itself, either directly or indirectly.
5846 * Done somewhere else
5847 */
5848
5849 } else {
5850 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852 ctxt->sax->error(ctxt->userData,
5853 "xmlParseEntityRef: expecting ';'\n");
5854 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005855 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005856 }
5857 xmlFree(name);
5858 }
5859 }
5860 return(ent);
5861}
5862
5863/**
5864 * xmlParseStringEntityRef:
5865 * @ctxt: an XML parser context
5866 * @str: a pointer to an index in the string
5867 *
5868 * parse ENTITY references declarations, but this version parses it from
5869 * a string value.
5870 *
5871 * [68] EntityRef ::= '&' Name ';'
5872 *
5873 * [ WFC: Entity Declared ]
5874 * In a document without any DTD, a document with only an internal DTD
5875 * subset which contains no parameter entity references, or a document
5876 * with "standalone='yes'", the Name given in the entity reference
5877 * must match that in an entity declaration, except that well-formed
5878 * documents need not declare any of the following entities: amp, lt,
5879 * gt, apos, quot. The declaration of a parameter entity must precede
5880 * any reference to it. Similarly, the declaration of a general entity
5881 * must precede any reference to it which appears in a default value in an
5882 * attribute-list declaration. Note that if entities are declared in the
5883 * external subset or in external parameter entities, a non-validating
5884 * processor is not obligated to read and process their declarations;
5885 * for such documents, the rule that an entity must be declared is a
5886 * well-formedness constraint only if standalone='yes'.
5887 *
5888 * [ WFC: Parsed Entity ]
5889 * An entity reference must not contain the name of an unparsed entity
5890 *
5891 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5892 * is updated to the current location in the string.
5893 */
5894xmlEntityPtr
5895xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5896 xmlChar *name;
5897 const xmlChar *ptr;
5898 xmlChar cur;
5899 xmlEntityPtr ent = NULL;
5900
5901 if ((str == NULL) || (*str == NULL))
5902 return(NULL);
5903 ptr = *str;
5904 cur = *ptr;
5905 if (cur == '&') {
5906 ptr++;
5907 cur = *ptr;
5908 name = xmlParseStringName(ctxt, &ptr);
5909 if (name == NULL) {
5910 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5912 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005913 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 } else {
5917 if (*ptr == ';') {
5918 ptr++;
5919 /*
5920 * Ask first SAX for entity resolution, otherwise try the
5921 * predefined set.
5922 */
5923 if (ctxt->sax != NULL) {
5924 if (ctxt->sax->getEntity != NULL)
5925 ent = ctxt->sax->getEntity(ctxt->userData, name);
5926 if (ent == NULL)
5927 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005928 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5929 ent = getEntity(ctxt, name);
5930 }
Owen Taylor3473f882001-02-23 17:55:21 +00005931 }
5932 /*
5933 * [ WFC: Entity Declared ]
5934 * In a document without any DTD, a document with only an
5935 * internal DTD subset which contains no parameter entity
5936 * references, or a document with "standalone='yes'", the
5937 * Name given in the entity reference must match that in an
5938 * entity declaration, except that well-formed documents
5939 * need not declare any of the following entities: amp, lt,
5940 * gt, apos, quot.
5941 * The declaration of a parameter entity must precede any
5942 * reference to it.
5943 * Similarly, the declaration of a general entity must
5944 * precede any reference to it which appears in a default
5945 * value in an attribute-list declaration. Note that if
5946 * entities are declared in the external subset or in
5947 * external parameter entities, a non-validating processor
5948 * is not obligated to read and process their declarations;
5949 * for such documents, the rule that an entity must be
5950 * declared is a well-formedness constraint only if
5951 * standalone='yes'.
5952 */
5953 if (ent == NULL) {
5954 if ((ctxt->standalone == 1) ||
5955 ((ctxt->hasExternalSubset == 0) &&
5956 (ctxt->hasPErefs == 0))) {
5957 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "Entity '%s' not defined\n", name);
5961 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005962 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 } else {
5964 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5965 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5966 ctxt->sax->warning(ctxt->userData,
5967 "Entity '%s' not defined\n", name);
5968 }
5969 }
5970
5971 /*
5972 * [ WFC: Parsed Entity ]
5973 * An entity reference must not contain the name of an
5974 * unparsed entity
5975 */
5976 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5977 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
5980 "Entity reference to unparsed entity %s\n", name);
5981 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005982 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005983 }
5984
5985 /*
5986 * [ WFC: No External Entity References ]
5987 * Attribute values cannot contain direct or indirect
5988 * entity references to external entities.
5989 */
5990 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5991 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5992 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5994 ctxt->sax->error(ctxt->userData,
5995 "Attribute references external entity '%s'\n", name);
5996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 /*
6000 * [ WFC: No < in Attribute Values ]
6001 * The replacement text of any entity referred to directly or
6002 * indirectly in an attribute value (other than "&lt;") must
6003 * not contain a <.
6004 */
6005 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6006 (ent != NULL) &&
6007 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6008 (ent->content != NULL) &&
6009 (xmlStrchr(ent->content, '<'))) {
6010 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6012 ctxt->sax->error(ctxt->userData,
6013 "'<' in entity '%s' is not allowed in attributes values\n", name);
6014 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006016 }
6017
6018 /*
6019 * Internal check, no parameter entities here ...
6020 */
6021 else {
6022 switch (ent->etype) {
6023 case XML_INTERNAL_PARAMETER_ENTITY:
6024 case XML_EXTERNAL_PARAMETER_ENTITY:
6025 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6027 ctxt->sax->error(ctxt->userData,
6028 "Attempt to reference the parameter entity '%s'\n", name);
6029 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006030 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006031 break;
6032 default:
6033 break;
6034 }
6035 }
6036
6037 /*
6038 * [ WFC: No Recursion ]
6039 * A parsed entity must not contain a recursive reference
6040 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006041 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006042 */
6043
6044 } else {
6045 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6047 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006048 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006051 }
6052 xmlFree(name);
6053 }
6054 }
6055 *str = ptr;
6056 return(ent);
6057}
6058
6059/**
6060 * xmlParsePEReference:
6061 * @ctxt: an XML parser context
6062 *
6063 * parse PEReference declarations
6064 * The entity content is handled directly by pushing it's content as
6065 * a new input stream.
6066 *
6067 * [69] PEReference ::= '%' Name ';'
6068 *
6069 * [ WFC: No Recursion ]
6070 * A parsed entity must not contain a recursive
6071 * reference to itself, either directly or indirectly.
6072 *
6073 * [ WFC: Entity Declared ]
6074 * In a document without any DTD, a document with only an internal DTD
6075 * subset which contains no parameter entity references, or a document
6076 * with "standalone='yes'", ... ... The declaration of a parameter
6077 * entity must precede any reference to it...
6078 *
6079 * [ VC: Entity Declared ]
6080 * In a document with an external subset or external parameter entities
6081 * with "standalone='no'", ... ... The declaration of a parameter entity
6082 * must precede any reference to it...
6083 *
6084 * [ WFC: In DTD ]
6085 * Parameter-entity references may only appear in the DTD.
6086 * NOTE: misleading but this is handled.
6087 */
6088void
6089xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6090 xmlChar *name;
6091 xmlEntityPtr entity = NULL;
6092 xmlParserInputPtr input;
6093
6094 if (RAW == '%') {
6095 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006096 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006097 if (name == NULL) {
6098 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6100 ctxt->sax->error(ctxt->userData,
6101 "xmlParsePEReference: no name\n");
6102 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 } else {
6105 if (RAW == ';') {
6106 NEXT;
6107 if ((ctxt->sax != NULL) &&
6108 (ctxt->sax->getParameterEntity != NULL))
6109 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6110 name);
6111 if (entity == NULL) {
6112 /*
6113 * [ WFC: Entity Declared ]
6114 * In a document without any DTD, a document with only an
6115 * internal DTD subset which contains no parameter entity
6116 * references, or a document with "standalone='yes'", ...
6117 * ... The declaration of a parameter entity must precede
6118 * any reference to it...
6119 */
6120 if ((ctxt->standalone == 1) ||
6121 ((ctxt->hasExternalSubset == 0) &&
6122 (ctxt->hasPErefs == 0))) {
6123 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6124 if ((!ctxt->disableSAX) &&
6125 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6126 ctxt->sax->error(ctxt->userData,
6127 "PEReference: %%%s; not found\n", name);
6128 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006129 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006130 } else {
6131 /*
6132 * [ VC: Entity Declared ]
6133 * In a document with an external subset or external
6134 * parameter entities with "standalone='no'", ...
6135 * ... The declaration of a parameter entity must precede
6136 * any reference to it...
6137 */
6138 if ((!ctxt->disableSAX) &&
6139 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6140 ctxt->sax->warning(ctxt->userData,
6141 "PEReference: %%%s; not found\n", name);
6142 ctxt->valid = 0;
6143 }
6144 } else {
6145 /*
6146 * Internal checking in case the entity quest barfed
6147 */
6148 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6149 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6150 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6151 ctxt->sax->warning(ctxt->userData,
6152 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006153 } else if (ctxt->input->free != deallocblankswrapper) {
6154 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6155 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 } else {
6157 /*
6158 * TODO !!!
6159 * handle the extra spaces added before and after
6160 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6161 */
6162 input = xmlNewEntityInputStream(ctxt, entity);
6163 xmlPushInput(ctxt, input);
6164 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6165 (RAW == '<') && (NXT(1) == '?') &&
6166 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6167 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6168 xmlParseTextDecl(ctxt);
6169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6170 /*
6171 * The XML REC instructs us to stop parsing
6172 * right here
6173 */
6174 ctxt->instate = XML_PARSER_EOF;
6175 xmlFree(name);
6176 return;
6177 }
6178 }
Owen Taylor3473f882001-02-23 17:55:21 +00006179 }
6180 }
6181 ctxt->hasPErefs = 1;
6182 } else {
6183 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185 ctxt->sax->error(ctxt->userData,
6186 "xmlParsePEReference: expecting ';'\n");
6187 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006188 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006189 }
6190 xmlFree(name);
6191 }
6192 }
6193}
6194
6195/**
6196 * xmlParseStringPEReference:
6197 * @ctxt: an XML parser context
6198 * @str: a pointer to an index in the string
6199 *
6200 * parse PEReference declarations
6201 *
6202 * [69] PEReference ::= '%' Name ';'
6203 *
6204 * [ WFC: No Recursion ]
6205 * A parsed entity must not contain a recursive
6206 * reference to itself, either directly or indirectly.
6207 *
6208 * [ WFC: Entity Declared ]
6209 * In a document without any DTD, a document with only an internal DTD
6210 * subset which contains no parameter entity references, or a document
6211 * with "standalone='yes'", ... ... The declaration of a parameter
6212 * entity must precede any reference to it...
6213 *
6214 * [ VC: Entity Declared ]
6215 * In a document with an external subset or external parameter entities
6216 * with "standalone='no'", ... ... The declaration of a parameter entity
6217 * must precede any reference to it...
6218 *
6219 * [ WFC: In DTD ]
6220 * Parameter-entity references may only appear in the DTD.
6221 * NOTE: misleading but this is handled.
6222 *
6223 * Returns the string of the entity content.
6224 * str is updated to the current value of the index
6225 */
6226xmlEntityPtr
6227xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6228 const xmlChar *ptr;
6229 xmlChar cur;
6230 xmlChar *name;
6231 xmlEntityPtr entity = NULL;
6232
6233 if ((str == NULL) || (*str == NULL)) return(NULL);
6234 ptr = *str;
6235 cur = *ptr;
6236 if (cur == '%') {
6237 ptr++;
6238 cur = *ptr;
6239 name = xmlParseStringName(ctxt, &ptr);
6240 if (name == NULL) {
6241 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243 ctxt->sax->error(ctxt->userData,
6244 "xmlParseStringPEReference: no name\n");
6245 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006246 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006247 } else {
6248 cur = *ptr;
6249 if (cur == ';') {
6250 ptr++;
6251 cur = *ptr;
6252 if ((ctxt->sax != NULL) &&
6253 (ctxt->sax->getParameterEntity != NULL))
6254 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6255 name);
6256 if (entity == NULL) {
6257 /*
6258 * [ WFC: Entity Declared ]
6259 * In a document without any DTD, a document with only an
6260 * internal DTD subset which contains no parameter entity
6261 * references, or a document with "standalone='yes'", ...
6262 * ... The declaration of a parameter entity must precede
6263 * any reference to it...
6264 */
6265 if ((ctxt->standalone == 1) ||
6266 ((ctxt->hasExternalSubset == 0) &&
6267 (ctxt->hasPErefs == 0))) {
6268 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "PEReference: %%%s; not found\n", name);
6272 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
6275 /*
6276 * [ VC: Entity Declared ]
6277 * In a document with an external subset or external
6278 * parameter entities with "standalone='no'", ...
6279 * ... The declaration of a parameter entity must
6280 * precede any reference to it...
6281 */
6282 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6283 ctxt->sax->warning(ctxt->userData,
6284 "PEReference: %%%s; not found\n", name);
6285 ctxt->valid = 0;
6286 }
6287 } else {
6288 /*
6289 * Internal checking in case the entity quest barfed
6290 */
6291 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6292 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6293 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6294 ctxt->sax->warning(ctxt->userData,
6295 "Internal: %%%s; is not a parameter entity\n", name);
6296 }
6297 }
6298 ctxt->hasPErefs = 1;
6299 } else {
6300 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6302 ctxt->sax->error(ctxt->userData,
6303 "xmlParseStringPEReference: expecting ';'\n");
6304 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006306 }
6307 xmlFree(name);
6308 }
6309 }
6310 *str = ptr;
6311 return(entity);
6312}
6313
6314/**
6315 * xmlParseDocTypeDecl:
6316 * @ctxt: an XML parser context
6317 *
6318 * parse a DOCTYPE declaration
6319 *
6320 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6321 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6322 *
6323 * [ VC: Root Element Type ]
6324 * The Name in the document type declaration must match the element
6325 * type of the root element.
6326 */
6327
6328void
6329xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6330 xmlChar *name = NULL;
6331 xmlChar *ExternalID = NULL;
6332 xmlChar *URI = NULL;
6333
6334 /*
6335 * We know that '<!DOCTYPE' has been detected.
6336 */
6337 SKIP(9);
6338
6339 SKIP_BLANKS;
6340
6341 /*
6342 * Parse the DOCTYPE name.
6343 */
6344 name = xmlParseName(ctxt);
6345 if (name == NULL) {
6346 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6348 ctxt->sax->error(ctxt->userData,
6349 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6350 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006351 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006352 }
6353 ctxt->intSubName = name;
6354
6355 SKIP_BLANKS;
6356
6357 /*
6358 * Check for SystemID and ExternalID
6359 */
6360 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6361
6362 if ((URI != NULL) || (ExternalID != NULL)) {
6363 ctxt->hasExternalSubset = 1;
6364 }
6365 ctxt->extSubURI = URI;
6366 ctxt->extSubSystem = ExternalID;
6367
6368 SKIP_BLANKS;
6369
6370 /*
6371 * Create and update the internal subset.
6372 */
6373 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6374 (!ctxt->disableSAX))
6375 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6376
6377 /*
6378 * Is there any internal subset declarations ?
6379 * they are handled separately in xmlParseInternalSubset()
6380 */
6381 if (RAW == '[')
6382 return;
6383
6384 /*
6385 * We should be at the end of the DOCTYPE declaration.
6386 */
6387 if (RAW != '>') {
6388 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006390 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006391 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006392 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394 NEXT;
6395}
6396
6397/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006398 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006399 * @ctxt: an XML parser context
6400 *
6401 * parse the internal subset declaration
6402 *
6403 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6404 */
6405
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006406static void
Owen Taylor3473f882001-02-23 17:55:21 +00006407xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6408 /*
6409 * Is there any DTD definition ?
6410 */
6411 if (RAW == '[') {
6412 ctxt->instate = XML_PARSER_DTD;
6413 NEXT;
6414 /*
6415 * Parse the succession of Markup declarations and
6416 * PEReferences.
6417 * Subsequence (markupdecl | PEReference | S)*
6418 */
6419 while (RAW != ']') {
6420 const xmlChar *check = CUR_PTR;
6421 int cons = ctxt->input->consumed;
6422
6423 SKIP_BLANKS;
6424 xmlParseMarkupDecl(ctxt);
6425 xmlParsePEReference(ctxt);
6426
6427 /*
6428 * Pop-up of finished entities.
6429 */
6430 while ((RAW == 0) && (ctxt->inputNr > 1))
6431 xmlPopInput(ctxt);
6432
6433 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6434 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6436 ctxt->sax->error(ctxt->userData,
6437 "xmlParseInternalSubset: error detected in Markup declaration\n");
6438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006440 break;
6441 }
6442 }
6443 if (RAW == ']') {
6444 NEXT;
6445 SKIP_BLANKS;
6446 }
6447 }
6448
6449 /*
6450 * We should be at the end of the DOCTYPE declaration.
6451 */
6452 if (RAW != '>') {
6453 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006455 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006456 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006457 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006458 }
6459 NEXT;
6460}
6461
6462/**
6463 * xmlParseAttribute:
6464 * @ctxt: an XML parser context
6465 * @value: a xmlChar ** used to store the value of the attribute
6466 *
6467 * parse an attribute
6468 *
6469 * [41] Attribute ::= Name Eq AttValue
6470 *
6471 * [ WFC: No External Entity References ]
6472 * Attribute values cannot contain direct or indirect entity references
6473 * to external entities.
6474 *
6475 * [ WFC: No < in Attribute Values ]
6476 * The replacement text of any entity referred to directly or indirectly in
6477 * an attribute value (other than "&lt;") must not contain a <.
6478 *
6479 * [ VC: Attribute Value Type ]
6480 * The attribute must have been declared; the value must be of the type
6481 * declared for it.
6482 *
6483 * [25] Eq ::= S? '=' S?
6484 *
6485 * With namespace:
6486 *
6487 * [NS 11] Attribute ::= QName Eq AttValue
6488 *
6489 * Also the case QName == xmlns:??? is handled independently as a namespace
6490 * definition.
6491 *
6492 * Returns the attribute name, and the value in *value.
6493 */
6494
6495xmlChar *
6496xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6497 xmlChar *name, *val;
6498
6499 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006500 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006501 name = xmlParseName(ctxt);
6502 if (name == NULL) {
6503 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6505 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6506 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006507 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006508 return(NULL);
6509 }
6510
6511 /*
6512 * read the value
6513 */
6514 SKIP_BLANKS;
6515 if (RAW == '=') {
6516 NEXT;
6517 SKIP_BLANKS;
6518 val = xmlParseAttValue(ctxt);
6519 ctxt->instate = XML_PARSER_CONTENT;
6520 } else {
6521 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6523 ctxt->sax->error(ctxt->userData,
6524 "Specification mandate value for attribute %s\n", name);
6525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006527 xmlFree(name);
6528 return(NULL);
6529 }
6530
6531 /*
6532 * Check that xml:lang conforms to the specification
6533 * No more registered as an error, just generate a warning now
6534 * since this was deprecated in XML second edition
6535 */
6536 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6537 if (!xmlCheckLanguageID(val)) {
6538 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6539 ctxt->sax->warning(ctxt->userData,
6540 "Malformed value for xml:lang : %s\n", val);
6541 }
6542 }
6543
6544 /*
6545 * Check that xml:space conforms to the specification
6546 */
6547 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6548 if (xmlStrEqual(val, BAD_CAST "default"))
6549 *(ctxt->space) = 0;
6550 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6551 *(ctxt->space) = 1;
6552 else {
6553 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6555 ctxt->sax->error(ctxt->userData,
6556"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6557 val);
6558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006560 }
6561 }
6562
6563 *value = val;
6564 return(name);
6565}
6566
6567/**
6568 * xmlParseStartTag:
6569 * @ctxt: an XML parser context
6570 *
6571 * parse a start of tag either for rule element or
6572 * EmptyElement. In both case we don't parse the tag closing chars.
6573 *
6574 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6575 *
6576 * [ WFC: Unique Att Spec ]
6577 * No attribute name may appear more than once in the same start-tag or
6578 * empty-element tag.
6579 *
6580 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6581 *
6582 * [ WFC: Unique Att Spec ]
6583 * No attribute name may appear more than once in the same start-tag or
6584 * empty-element tag.
6585 *
6586 * With namespace:
6587 *
6588 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6589 *
6590 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6591 *
6592 * Returns the element name parsed
6593 */
6594
6595xmlChar *
6596xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6597 xmlChar *name;
6598 xmlChar *attname;
6599 xmlChar *attvalue;
6600 const xmlChar **atts = NULL;
6601 int nbatts = 0;
6602 int maxatts = 0;
6603 int i;
6604
6605 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006606 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006607
6608 name = xmlParseName(ctxt);
6609 if (name == NULL) {
6610 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6612 ctxt->sax->error(ctxt->userData,
6613 "xmlParseStartTag: invalid element name\n");
6614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006616 return(NULL);
6617 }
6618
6619 /*
6620 * Now parse the attributes, it ends up with the ending
6621 *
6622 * (S Attribute)* S?
6623 */
6624 SKIP_BLANKS;
6625 GROW;
6626
Daniel Veillard21a0f912001-02-25 19:54:14 +00006627 while ((RAW != '>') &&
6628 ((RAW != '/') || (NXT(1) != '>')) &&
6629 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006630 const xmlChar *q = CUR_PTR;
6631 int cons = ctxt->input->consumed;
6632
6633 attname = xmlParseAttribute(ctxt, &attvalue);
6634 if ((attname != NULL) && (attvalue != NULL)) {
6635 /*
6636 * [ WFC: Unique Att Spec ]
6637 * No attribute name may appear more than once in the same
6638 * start-tag or empty-element tag.
6639 */
6640 for (i = 0; i < nbatts;i += 2) {
6641 if (xmlStrEqual(atts[i], attname)) {
6642 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6644 ctxt->sax->error(ctxt->userData,
6645 "Attribute %s redefined\n",
6646 attname);
6647 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006648 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006649 xmlFree(attname);
6650 xmlFree(attvalue);
6651 goto failed;
6652 }
6653 }
6654
6655 /*
6656 * Add the pair to atts
6657 */
6658 if (atts == NULL) {
6659 maxatts = 10;
6660 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6661 if (atts == NULL) {
6662 xmlGenericError(xmlGenericErrorContext,
6663 "malloc of %ld byte failed\n",
6664 maxatts * (long)sizeof(xmlChar *));
6665 return(NULL);
6666 }
6667 } else if (nbatts + 4 > maxatts) {
6668 maxatts *= 2;
6669 atts = (const xmlChar **) xmlRealloc((void *) atts,
6670 maxatts * sizeof(xmlChar *));
6671 if (atts == NULL) {
6672 xmlGenericError(xmlGenericErrorContext,
6673 "realloc of %ld byte failed\n",
6674 maxatts * (long)sizeof(xmlChar *));
6675 return(NULL);
6676 }
6677 }
6678 atts[nbatts++] = attname;
6679 atts[nbatts++] = attvalue;
6680 atts[nbatts] = NULL;
6681 atts[nbatts + 1] = NULL;
6682 } else {
6683 if (attname != NULL)
6684 xmlFree(attname);
6685 if (attvalue != NULL)
6686 xmlFree(attvalue);
6687 }
6688
6689failed:
6690
Daniel Veillard67df8092002-12-16 22:04:11 +00006691 if (CUR == 0) {
6692 GROW
6693 }
Owen Taylor3473f882001-02-23 17:55:21 +00006694 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6695 break;
6696 if (!IS_BLANK(RAW)) {
6697 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6699 ctxt->sax->error(ctxt->userData,
6700 "attributes construct error\n");
6701 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006702 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006703 }
6704 SKIP_BLANKS;
6705 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6706 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "xmlParseStartTag: problem parsing attributes\n");
6710 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006712 break;
6713 }
6714 GROW;
6715 }
6716
6717 /*
6718 * SAX: Start of Element !
6719 */
6720 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6721 (!ctxt->disableSAX))
6722 ctxt->sax->startElement(ctxt->userData, name, atts);
6723
6724 if (atts != NULL) {
6725 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6726 xmlFree((void *) atts);
6727 }
6728 return(name);
6729}
6730
6731/**
6732 * xmlParseEndTag:
6733 * @ctxt: an XML parser context
6734 *
6735 * parse an end of tag
6736 *
6737 * [42] ETag ::= '</' Name S? '>'
6738 *
6739 * With namespace
6740 *
6741 * [NS 9] ETag ::= '</' QName S? '>'
6742 */
6743
6744void
6745xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6746 xmlChar *name;
6747 xmlChar *oldname;
6748
6749 GROW;
6750 if ((RAW != '<') || (NXT(1) != '/')) {
6751 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6753 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6754 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006755 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006756 return;
6757 }
6758 SKIP(2);
6759
Daniel Veillard46de64e2002-05-29 08:21:33 +00006760 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006761
6762 /*
6763 * We should definitely be at the ending "S? '>'" part
6764 */
6765 GROW;
6766 SKIP_BLANKS;
6767 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6768 ctxt->errNo = XML_ERR_GT_REQUIRED;
6769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6770 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6771 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006772 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006773 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006774 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006775
6776 /*
6777 * [ WFC: Element Type Match ]
6778 * The Name in an element's end-tag must match the element type in the
6779 * start-tag.
6780 *
6781 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006782 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006783 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006785 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006786 ctxt->sax->error(ctxt->userData,
6787 "Opening and ending tag mismatch: %s and %s\n",
6788 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006789 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006790 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006791 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006792 }
6793
6794 }
6795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6797#if 0
6798 else {
6799 /*
6800 * Recover in case of one missing close
6801 */
6802 if ((ctxt->nameNr > 2) &&
6803 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6804 namePop(ctxt);
6805 spacePop(ctxt);
6806 }
6807 }
6808#endif
6809 if (name != NULL)
6810 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006811 }
6812
6813 /*
6814 * SAX: End of Tag
6815 */
6816 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6817 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006818 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006819
Owen Taylor3473f882001-02-23 17:55:21 +00006820 oldname = namePop(ctxt);
6821 spacePop(ctxt);
6822 if (oldname != NULL) {
6823#ifdef DEBUG_STACK
6824 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6825#endif
6826 xmlFree(oldname);
6827 }
6828 return;
6829}
6830
6831/**
6832 * xmlParseCDSect:
6833 * @ctxt: an XML parser context
6834 *
6835 * Parse escaped pure raw content.
6836 *
6837 * [18] CDSect ::= CDStart CData CDEnd
6838 *
6839 * [19] CDStart ::= '<![CDATA['
6840 *
6841 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6842 *
6843 * [21] CDEnd ::= ']]>'
6844 */
6845void
6846xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6847 xmlChar *buf = NULL;
6848 int len = 0;
6849 int size = XML_PARSER_BUFFER_SIZE;
6850 int r, rl;
6851 int s, sl;
6852 int cur, l;
6853 int count = 0;
6854
6855 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6856 (NXT(2) == '[') && (NXT(3) == 'C') &&
6857 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6858 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6859 (NXT(8) == '[')) {
6860 SKIP(9);
6861 } else
6862 return;
6863
6864 ctxt->instate = XML_PARSER_CDATA_SECTION;
6865 r = CUR_CHAR(rl);
6866 if (!IS_CHAR(r)) {
6867 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6869 ctxt->sax->error(ctxt->userData,
6870 "CData section not finished\n");
6871 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006872 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006873 ctxt->instate = XML_PARSER_CONTENT;
6874 return;
6875 }
6876 NEXTL(rl);
6877 s = CUR_CHAR(sl);
6878 if (!IS_CHAR(s)) {
6879 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6881 ctxt->sax->error(ctxt->userData,
6882 "CData section not finished\n");
6883 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006884 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006885 ctxt->instate = XML_PARSER_CONTENT;
6886 return;
6887 }
6888 NEXTL(sl);
6889 cur = CUR_CHAR(l);
6890 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6891 if (buf == NULL) {
6892 xmlGenericError(xmlGenericErrorContext,
6893 "malloc of %d byte failed\n", size);
6894 return;
6895 }
6896 while (IS_CHAR(cur) &&
6897 ((r != ']') || (s != ']') || (cur != '>'))) {
6898 if (len + 5 >= size) {
6899 size *= 2;
6900 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6901 if (buf == NULL) {
6902 xmlGenericError(xmlGenericErrorContext,
6903 "realloc of %d byte failed\n", size);
6904 return;
6905 }
6906 }
6907 COPY_BUF(rl,buf,len,r);
6908 r = s;
6909 rl = sl;
6910 s = cur;
6911 sl = l;
6912 count++;
6913 if (count > 50) {
6914 GROW;
6915 count = 0;
6916 }
6917 NEXTL(l);
6918 cur = CUR_CHAR(l);
6919 }
6920 buf[len] = 0;
6921 ctxt->instate = XML_PARSER_CONTENT;
6922 if (cur != '>') {
6923 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6925 ctxt->sax->error(ctxt->userData,
6926 "CData section not finished\n%.50s\n", buf);
6927 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006928 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006929 xmlFree(buf);
6930 return;
6931 }
6932 NEXTL(l);
6933
6934 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006935 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006936 */
6937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6938 if (ctxt->sax->cdataBlock != NULL)
6939 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006940 else if (ctxt->sax->characters != NULL)
6941 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006942 }
6943 xmlFree(buf);
6944}
6945
6946/**
6947 * xmlParseContent:
6948 * @ctxt: an XML parser context
6949 *
6950 * Parse a content:
6951 *
6952 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6953 */
6954
6955void
6956xmlParseContent(xmlParserCtxtPtr ctxt) {
6957 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006958 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006959 ((RAW != '<') || (NXT(1) != '/'))) {
6960 const xmlChar *test = CUR_PTR;
6961 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006962 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006963
6964 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006965 * First case : a Processing Instruction.
6966 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006967 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006968 xmlParsePI(ctxt);
6969 }
6970
6971 /*
6972 * Second case : a CDSection
6973 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006974 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006975 (NXT(2) == '[') && (NXT(3) == 'C') &&
6976 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6977 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6978 (NXT(8) == '[')) {
6979 xmlParseCDSect(ctxt);
6980 }
6981
6982 /*
6983 * Third case : a comment
6984 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006985 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006986 (NXT(2) == '-') && (NXT(3) == '-')) {
6987 xmlParseComment(ctxt);
6988 ctxt->instate = XML_PARSER_CONTENT;
6989 }
6990
6991 /*
6992 * Fourth case : a sub-element.
6993 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006994 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006995 xmlParseElement(ctxt);
6996 }
6997
6998 /*
6999 * Fifth case : a reference. If if has not been resolved,
7000 * parsing returns it's Name, create the node
7001 */
7002
Daniel Veillard21a0f912001-02-25 19:54:14 +00007003 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007004 xmlParseReference(ctxt);
7005 }
7006
7007 /*
7008 * Last case, text. Note that References are handled directly.
7009 */
7010 else {
7011 xmlParseCharData(ctxt, 0);
7012 }
7013
7014 GROW;
7015 /*
7016 * Pop-up of finished entities.
7017 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007018 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007019 xmlPopInput(ctxt);
7020 SHRINK;
7021
Daniel Veillardfdc91562002-07-01 21:52:03 +00007022 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007023 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7025 ctxt->sax->error(ctxt->userData,
7026 "detected an error in element content\n");
7027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007029 ctxt->instate = XML_PARSER_EOF;
7030 break;
7031 }
7032 }
7033}
7034
7035/**
7036 * xmlParseElement:
7037 * @ctxt: an XML parser context
7038 *
7039 * parse an XML element, this is highly recursive
7040 *
7041 * [39] element ::= EmptyElemTag | STag content ETag
7042 *
7043 * [ WFC: Element Type Match ]
7044 * The Name in an element's end-tag must match the element type in the
7045 * start-tag.
7046 *
7047 * [ VC: Element Valid ]
7048 * An element is valid if there is a declaration matching elementdecl
7049 * where the Name matches the element type and one of the following holds:
7050 * - The declaration matches EMPTY and the element has no content.
7051 * - The declaration matches children and the sequence of child elements
7052 * belongs to the language generated by the regular expression in the
7053 * content model, with optional white space (characters matching the
7054 * nonterminal S) between each pair of child elements.
7055 * - The declaration matches Mixed and the content consists of character
7056 * data and child elements whose types match names in the content model.
7057 * - The declaration matches ANY, and the types of any child elements have
7058 * been declared.
7059 */
7060
7061void
7062xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007063 xmlChar *name;
7064 xmlChar *oldname;
7065 xmlParserNodeInfo node_info;
7066 xmlNodePtr ret;
7067
7068 /* Capture start position */
7069 if (ctxt->record_info) {
7070 node_info.begin_pos = ctxt->input->consumed +
7071 (CUR_PTR - ctxt->input->base);
7072 node_info.begin_line = ctxt->input->line;
7073 }
7074
7075 if (ctxt->spaceNr == 0)
7076 spacePush(ctxt, -1);
7077 else
7078 spacePush(ctxt, *ctxt->space);
7079
7080 name = xmlParseStartTag(ctxt);
7081 if (name == NULL) {
7082 spacePop(ctxt);
7083 return;
7084 }
7085 namePush(ctxt, name);
7086 ret = ctxt->node;
7087
7088 /*
7089 * [ VC: Root Element Type ]
7090 * The Name in the document type declaration must match the element
7091 * type of the root element.
7092 */
7093 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7094 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7095 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7096
7097 /*
7098 * Check for an Empty Element.
7099 */
7100 if ((RAW == '/') && (NXT(1) == '>')) {
7101 SKIP(2);
7102 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7103 (!ctxt->disableSAX))
7104 ctxt->sax->endElement(ctxt->userData, name);
7105 oldname = namePop(ctxt);
7106 spacePop(ctxt);
7107 if (oldname != NULL) {
7108#ifdef DEBUG_STACK
7109 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7110#endif
7111 xmlFree(oldname);
7112 }
7113 if ( ret != NULL && ctxt->record_info ) {
7114 node_info.end_pos = ctxt->input->consumed +
7115 (CUR_PTR - ctxt->input->base);
7116 node_info.end_line = ctxt->input->line;
7117 node_info.node = ret;
7118 xmlParserAddNodeInfo(ctxt, &node_info);
7119 }
7120 return;
7121 }
7122 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007123 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007124 } else {
7125 ctxt->errNo = XML_ERR_GT_REQUIRED;
7126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007128 "Couldn't find end of Start Tag %s\n",
7129 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007130 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007131 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007132
7133 /*
7134 * end of parsing of this node.
7135 */
7136 nodePop(ctxt);
7137 oldname = namePop(ctxt);
7138 spacePop(ctxt);
7139 if (oldname != NULL) {
7140#ifdef DEBUG_STACK
7141 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7142#endif
7143 xmlFree(oldname);
7144 }
7145
7146 /*
7147 * Capture end position and add node
7148 */
7149 if ( ret != NULL && ctxt->record_info ) {
7150 node_info.end_pos = ctxt->input->consumed +
7151 (CUR_PTR - ctxt->input->base);
7152 node_info.end_line = ctxt->input->line;
7153 node_info.node = ret;
7154 xmlParserAddNodeInfo(ctxt, &node_info);
7155 }
7156 return;
7157 }
7158
7159 /*
7160 * Parse the content of the element:
7161 */
7162 xmlParseContent(ctxt);
7163 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007164 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7166 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007167 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007168 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007169 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007170
7171 /*
7172 * end of parsing of this node.
7173 */
7174 nodePop(ctxt);
7175 oldname = namePop(ctxt);
7176 spacePop(ctxt);
7177 if (oldname != NULL) {
7178#ifdef DEBUG_STACK
7179 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7180#endif
7181 xmlFree(oldname);
7182 }
7183 return;
7184 }
7185
7186 /*
7187 * parse the end of tag: '</' should be here.
7188 */
7189 xmlParseEndTag(ctxt);
7190
7191 /*
7192 * Capture end position and add node
7193 */
7194 if ( ret != NULL && ctxt->record_info ) {
7195 node_info.end_pos = ctxt->input->consumed +
7196 (CUR_PTR - ctxt->input->base);
7197 node_info.end_line = ctxt->input->line;
7198 node_info.node = ret;
7199 xmlParserAddNodeInfo(ctxt, &node_info);
7200 }
7201}
7202
7203/**
7204 * xmlParseVersionNum:
7205 * @ctxt: an XML parser context
7206 *
7207 * parse the XML version value.
7208 *
7209 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7210 *
7211 * Returns the string giving the XML version number, or NULL
7212 */
7213xmlChar *
7214xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7215 xmlChar *buf = NULL;
7216 int len = 0;
7217 int size = 10;
7218 xmlChar cur;
7219
7220 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7221 if (buf == NULL) {
7222 xmlGenericError(xmlGenericErrorContext,
7223 "malloc of %d byte failed\n", size);
7224 return(NULL);
7225 }
7226 cur = CUR;
7227 while (((cur >= 'a') && (cur <= 'z')) ||
7228 ((cur >= 'A') && (cur <= 'Z')) ||
7229 ((cur >= '0') && (cur <= '9')) ||
7230 (cur == '_') || (cur == '.') ||
7231 (cur == ':') || (cur == '-')) {
7232 if (len + 1 >= size) {
7233 size *= 2;
7234 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7235 if (buf == NULL) {
7236 xmlGenericError(xmlGenericErrorContext,
7237 "realloc of %d byte failed\n", size);
7238 return(NULL);
7239 }
7240 }
7241 buf[len++] = cur;
7242 NEXT;
7243 cur=CUR;
7244 }
7245 buf[len] = 0;
7246 return(buf);
7247}
7248
7249/**
7250 * xmlParseVersionInfo:
7251 * @ctxt: an XML parser context
7252 *
7253 * parse the XML version.
7254 *
7255 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7256 *
7257 * [25] Eq ::= S? '=' S?
7258 *
7259 * Returns the version string, e.g. "1.0"
7260 */
7261
7262xmlChar *
7263xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7264 xmlChar *version = NULL;
7265 const xmlChar *q;
7266
7267 if ((RAW == 'v') && (NXT(1) == 'e') &&
7268 (NXT(2) == 'r') && (NXT(3) == 's') &&
7269 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7270 (NXT(6) == 'n')) {
7271 SKIP(7);
7272 SKIP_BLANKS;
7273 if (RAW != '=') {
7274 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7276 ctxt->sax->error(ctxt->userData,
7277 "xmlParseVersionInfo : expected '='\n");
7278 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007279 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007280 return(NULL);
7281 }
7282 NEXT;
7283 SKIP_BLANKS;
7284 if (RAW == '"') {
7285 NEXT;
7286 q = CUR_PTR;
7287 version = xmlParseVersionNum(ctxt);
7288 if (RAW != '"') {
7289 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7291 ctxt->sax->error(ctxt->userData,
7292 "String not closed\n%.50s\n", q);
7293 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007294 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007295 } else
7296 NEXT;
7297 } else if (RAW == '\''){
7298 NEXT;
7299 q = CUR_PTR;
7300 version = xmlParseVersionNum(ctxt);
7301 if (RAW != '\'') {
7302 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7304 ctxt->sax->error(ctxt->userData,
7305 "String not closed\n%.50s\n", q);
7306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007308 } else
7309 NEXT;
7310 } else {
7311 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7313 ctxt->sax->error(ctxt->userData,
7314 "xmlParseVersionInfo : expected ' or \"\n");
7315 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007316 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007317 }
7318 }
7319 return(version);
7320}
7321
7322/**
7323 * xmlParseEncName:
7324 * @ctxt: an XML parser context
7325 *
7326 * parse the XML encoding name
7327 *
7328 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7329 *
7330 * Returns the encoding name value or NULL
7331 */
7332xmlChar *
7333xmlParseEncName(xmlParserCtxtPtr ctxt) {
7334 xmlChar *buf = NULL;
7335 int len = 0;
7336 int size = 10;
7337 xmlChar cur;
7338
7339 cur = CUR;
7340 if (((cur >= 'a') && (cur <= 'z')) ||
7341 ((cur >= 'A') && (cur <= 'Z'))) {
7342 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7343 if (buf == NULL) {
7344 xmlGenericError(xmlGenericErrorContext,
7345 "malloc of %d byte failed\n", size);
7346 return(NULL);
7347 }
7348
7349 buf[len++] = cur;
7350 NEXT;
7351 cur = CUR;
7352 while (((cur >= 'a') && (cur <= 'z')) ||
7353 ((cur >= 'A') && (cur <= 'Z')) ||
7354 ((cur >= '0') && (cur <= '9')) ||
7355 (cur == '.') || (cur == '_') ||
7356 (cur == '-')) {
7357 if (len + 1 >= size) {
7358 size *= 2;
7359 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7360 if (buf == NULL) {
7361 xmlGenericError(xmlGenericErrorContext,
7362 "realloc of %d byte failed\n", size);
7363 return(NULL);
7364 }
7365 }
7366 buf[len++] = cur;
7367 NEXT;
7368 cur = CUR;
7369 if (cur == 0) {
7370 SHRINK;
7371 GROW;
7372 cur = CUR;
7373 }
7374 }
7375 buf[len] = 0;
7376 } else {
7377 ctxt->errNo = XML_ERR_ENCODING_NAME;
7378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7379 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7380 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007382 }
7383 return(buf);
7384}
7385
7386/**
7387 * xmlParseEncodingDecl:
7388 * @ctxt: an XML parser context
7389 *
7390 * parse the XML encoding declaration
7391 *
7392 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7393 *
7394 * this setups the conversion filters.
7395 *
7396 * Returns the encoding value or NULL
7397 */
7398
7399xmlChar *
7400xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7401 xmlChar *encoding = NULL;
7402 const xmlChar *q;
7403
7404 SKIP_BLANKS;
7405 if ((RAW == 'e') && (NXT(1) == 'n') &&
7406 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7407 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7408 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7409 SKIP(8);
7410 SKIP_BLANKS;
7411 if (RAW != '=') {
7412 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7414 ctxt->sax->error(ctxt->userData,
7415 "xmlParseEncodingDecl : expected '='\n");
7416 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007417 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007418 return(NULL);
7419 }
7420 NEXT;
7421 SKIP_BLANKS;
7422 if (RAW == '"') {
7423 NEXT;
7424 q = CUR_PTR;
7425 encoding = xmlParseEncName(ctxt);
7426 if (RAW != '"') {
7427 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7429 ctxt->sax->error(ctxt->userData,
7430 "String not closed\n%.50s\n", q);
7431 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007432 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007433 } else
7434 NEXT;
7435 } else if (RAW == '\''){
7436 NEXT;
7437 q = CUR_PTR;
7438 encoding = xmlParseEncName(ctxt);
7439 if (RAW != '\'') {
7440 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7442 ctxt->sax->error(ctxt->userData,
7443 "String not closed\n%.50s\n", q);
7444 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007445 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007446 } else
7447 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007448 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007449 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7451 ctxt->sax->error(ctxt->userData,
7452 "xmlParseEncodingDecl : expected ' or \"\n");
7453 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007454 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007455 }
7456 if (encoding != NULL) {
7457 xmlCharEncoding enc;
7458 xmlCharEncodingHandlerPtr handler;
7459
7460 if (ctxt->input->encoding != NULL)
7461 xmlFree((xmlChar *) ctxt->input->encoding);
7462 ctxt->input->encoding = encoding;
7463
7464 enc = xmlParseCharEncoding((const char *) encoding);
7465 /*
7466 * registered set of known encodings
7467 */
7468 if (enc != XML_CHAR_ENCODING_ERROR) {
7469 xmlSwitchEncoding(ctxt, enc);
7470 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007471 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007472 xmlFree(encoding);
7473 return(NULL);
7474 }
7475 } else {
7476 /*
7477 * fallback for unknown encodings
7478 */
7479 handler = xmlFindCharEncodingHandler((const char *) encoding);
7480 if (handler != NULL) {
7481 xmlSwitchToEncoding(ctxt, handler);
7482 } else {
7483 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7485 ctxt->sax->error(ctxt->userData,
7486 "Unsupported encoding %s\n", encoding);
7487 return(NULL);
7488 }
7489 }
7490 }
7491 }
7492 return(encoding);
7493}
7494
7495/**
7496 * xmlParseSDDecl:
7497 * @ctxt: an XML parser context
7498 *
7499 * parse the XML standalone declaration
7500 *
7501 * [32] SDDecl ::= S 'standalone' Eq
7502 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7503 *
7504 * [ VC: Standalone Document Declaration ]
7505 * TODO The standalone document declaration must have the value "no"
7506 * if any external markup declarations contain declarations of:
7507 * - attributes with default values, if elements to which these
7508 * attributes apply appear in the document without specifications
7509 * of values for these attributes, or
7510 * - entities (other than amp, lt, gt, apos, quot), if references
7511 * to those entities appear in the document, or
7512 * - attributes with values subject to normalization, where the
7513 * attribute appears in the document with a value which will change
7514 * as a result of normalization, or
7515 * - element types with element content, if white space occurs directly
7516 * within any instance of those types.
7517 *
7518 * Returns 1 if standalone, 0 otherwise
7519 */
7520
7521int
7522xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7523 int standalone = -1;
7524
7525 SKIP_BLANKS;
7526 if ((RAW == 's') && (NXT(1) == 't') &&
7527 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7528 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7529 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7530 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7531 SKIP(10);
7532 SKIP_BLANKS;
7533 if (RAW != '=') {
7534 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData,
7537 "XML standalone declaration : expected '='\n");
7538 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007539 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007540 return(standalone);
7541 }
7542 NEXT;
7543 SKIP_BLANKS;
7544 if (RAW == '\''){
7545 NEXT;
7546 if ((RAW == 'n') && (NXT(1) == 'o')) {
7547 standalone = 0;
7548 SKIP(2);
7549 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7550 (NXT(2) == 's')) {
7551 standalone = 1;
7552 SKIP(3);
7553 } else {
7554 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7556 ctxt->sax->error(ctxt->userData,
7557 "standalone accepts only 'yes' or 'no'\n");
7558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007560 }
7561 if (RAW != '\'') {
7562 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7564 ctxt->sax->error(ctxt->userData, "String not closed\n");
7565 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007566 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007567 } else
7568 NEXT;
7569 } else if (RAW == '"'){
7570 NEXT;
7571 if ((RAW == 'n') && (NXT(1) == 'o')) {
7572 standalone = 0;
7573 SKIP(2);
7574 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7575 (NXT(2) == 's')) {
7576 standalone = 1;
7577 SKIP(3);
7578 } else {
7579 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7581 ctxt->sax->error(ctxt->userData,
7582 "standalone accepts only 'yes' or 'no'\n");
7583 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007584 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007585 }
7586 if (RAW != '"') {
7587 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7589 ctxt->sax->error(ctxt->userData, "String not closed\n");
7590 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007591 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007592 } else
7593 NEXT;
7594 } else {
7595 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7596 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7597 ctxt->sax->error(ctxt->userData,
7598 "Standalone value not found\n");
7599 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007600 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007601 }
7602 }
7603 return(standalone);
7604}
7605
7606/**
7607 * xmlParseXMLDecl:
7608 * @ctxt: an XML parser context
7609 *
7610 * parse an XML declaration header
7611 *
7612 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7613 */
7614
7615void
7616xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7617 xmlChar *version;
7618
7619 /*
7620 * We know that '<?xml' is here.
7621 */
7622 SKIP(5);
7623
7624 if (!IS_BLANK(RAW)) {
7625 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7627 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7628 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007629 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007630 }
7631 SKIP_BLANKS;
7632
7633 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007634 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007635 */
7636 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007637 if (version == NULL) {
7638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7639 ctxt->sax->error(ctxt->userData,
7640 "Malformed declaration expecting version\n");
7641 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007642 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007643 } else {
7644 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7645 /*
7646 * TODO: Blueberry should be detected here
7647 */
7648 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7649 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7650 version);
7651 }
7652 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007653 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007654 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007655 }
Owen Taylor3473f882001-02-23 17:55:21 +00007656
7657 /*
7658 * We may have the encoding declaration
7659 */
7660 if (!IS_BLANK(RAW)) {
7661 if ((RAW == '?') && (NXT(1) == '>')) {
7662 SKIP(2);
7663 return;
7664 }
7665 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7667 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7668 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007669 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007670 }
7671 xmlParseEncodingDecl(ctxt);
7672 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7673 /*
7674 * The XML REC instructs us to stop parsing right here
7675 */
7676 return;
7677 }
7678
7679 /*
7680 * We may have the standalone status.
7681 */
7682 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7683 if ((RAW == '?') && (NXT(1) == '>')) {
7684 SKIP(2);
7685 return;
7686 }
7687 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7689 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7690 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007691 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007692 }
7693 SKIP_BLANKS;
7694 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7695
7696 SKIP_BLANKS;
7697 if ((RAW == '?') && (NXT(1) == '>')) {
7698 SKIP(2);
7699 } else if (RAW == '>') {
7700 /* Deprecated old WD ... */
7701 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7703 ctxt->sax->error(ctxt->userData,
7704 "XML declaration must end-up with '?>'\n");
7705 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007707 NEXT;
7708 } else {
7709 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7711 ctxt->sax->error(ctxt->userData,
7712 "parsing XML declaration: '?>' expected\n");
7713 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007714 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007715 MOVETO_ENDTAG(CUR_PTR);
7716 NEXT;
7717 }
7718}
7719
7720/**
7721 * xmlParseMisc:
7722 * @ctxt: an XML parser context
7723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007724 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007725 *
7726 * [27] Misc ::= Comment | PI | S
7727 */
7728
7729void
7730xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007731 while (((RAW == '<') && (NXT(1) == '?')) ||
7732 ((RAW == '<') && (NXT(1) == '!') &&
7733 (NXT(2) == '-') && (NXT(3) == '-')) ||
7734 IS_BLANK(CUR)) {
7735 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007736 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007737 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007738 NEXT;
7739 } else
7740 xmlParseComment(ctxt);
7741 }
7742}
7743
7744/**
7745 * xmlParseDocument:
7746 * @ctxt: an XML parser context
7747 *
7748 * parse an XML document (and build a tree if using the standard SAX
7749 * interface).
7750 *
7751 * [1] document ::= prolog element Misc*
7752 *
7753 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7754 *
7755 * Returns 0, -1 in case of error. the parser context is augmented
7756 * as a result of the parsing.
7757 */
7758
7759int
7760xmlParseDocument(xmlParserCtxtPtr ctxt) {
7761 xmlChar start[4];
7762 xmlCharEncoding enc;
7763
7764 xmlInitParser();
7765
7766 GROW;
7767
7768 /*
7769 * SAX: beginning of the document processing.
7770 */
7771 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7772 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7773
Daniel Veillard50f34372001-08-03 12:06:36 +00007774 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007775 /*
7776 * Get the 4 first bytes and decode the charset
7777 * if enc != XML_CHAR_ENCODING_NONE
7778 * plug some encoding conversion routines.
7779 */
7780 start[0] = RAW;
7781 start[1] = NXT(1);
7782 start[2] = NXT(2);
7783 start[3] = NXT(3);
7784 enc = xmlDetectCharEncoding(start, 4);
7785 if (enc != XML_CHAR_ENCODING_NONE) {
7786 xmlSwitchEncoding(ctxt, enc);
7787 }
Owen Taylor3473f882001-02-23 17:55:21 +00007788 }
7789
7790
7791 if (CUR == 0) {
7792 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7794 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7795 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007796 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007797 }
7798
7799 /*
7800 * Check for the XMLDecl in the Prolog.
7801 */
7802 GROW;
7803 if ((RAW == '<') && (NXT(1) == '?') &&
7804 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7805 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7806
7807 /*
7808 * Note that we will switch encoding on the fly.
7809 */
7810 xmlParseXMLDecl(ctxt);
7811 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7812 /*
7813 * The XML REC instructs us to stop parsing right here
7814 */
7815 return(-1);
7816 }
7817 ctxt->standalone = ctxt->input->standalone;
7818 SKIP_BLANKS;
7819 } else {
7820 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7821 }
7822 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7823 ctxt->sax->startDocument(ctxt->userData);
7824
7825 /*
7826 * The Misc part of the Prolog
7827 */
7828 GROW;
7829 xmlParseMisc(ctxt);
7830
7831 /*
7832 * Then possibly doc type declaration(s) and more Misc
7833 * (doctypedecl Misc*)?
7834 */
7835 GROW;
7836 if ((RAW == '<') && (NXT(1) == '!') &&
7837 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7838 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7839 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7840 (NXT(8) == 'E')) {
7841
7842 ctxt->inSubset = 1;
7843 xmlParseDocTypeDecl(ctxt);
7844 if (RAW == '[') {
7845 ctxt->instate = XML_PARSER_DTD;
7846 xmlParseInternalSubset(ctxt);
7847 }
7848
7849 /*
7850 * Create and update the external subset.
7851 */
7852 ctxt->inSubset = 2;
7853 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7854 (!ctxt->disableSAX))
7855 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7856 ctxt->extSubSystem, ctxt->extSubURI);
7857 ctxt->inSubset = 0;
7858
7859
7860 ctxt->instate = XML_PARSER_PROLOG;
7861 xmlParseMisc(ctxt);
7862 }
7863
7864 /*
7865 * Time to start parsing the tree itself
7866 */
7867 GROW;
7868 if (RAW != '<') {
7869 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7871 ctxt->sax->error(ctxt->userData,
7872 "Start tag expected, '<' not found\n");
7873 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007874 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007875 ctxt->instate = XML_PARSER_EOF;
7876 } else {
7877 ctxt->instate = XML_PARSER_CONTENT;
7878 xmlParseElement(ctxt);
7879 ctxt->instate = XML_PARSER_EPILOG;
7880
7881
7882 /*
7883 * The Misc part at the end
7884 */
7885 xmlParseMisc(ctxt);
7886
Daniel Veillard561b7f82002-03-20 21:55:57 +00007887 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007888 ctxt->errNo = XML_ERR_DOCUMENT_END;
7889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7890 ctxt->sax->error(ctxt->userData,
7891 "Extra content at the end of the document\n");
7892 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007893 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007894 }
7895 ctxt->instate = XML_PARSER_EOF;
7896 }
7897
7898 /*
7899 * SAX: end of the document processing.
7900 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007901 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007902 ctxt->sax->endDocument(ctxt->userData);
7903
Daniel Veillard5997aca2002-03-18 18:36:20 +00007904 /*
7905 * Remove locally kept entity definitions if the tree was not built
7906 */
7907 if ((ctxt->myDoc != NULL) &&
7908 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7909 xmlFreeDoc(ctxt->myDoc);
7910 ctxt->myDoc = NULL;
7911 }
7912
Daniel Veillardc7612992002-02-17 22:47:37 +00007913 if (! ctxt->wellFormed) {
7914 ctxt->valid = 0;
7915 return(-1);
7916 }
Owen Taylor3473f882001-02-23 17:55:21 +00007917 return(0);
7918}
7919
7920/**
7921 * xmlParseExtParsedEnt:
7922 * @ctxt: an XML parser context
7923 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007924 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007925 * An external general parsed entity is well-formed if it matches the
7926 * production labeled extParsedEnt.
7927 *
7928 * [78] extParsedEnt ::= TextDecl? content
7929 *
7930 * Returns 0, -1 in case of error. the parser context is augmented
7931 * as a result of the parsing.
7932 */
7933
7934int
7935xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7936 xmlChar start[4];
7937 xmlCharEncoding enc;
7938
7939 xmlDefaultSAXHandlerInit();
7940
7941 GROW;
7942
7943 /*
7944 * SAX: beginning of the document processing.
7945 */
7946 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7947 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7948
7949 /*
7950 * Get the 4 first bytes and decode the charset
7951 * if enc != XML_CHAR_ENCODING_NONE
7952 * plug some encoding conversion routines.
7953 */
7954 start[0] = RAW;
7955 start[1] = NXT(1);
7956 start[2] = NXT(2);
7957 start[3] = NXT(3);
7958 enc = xmlDetectCharEncoding(start, 4);
7959 if (enc != XML_CHAR_ENCODING_NONE) {
7960 xmlSwitchEncoding(ctxt, enc);
7961 }
7962
7963
7964 if (CUR == 0) {
7965 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7967 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7968 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007969 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007970 }
7971
7972 /*
7973 * Check for the XMLDecl in the Prolog.
7974 */
7975 GROW;
7976 if ((RAW == '<') && (NXT(1) == '?') &&
7977 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7978 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7979
7980 /*
7981 * Note that we will switch encoding on the fly.
7982 */
7983 xmlParseXMLDecl(ctxt);
7984 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7985 /*
7986 * The XML REC instructs us to stop parsing right here
7987 */
7988 return(-1);
7989 }
7990 SKIP_BLANKS;
7991 } else {
7992 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7993 }
7994 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7995 ctxt->sax->startDocument(ctxt->userData);
7996
7997 /*
7998 * Doing validity checking on chunk doesn't make sense
7999 */
8000 ctxt->instate = XML_PARSER_CONTENT;
8001 ctxt->validate = 0;
8002 ctxt->loadsubset = 0;
8003 ctxt->depth = 0;
8004
8005 xmlParseContent(ctxt);
8006
8007 if ((RAW == '<') && (NXT(1) == '/')) {
8008 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8010 ctxt->sax->error(ctxt->userData,
8011 "chunk is not well balanced\n");
8012 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008013 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008014 } else if (RAW != 0) {
8015 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8017 ctxt->sax->error(ctxt->userData,
8018 "extra content at the end of well balanced chunk\n");
8019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008021 }
8022
8023 /*
8024 * SAX: end of the document processing.
8025 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008026 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008027 ctxt->sax->endDocument(ctxt->userData);
8028
8029 if (! ctxt->wellFormed) return(-1);
8030 return(0);
8031}
8032
8033/************************************************************************
8034 * *
8035 * Progressive parsing interfaces *
8036 * *
8037 ************************************************************************/
8038
8039/**
8040 * xmlParseLookupSequence:
8041 * @ctxt: an XML parser context
8042 * @first: the first char to lookup
8043 * @next: the next char to lookup or zero
8044 * @third: the next char to lookup or zero
8045 *
8046 * Try to find if a sequence (first, next, third) or just (first next) or
8047 * (first) is available in the input stream.
8048 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8049 * to avoid rescanning sequences of bytes, it DOES change the state of the
8050 * parser, do not use liberally.
8051 *
8052 * Returns the index to the current parsing point if the full sequence
8053 * is available, -1 otherwise.
8054 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008055static int
Owen Taylor3473f882001-02-23 17:55:21 +00008056xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8057 xmlChar next, xmlChar third) {
8058 int base, len;
8059 xmlParserInputPtr in;
8060 const xmlChar *buf;
8061
8062 in = ctxt->input;
8063 if (in == NULL) return(-1);
8064 base = in->cur - in->base;
8065 if (base < 0) return(-1);
8066 if (ctxt->checkIndex > base)
8067 base = ctxt->checkIndex;
8068 if (in->buf == NULL) {
8069 buf = in->base;
8070 len = in->length;
8071 } else {
8072 buf = in->buf->buffer->content;
8073 len = in->buf->buffer->use;
8074 }
8075 /* take into account the sequence length */
8076 if (third) len -= 2;
8077 else if (next) len --;
8078 for (;base < len;base++) {
8079 if (buf[base] == first) {
8080 if (third != 0) {
8081 if ((buf[base + 1] != next) ||
8082 (buf[base + 2] != third)) continue;
8083 } else if (next != 0) {
8084 if (buf[base + 1] != next) continue;
8085 }
8086 ctxt->checkIndex = 0;
8087#ifdef DEBUG_PUSH
8088 if (next == 0)
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: lookup '%c' found at %d\n",
8091 first, base);
8092 else if (third == 0)
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: lookup '%c%c' found at %d\n",
8095 first, next, base);
8096 else
8097 xmlGenericError(xmlGenericErrorContext,
8098 "PP: lookup '%c%c%c' found at %d\n",
8099 first, next, third, base);
8100#endif
8101 return(base - (in->cur - in->base));
8102 }
8103 }
8104 ctxt->checkIndex = base;
8105#ifdef DEBUG_PUSH
8106 if (next == 0)
8107 xmlGenericError(xmlGenericErrorContext,
8108 "PP: lookup '%c' failed\n", first);
8109 else if (third == 0)
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: lookup '%c%c' failed\n", first, next);
8112 else
8113 xmlGenericError(xmlGenericErrorContext,
8114 "PP: lookup '%c%c%c' failed\n", first, next, third);
8115#endif
8116 return(-1);
8117}
8118
8119/**
8120 * xmlParseTryOrFinish:
8121 * @ctxt: an XML parser context
8122 * @terminate: last chunk indicator
8123 *
8124 * Try to progress on parsing
8125 *
8126 * Returns zero if no parsing was possible
8127 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008128static int
Owen Taylor3473f882001-02-23 17:55:21 +00008129xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8130 int ret = 0;
8131 int avail;
8132 xmlChar cur, next;
8133
8134#ifdef DEBUG_PUSH
8135 switch (ctxt->instate) {
8136 case XML_PARSER_EOF:
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: try EOF\n"); break;
8139 case XML_PARSER_START:
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: try START\n"); break;
8142 case XML_PARSER_MISC:
8143 xmlGenericError(xmlGenericErrorContext,
8144 "PP: try MISC\n");break;
8145 case XML_PARSER_COMMENT:
8146 xmlGenericError(xmlGenericErrorContext,
8147 "PP: try COMMENT\n");break;
8148 case XML_PARSER_PROLOG:
8149 xmlGenericError(xmlGenericErrorContext,
8150 "PP: try PROLOG\n");break;
8151 case XML_PARSER_START_TAG:
8152 xmlGenericError(xmlGenericErrorContext,
8153 "PP: try START_TAG\n");break;
8154 case XML_PARSER_CONTENT:
8155 xmlGenericError(xmlGenericErrorContext,
8156 "PP: try CONTENT\n");break;
8157 case XML_PARSER_CDATA_SECTION:
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: try CDATA_SECTION\n");break;
8160 case XML_PARSER_END_TAG:
8161 xmlGenericError(xmlGenericErrorContext,
8162 "PP: try END_TAG\n");break;
8163 case XML_PARSER_ENTITY_DECL:
8164 xmlGenericError(xmlGenericErrorContext,
8165 "PP: try ENTITY_DECL\n");break;
8166 case XML_PARSER_ENTITY_VALUE:
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: try ENTITY_VALUE\n");break;
8169 case XML_PARSER_ATTRIBUTE_VALUE:
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: try ATTRIBUTE_VALUE\n");break;
8172 case XML_PARSER_DTD:
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: try DTD\n");break;
8175 case XML_PARSER_EPILOG:
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: try EPILOG\n");break;
8178 case XML_PARSER_PI:
8179 xmlGenericError(xmlGenericErrorContext,
8180 "PP: try PI\n");break;
8181 case XML_PARSER_IGNORE:
8182 xmlGenericError(xmlGenericErrorContext,
8183 "PP: try IGNORE\n");break;
8184 }
8185#endif
8186
8187 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008188 SHRINK;
8189
Owen Taylor3473f882001-02-23 17:55:21 +00008190 /*
8191 * Pop-up of finished entities.
8192 */
8193 while ((RAW == 0) && (ctxt->inputNr > 1))
8194 xmlPopInput(ctxt);
8195
8196 if (ctxt->input ==NULL) break;
8197 if (ctxt->input->buf == NULL)
8198 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008199 else {
8200 /*
8201 * If we are operating on converted input, try to flush
8202 * remainng chars to avoid them stalling in the non-converted
8203 * buffer.
8204 */
8205 if ((ctxt->input->buf->raw != NULL) &&
8206 (ctxt->input->buf->raw->use > 0)) {
8207 int base = ctxt->input->base -
8208 ctxt->input->buf->buffer->content;
8209 int current = ctxt->input->cur - ctxt->input->base;
8210
8211 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8212 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8213 ctxt->input->cur = ctxt->input->base + current;
8214 ctxt->input->end =
8215 &ctxt->input->buf->buffer->content[
8216 ctxt->input->buf->buffer->use];
8217 }
8218 avail = ctxt->input->buf->buffer->use -
8219 (ctxt->input->cur - ctxt->input->base);
8220 }
Owen Taylor3473f882001-02-23 17:55:21 +00008221 if (avail < 1)
8222 goto done;
8223 switch (ctxt->instate) {
8224 case XML_PARSER_EOF:
8225 /*
8226 * Document parsing is done !
8227 */
8228 goto done;
8229 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008230 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8231 xmlChar start[4];
8232 xmlCharEncoding enc;
8233
8234 /*
8235 * Very first chars read from the document flow.
8236 */
8237 if (avail < 4)
8238 goto done;
8239
8240 /*
8241 * Get the 4 first bytes and decode the charset
8242 * if enc != XML_CHAR_ENCODING_NONE
8243 * plug some encoding conversion routines.
8244 */
8245 start[0] = RAW;
8246 start[1] = NXT(1);
8247 start[2] = NXT(2);
8248 start[3] = NXT(3);
8249 enc = xmlDetectCharEncoding(start, 4);
8250 if (enc != XML_CHAR_ENCODING_NONE) {
8251 xmlSwitchEncoding(ctxt, enc);
8252 }
8253 break;
8254 }
Owen Taylor3473f882001-02-23 17:55:21 +00008255
8256 cur = ctxt->input->cur[0];
8257 next = ctxt->input->cur[1];
8258 if (cur == 0) {
8259 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8260 ctxt->sax->setDocumentLocator(ctxt->userData,
8261 &xmlDefaultSAXLocator);
8262 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8264 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8265 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008266 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008267 ctxt->instate = XML_PARSER_EOF;
8268#ifdef DEBUG_PUSH
8269 xmlGenericError(xmlGenericErrorContext,
8270 "PP: entering EOF\n");
8271#endif
8272 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8273 ctxt->sax->endDocument(ctxt->userData);
8274 goto done;
8275 }
8276 if ((cur == '<') && (next == '?')) {
8277 /* PI or XML decl */
8278 if (avail < 5) return(ret);
8279 if ((!terminate) &&
8280 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8281 return(ret);
8282 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8283 ctxt->sax->setDocumentLocator(ctxt->userData,
8284 &xmlDefaultSAXLocator);
8285 if ((ctxt->input->cur[2] == 'x') &&
8286 (ctxt->input->cur[3] == 'm') &&
8287 (ctxt->input->cur[4] == 'l') &&
8288 (IS_BLANK(ctxt->input->cur[5]))) {
8289 ret += 5;
8290#ifdef DEBUG_PUSH
8291 xmlGenericError(xmlGenericErrorContext,
8292 "PP: Parsing XML Decl\n");
8293#endif
8294 xmlParseXMLDecl(ctxt);
8295 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8296 /*
8297 * The XML REC instructs us to stop parsing right
8298 * here
8299 */
8300 ctxt->instate = XML_PARSER_EOF;
8301 return(0);
8302 }
8303 ctxt->standalone = ctxt->input->standalone;
8304 if ((ctxt->encoding == NULL) &&
8305 (ctxt->input->encoding != NULL))
8306 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8307 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8308 (!ctxt->disableSAX))
8309 ctxt->sax->startDocument(ctxt->userData);
8310 ctxt->instate = XML_PARSER_MISC;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering MISC\n");
8314#endif
8315 } else {
8316 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8317 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8318 (!ctxt->disableSAX))
8319 ctxt->sax->startDocument(ctxt->userData);
8320 ctxt->instate = XML_PARSER_MISC;
8321#ifdef DEBUG_PUSH
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: entering MISC\n");
8324#endif
8325 }
8326 } else {
8327 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8328 ctxt->sax->setDocumentLocator(ctxt->userData,
8329 &xmlDefaultSAXLocator);
8330 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8331 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8332 (!ctxt->disableSAX))
8333 ctxt->sax->startDocument(ctxt->userData);
8334 ctxt->instate = XML_PARSER_MISC;
8335#ifdef DEBUG_PUSH
8336 xmlGenericError(xmlGenericErrorContext,
8337 "PP: entering MISC\n");
8338#endif
8339 }
8340 break;
8341 case XML_PARSER_MISC:
8342 SKIP_BLANKS;
8343 if (ctxt->input->buf == NULL)
8344 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8345 else
8346 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8347 if (avail < 2)
8348 goto done;
8349 cur = ctxt->input->cur[0];
8350 next = ctxt->input->cur[1];
8351 if ((cur == '<') && (next == '?')) {
8352 if ((!terminate) &&
8353 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8354 goto done;
8355#ifdef DEBUG_PUSH
8356 xmlGenericError(xmlGenericErrorContext,
8357 "PP: Parsing PI\n");
8358#endif
8359 xmlParsePI(ctxt);
8360 } else if ((cur == '<') && (next == '!') &&
8361 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8362 if ((!terminate) &&
8363 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8364 goto done;
8365#ifdef DEBUG_PUSH
8366 xmlGenericError(xmlGenericErrorContext,
8367 "PP: Parsing Comment\n");
8368#endif
8369 xmlParseComment(ctxt);
8370 ctxt->instate = XML_PARSER_MISC;
8371 } else if ((cur == '<') && (next == '!') &&
8372 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8373 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8374 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8375 (ctxt->input->cur[8] == 'E')) {
8376 if ((!terminate) &&
8377 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8378 goto done;
8379#ifdef DEBUG_PUSH
8380 xmlGenericError(xmlGenericErrorContext,
8381 "PP: Parsing internal subset\n");
8382#endif
8383 ctxt->inSubset = 1;
8384 xmlParseDocTypeDecl(ctxt);
8385 if (RAW == '[') {
8386 ctxt->instate = XML_PARSER_DTD;
8387#ifdef DEBUG_PUSH
8388 xmlGenericError(xmlGenericErrorContext,
8389 "PP: entering DTD\n");
8390#endif
8391 } else {
8392 /*
8393 * Create and update the external subset.
8394 */
8395 ctxt->inSubset = 2;
8396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8397 (ctxt->sax->externalSubset != NULL))
8398 ctxt->sax->externalSubset(ctxt->userData,
8399 ctxt->intSubName, ctxt->extSubSystem,
8400 ctxt->extSubURI);
8401 ctxt->inSubset = 0;
8402 ctxt->instate = XML_PARSER_PROLOG;
8403#ifdef DEBUG_PUSH
8404 xmlGenericError(xmlGenericErrorContext,
8405 "PP: entering PROLOG\n");
8406#endif
8407 }
8408 } else if ((cur == '<') && (next == '!') &&
8409 (avail < 9)) {
8410 goto done;
8411 } else {
8412 ctxt->instate = XML_PARSER_START_TAG;
8413#ifdef DEBUG_PUSH
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: entering START_TAG\n");
8416#endif
8417 }
8418 break;
8419 case XML_PARSER_IGNORE:
8420 xmlGenericError(xmlGenericErrorContext,
8421 "PP: internal error, state == IGNORE");
8422 ctxt->instate = XML_PARSER_DTD;
8423#ifdef DEBUG_PUSH
8424 xmlGenericError(xmlGenericErrorContext,
8425 "PP: entering DTD\n");
8426#endif
8427 break;
8428 case XML_PARSER_PROLOG:
8429 SKIP_BLANKS;
8430 if (ctxt->input->buf == NULL)
8431 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8432 else
8433 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8434 if (avail < 2)
8435 goto done;
8436 cur = ctxt->input->cur[0];
8437 next = ctxt->input->cur[1];
8438 if ((cur == '<') && (next == '?')) {
8439 if ((!terminate) &&
8440 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8441 goto done;
8442#ifdef DEBUG_PUSH
8443 xmlGenericError(xmlGenericErrorContext,
8444 "PP: Parsing PI\n");
8445#endif
8446 xmlParsePI(ctxt);
8447 } else if ((cur == '<') && (next == '!') &&
8448 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8449 if ((!terminate) &&
8450 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8451 goto done;
8452#ifdef DEBUG_PUSH
8453 xmlGenericError(xmlGenericErrorContext,
8454 "PP: Parsing Comment\n");
8455#endif
8456 xmlParseComment(ctxt);
8457 ctxt->instate = XML_PARSER_PROLOG;
8458 } else if ((cur == '<') && (next == '!') &&
8459 (avail < 4)) {
8460 goto done;
8461 } else {
8462 ctxt->instate = XML_PARSER_START_TAG;
8463#ifdef DEBUG_PUSH
8464 xmlGenericError(xmlGenericErrorContext,
8465 "PP: entering START_TAG\n");
8466#endif
8467 }
8468 break;
8469 case XML_PARSER_EPILOG:
8470 SKIP_BLANKS;
8471 if (ctxt->input->buf == NULL)
8472 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8473 else
8474 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8475 if (avail < 2)
8476 goto done;
8477 cur = ctxt->input->cur[0];
8478 next = ctxt->input->cur[1];
8479 if ((cur == '<') && (next == '?')) {
8480 if ((!terminate) &&
8481 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8482 goto done;
8483#ifdef DEBUG_PUSH
8484 xmlGenericError(xmlGenericErrorContext,
8485 "PP: Parsing PI\n");
8486#endif
8487 xmlParsePI(ctxt);
8488 ctxt->instate = XML_PARSER_EPILOG;
8489 } else if ((cur == '<') && (next == '!') &&
8490 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8491 if ((!terminate) &&
8492 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8493 goto done;
8494#ifdef DEBUG_PUSH
8495 xmlGenericError(xmlGenericErrorContext,
8496 "PP: Parsing Comment\n");
8497#endif
8498 xmlParseComment(ctxt);
8499 ctxt->instate = XML_PARSER_EPILOG;
8500 } else if ((cur == '<') && (next == '!') &&
8501 (avail < 4)) {
8502 goto done;
8503 } else {
8504 ctxt->errNo = XML_ERR_DOCUMENT_END;
8505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8506 ctxt->sax->error(ctxt->userData,
8507 "Extra content at the end of the document\n");
8508 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008509 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 ctxt->instate = XML_PARSER_EOF;
8511#ifdef DEBUG_PUSH
8512 xmlGenericError(xmlGenericErrorContext,
8513 "PP: entering EOF\n");
8514#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008515 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008516 ctxt->sax->endDocument(ctxt->userData);
8517 goto done;
8518 }
8519 break;
8520 case XML_PARSER_START_TAG: {
8521 xmlChar *name, *oldname;
8522
8523 if ((avail < 2) && (ctxt->inputNr == 1))
8524 goto done;
8525 cur = ctxt->input->cur[0];
8526 if (cur != '<') {
8527 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8529 ctxt->sax->error(ctxt->userData,
8530 "Start tag expect, '<' not found\n");
8531 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008532 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008533 ctxt->instate = XML_PARSER_EOF;
8534#ifdef DEBUG_PUSH
8535 xmlGenericError(xmlGenericErrorContext,
8536 "PP: entering EOF\n");
8537#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008538 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008539 ctxt->sax->endDocument(ctxt->userData);
8540 goto done;
8541 }
8542 if ((!terminate) &&
8543 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8544 goto done;
8545 if (ctxt->spaceNr == 0)
8546 spacePush(ctxt, -1);
8547 else
8548 spacePush(ctxt, *ctxt->space);
8549 name = xmlParseStartTag(ctxt);
8550 if (name == NULL) {
8551 spacePop(ctxt);
8552 ctxt->instate = XML_PARSER_EOF;
8553#ifdef DEBUG_PUSH
8554 xmlGenericError(xmlGenericErrorContext,
8555 "PP: entering EOF\n");
8556#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008557 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008558 ctxt->sax->endDocument(ctxt->userData);
8559 goto done;
8560 }
8561 namePush(ctxt, xmlStrdup(name));
8562
8563 /*
8564 * [ VC: Root Element Type ]
8565 * The Name in the document type declaration must match
8566 * the element type of the root element.
8567 */
8568 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8569 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8570 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8571
8572 /*
8573 * Check for an Empty Element.
8574 */
8575 if ((RAW == '/') && (NXT(1) == '>')) {
8576 SKIP(2);
8577 if ((ctxt->sax != NULL) &&
8578 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8579 ctxt->sax->endElement(ctxt->userData, name);
8580 xmlFree(name);
8581 oldname = namePop(ctxt);
8582 spacePop(ctxt);
8583 if (oldname != NULL) {
8584#ifdef DEBUG_STACK
8585 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8586#endif
8587 xmlFree(oldname);
8588 }
8589 if (ctxt->name == NULL) {
8590 ctxt->instate = XML_PARSER_EPILOG;
8591#ifdef DEBUG_PUSH
8592 xmlGenericError(xmlGenericErrorContext,
8593 "PP: entering EPILOG\n");
8594#endif
8595 } else {
8596 ctxt->instate = XML_PARSER_CONTENT;
8597#ifdef DEBUG_PUSH
8598 xmlGenericError(xmlGenericErrorContext,
8599 "PP: entering CONTENT\n");
8600#endif
8601 }
8602 break;
8603 }
8604 if (RAW == '>') {
8605 NEXT;
8606 } else {
8607 ctxt->errNo = XML_ERR_GT_REQUIRED;
8608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8609 ctxt->sax->error(ctxt->userData,
8610 "Couldn't find end of Start Tag %s\n",
8611 name);
8612 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008614
8615 /*
8616 * end of parsing of this node.
8617 */
8618 nodePop(ctxt);
8619 oldname = namePop(ctxt);
8620 spacePop(ctxt);
8621 if (oldname != NULL) {
8622#ifdef DEBUG_STACK
8623 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8624#endif
8625 xmlFree(oldname);
8626 }
8627 }
8628 xmlFree(name);
8629 ctxt->instate = XML_PARSER_CONTENT;
8630#ifdef DEBUG_PUSH
8631 xmlGenericError(xmlGenericErrorContext,
8632 "PP: entering CONTENT\n");
8633#endif
8634 break;
8635 }
8636 case XML_PARSER_CONTENT: {
8637 const xmlChar *test;
8638 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008639 if ((avail < 2) && (ctxt->inputNr == 1))
8640 goto done;
8641 cur = ctxt->input->cur[0];
8642 next = ctxt->input->cur[1];
8643
8644 test = CUR_PTR;
8645 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008646 if ((cur == '<') && (next == '?')) {
8647 if ((!terminate) &&
8648 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8649 goto done;
8650#ifdef DEBUG_PUSH
8651 xmlGenericError(xmlGenericErrorContext,
8652 "PP: Parsing PI\n");
8653#endif
8654 xmlParsePI(ctxt);
8655 } else if ((cur == '<') && (next == '!') &&
8656 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8657 if ((!terminate) &&
8658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8659 goto done;
8660#ifdef DEBUG_PUSH
8661 xmlGenericError(xmlGenericErrorContext,
8662 "PP: Parsing Comment\n");
8663#endif
8664 xmlParseComment(ctxt);
8665 ctxt->instate = XML_PARSER_CONTENT;
8666 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8667 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8668 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8669 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8670 (ctxt->input->cur[8] == '[')) {
8671 SKIP(9);
8672 ctxt->instate = XML_PARSER_CDATA_SECTION;
8673#ifdef DEBUG_PUSH
8674 xmlGenericError(xmlGenericErrorContext,
8675 "PP: entering CDATA_SECTION\n");
8676#endif
8677 break;
8678 } else if ((cur == '<') && (next == '!') &&
8679 (avail < 9)) {
8680 goto done;
8681 } else if ((cur == '<') && (next == '/')) {
8682 ctxt->instate = XML_PARSER_END_TAG;
8683#ifdef DEBUG_PUSH
8684 xmlGenericError(xmlGenericErrorContext,
8685 "PP: entering END_TAG\n");
8686#endif
8687 break;
8688 } else if (cur == '<') {
8689 ctxt->instate = XML_PARSER_START_TAG;
8690#ifdef DEBUG_PUSH
8691 xmlGenericError(xmlGenericErrorContext,
8692 "PP: entering START_TAG\n");
8693#endif
8694 break;
8695 } else if (cur == '&') {
8696 if ((!terminate) &&
8697 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8698 goto done;
8699#ifdef DEBUG_PUSH
8700 xmlGenericError(xmlGenericErrorContext,
8701 "PP: Parsing Reference\n");
8702#endif
8703 xmlParseReference(ctxt);
8704 } else {
8705 /* TODO Avoid the extra copy, handle directly !!! */
8706 /*
8707 * Goal of the following test is:
8708 * - minimize calls to the SAX 'character' callback
8709 * when they are mergeable
8710 * - handle an problem for isBlank when we only parse
8711 * a sequence of blank chars and the next one is
8712 * not available to check against '<' presence.
8713 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008714 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008715 * of the parser.
8716 */
8717 if ((ctxt->inputNr == 1) &&
8718 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8719 if ((!terminate) &&
8720 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8721 goto done;
8722 }
8723 ctxt->checkIndex = 0;
8724#ifdef DEBUG_PUSH
8725 xmlGenericError(xmlGenericErrorContext,
8726 "PP: Parsing char data\n");
8727#endif
8728 xmlParseCharData(ctxt, 0);
8729 }
8730 /*
8731 * Pop-up of finished entities.
8732 */
8733 while ((RAW == 0) && (ctxt->inputNr > 1))
8734 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008735 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008736 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8738 ctxt->sax->error(ctxt->userData,
8739 "detected an error in element content\n");
8740 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008741 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008742 ctxt->instate = XML_PARSER_EOF;
8743 break;
8744 }
8745 break;
8746 }
8747 case XML_PARSER_CDATA_SECTION: {
8748 /*
8749 * The Push mode need to have the SAX callback for
8750 * cdataBlock merge back contiguous callbacks.
8751 */
8752 int base;
8753
8754 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8755 if (base < 0) {
8756 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8757 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8758 if (ctxt->sax->cdataBlock != NULL)
8759 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8760 XML_PARSER_BIG_BUFFER_SIZE);
8761 }
8762 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8763 ctxt->checkIndex = 0;
8764 }
8765 goto done;
8766 } else {
8767 if ((ctxt->sax != NULL) && (base > 0) &&
8768 (!ctxt->disableSAX)) {
8769 if (ctxt->sax->cdataBlock != NULL)
8770 ctxt->sax->cdataBlock(ctxt->userData,
8771 ctxt->input->cur, base);
8772 }
8773 SKIP(base + 3);
8774 ctxt->checkIndex = 0;
8775 ctxt->instate = XML_PARSER_CONTENT;
8776#ifdef DEBUG_PUSH
8777 xmlGenericError(xmlGenericErrorContext,
8778 "PP: entering CONTENT\n");
8779#endif
8780 }
8781 break;
8782 }
8783 case XML_PARSER_END_TAG:
8784 if (avail < 2)
8785 goto done;
8786 if ((!terminate) &&
8787 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8788 goto done;
8789 xmlParseEndTag(ctxt);
8790 if (ctxt->name == NULL) {
8791 ctxt->instate = XML_PARSER_EPILOG;
8792#ifdef DEBUG_PUSH
8793 xmlGenericError(xmlGenericErrorContext,
8794 "PP: entering EPILOG\n");
8795#endif
8796 } else {
8797 ctxt->instate = XML_PARSER_CONTENT;
8798#ifdef DEBUG_PUSH
8799 xmlGenericError(xmlGenericErrorContext,
8800 "PP: entering CONTENT\n");
8801#endif
8802 }
8803 break;
8804 case XML_PARSER_DTD: {
8805 /*
8806 * Sorry but progressive parsing of the internal subset
8807 * is not expected to be supported. We first check that
8808 * the full content of the internal subset is available and
8809 * the parsing is launched only at that point.
8810 * Internal subset ends up with "']' S? '>'" in an unescaped
8811 * section and not in a ']]>' sequence which are conditional
8812 * sections (whoever argued to keep that crap in XML deserve
8813 * a place in hell !).
8814 */
8815 int base, i;
8816 xmlChar *buf;
8817 xmlChar quote = 0;
8818
8819 base = ctxt->input->cur - ctxt->input->base;
8820 if (base < 0) return(0);
8821 if (ctxt->checkIndex > base)
8822 base = ctxt->checkIndex;
8823 buf = ctxt->input->buf->buffer->content;
8824 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8825 base++) {
8826 if (quote != 0) {
8827 if (buf[base] == quote)
8828 quote = 0;
8829 continue;
8830 }
8831 if (buf[base] == '"') {
8832 quote = '"';
8833 continue;
8834 }
8835 if (buf[base] == '\'') {
8836 quote = '\'';
8837 continue;
8838 }
8839 if (buf[base] == ']') {
8840 if ((unsigned int) base +1 >=
8841 ctxt->input->buf->buffer->use)
8842 break;
8843 if (buf[base + 1] == ']') {
8844 /* conditional crap, skip both ']' ! */
8845 base++;
8846 continue;
8847 }
8848 for (i = 0;
8849 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8850 i++) {
8851 if (buf[base + i] == '>')
8852 goto found_end_int_subset;
8853 }
8854 break;
8855 }
8856 }
8857 /*
8858 * We didn't found the end of the Internal subset
8859 */
8860 if (quote == 0)
8861 ctxt->checkIndex = base;
8862#ifdef DEBUG_PUSH
8863 if (next == 0)
8864 xmlGenericError(xmlGenericErrorContext,
8865 "PP: lookup of int subset end filed\n");
8866#endif
8867 goto done;
8868
8869found_end_int_subset:
8870 xmlParseInternalSubset(ctxt);
8871 ctxt->inSubset = 2;
8872 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8873 (ctxt->sax->externalSubset != NULL))
8874 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8875 ctxt->extSubSystem, ctxt->extSubURI);
8876 ctxt->inSubset = 0;
8877 ctxt->instate = XML_PARSER_PROLOG;
8878 ctxt->checkIndex = 0;
8879#ifdef DEBUG_PUSH
8880 xmlGenericError(xmlGenericErrorContext,
8881 "PP: entering PROLOG\n");
8882#endif
8883 break;
8884 }
8885 case XML_PARSER_COMMENT:
8886 xmlGenericError(xmlGenericErrorContext,
8887 "PP: internal error, state == COMMENT\n");
8888 ctxt->instate = XML_PARSER_CONTENT;
8889#ifdef DEBUG_PUSH
8890 xmlGenericError(xmlGenericErrorContext,
8891 "PP: entering CONTENT\n");
8892#endif
8893 break;
8894 case XML_PARSER_PI:
8895 xmlGenericError(xmlGenericErrorContext,
8896 "PP: internal error, state == PI\n");
8897 ctxt->instate = XML_PARSER_CONTENT;
8898#ifdef DEBUG_PUSH
8899 xmlGenericError(xmlGenericErrorContext,
8900 "PP: entering CONTENT\n");
8901#endif
8902 break;
8903 case XML_PARSER_ENTITY_DECL:
8904 xmlGenericError(xmlGenericErrorContext,
8905 "PP: internal error, state == ENTITY_DECL\n");
8906 ctxt->instate = XML_PARSER_DTD;
8907#ifdef DEBUG_PUSH
8908 xmlGenericError(xmlGenericErrorContext,
8909 "PP: entering DTD\n");
8910#endif
8911 break;
8912 case XML_PARSER_ENTITY_VALUE:
8913 xmlGenericError(xmlGenericErrorContext,
8914 "PP: internal error, state == ENTITY_VALUE\n");
8915 ctxt->instate = XML_PARSER_CONTENT;
8916#ifdef DEBUG_PUSH
8917 xmlGenericError(xmlGenericErrorContext,
8918 "PP: entering DTD\n");
8919#endif
8920 break;
8921 case XML_PARSER_ATTRIBUTE_VALUE:
8922 xmlGenericError(xmlGenericErrorContext,
8923 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8924 ctxt->instate = XML_PARSER_START_TAG;
8925#ifdef DEBUG_PUSH
8926 xmlGenericError(xmlGenericErrorContext,
8927 "PP: entering START_TAG\n");
8928#endif
8929 break;
8930 case XML_PARSER_SYSTEM_LITERAL:
8931 xmlGenericError(xmlGenericErrorContext,
8932 "PP: internal error, state == SYSTEM_LITERAL\n");
8933 ctxt->instate = XML_PARSER_START_TAG;
8934#ifdef DEBUG_PUSH
8935 xmlGenericError(xmlGenericErrorContext,
8936 "PP: entering START_TAG\n");
8937#endif
8938 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008939 case XML_PARSER_PUBLIC_LITERAL:
8940 xmlGenericError(xmlGenericErrorContext,
8941 "PP: internal error, state == PUBLIC_LITERAL\n");
8942 ctxt->instate = XML_PARSER_START_TAG;
8943#ifdef DEBUG_PUSH
8944 xmlGenericError(xmlGenericErrorContext,
8945 "PP: entering START_TAG\n");
8946#endif
8947 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008948 }
8949 }
8950done:
8951#ifdef DEBUG_PUSH
8952 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8953#endif
8954 return(ret);
8955}
8956
8957/**
Owen Taylor3473f882001-02-23 17:55:21 +00008958 * xmlParseChunk:
8959 * @ctxt: an XML parser context
8960 * @chunk: an char array
8961 * @size: the size in byte of the chunk
8962 * @terminate: last chunk indicator
8963 *
8964 * Parse a Chunk of memory
8965 *
8966 * Returns zero if no error, the xmlParserErrors otherwise.
8967 */
8968int
8969xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8970 int terminate) {
8971 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8972 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8973 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8974 int cur = ctxt->input->cur - ctxt->input->base;
8975
8976 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8977 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8978 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008979 ctxt->input->end =
8980 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008981#ifdef DEBUG_PUSH
8982 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8983#endif
8984
8985 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8986 xmlParseTryOrFinish(ctxt, terminate);
8987 } else if (ctxt->instate != XML_PARSER_EOF) {
8988 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8989 xmlParserInputBufferPtr in = ctxt->input->buf;
8990 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8991 (in->raw != NULL)) {
8992 int nbchars;
8993
8994 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8995 if (nbchars < 0) {
8996 xmlGenericError(xmlGenericErrorContext,
8997 "xmlParseChunk: encoder error\n");
8998 return(XML_ERR_INVALID_ENCODING);
8999 }
9000 }
9001 }
9002 }
9003 xmlParseTryOrFinish(ctxt, terminate);
9004 if (terminate) {
9005 /*
9006 * Check for termination
9007 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009008 int avail = 0;
9009 if (ctxt->input->buf == NULL)
9010 avail = ctxt->input->length -
9011 (ctxt->input->cur - ctxt->input->base);
9012 else
9013 avail = ctxt->input->buf->buffer->use -
9014 (ctxt->input->cur - ctxt->input->base);
9015
Owen Taylor3473f882001-02-23 17:55:21 +00009016 if ((ctxt->instate != XML_PARSER_EOF) &&
9017 (ctxt->instate != XML_PARSER_EPILOG)) {
9018 ctxt->errNo = XML_ERR_DOCUMENT_END;
9019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9020 ctxt->sax->error(ctxt->userData,
9021 "Extra content at the end of the document\n");
9022 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009023 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009024 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009025 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9026 ctxt->errNo = XML_ERR_DOCUMENT_END;
9027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9028 ctxt->sax->error(ctxt->userData,
9029 "Extra content at the end of the document\n");
9030 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009031 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009032
9033 }
Owen Taylor3473f882001-02-23 17:55:21 +00009034 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009035 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009036 ctxt->sax->endDocument(ctxt->userData);
9037 }
9038 ctxt->instate = XML_PARSER_EOF;
9039 }
9040 return((xmlParserErrors) ctxt->errNo);
9041}
9042
9043/************************************************************************
9044 * *
9045 * I/O front end functions to the parser *
9046 * *
9047 ************************************************************************/
9048
9049/**
9050 * xmlStopParser:
9051 * @ctxt: an XML parser context
9052 *
9053 * Blocks further parser processing
9054 */
9055void
9056xmlStopParser(xmlParserCtxtPtr ctxt) {
9057 ctxt->instate = XML_PARSER_EOF;
9058 if (ctxt->input != NULL)
9059 ctxt->input->cur = BAD_CAST"";
9060}
9061
9062/**
9063 * xmlCreatePushParserCtxt:
9064 * @sax: a SAX handler
9065 * @user_data: The user data returned on SAX callbacks
9066 * @chunk: a pointer to an array of chars
9067 * @size: number of chars in the array
9068 * @filename: an optional file name or URI
9069 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009070 * Create a parser context for using the XML parser in push mode.
9071 * If @buffer and @size are non-NULL, the data is used to detect
9072 * the encoding. The remaining characters will be parsed so they
9073 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009074 * To allow content encoding detection, @size should be >= 4
9075 * The value of @filename is used for fetching external entities
9076 * and error/warning reports.
9077 *
9078 * Returns the new parser context or NULL
9079 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009080
Owen Taylor3473f882001-02-23 17:55:21 +00009081xmlParserCtxtPtr
9082xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9083 const char *chunk, int size, const char *filename) {
9084 xmlParserCtxtPtr ctxt;
9085 xmlParserInputPtr inputStream;
9086 xmlParserInputBufferPtr buf;
9087 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9088
9089 /*
9090 * plug some encoding conversion routines
9091 */
9092 if ((chunk != NULL) && (size >= 4))
9093 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9094
9095 buf = xmlAllocParserInputBuffer(enc);
9096 if (buf == NULL) return(NULL);
9097
9098 ctxt = xmlNewParserCtxt();
9099 if (ctxt == NULL) {
9100 xmlFree(buf);
9101 return(NULL);
9102 }
9103 if (sax != NULL) {
9104 if (ctxt->sax != &xmlDefaultSAXHandler)
9105 xmlFree(ctxt->sax);
9106 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9107 if (ctxt->sax == NULL) {
9108 xmlFree(buf);
9109 xmlFree(ctxt);
9110 return(NULL);
9111 }
9112 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9113 if (user_data != NULL)
9114 ctxt->userData = user_data;
9115 }
9116 if (filename == NULL) {
9117 ctxt->directory = NULL;
9118 } else {
9119 ctxt->directory = xmlParserGetDirectory(filename);
9120 }
9121
9122 inputStream = xmlNewInputStream(ctxt);
9123 if (inputStream == NULL) {
9124 xmlFreeParserCtxt(ctxt);
9125 return(NULL);
9126 }
9127
9128 if (filename == NULL)
9129 inputStream->filename = NULL;
9130 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009131 inputStream->filename = (char *)
9132 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009133 inputStream->buf = buf;
9134 inputStream->base = inputStream->buf->buffer->content;
9135 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009136 inputStream->end =
9137 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009138
9139 inputPush(ctxt, inputStream);
9140
9141 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9142 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009143 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9144 int cur = ctxt->input->cur - ctxt->input->base;
9145
Owen Taylor3473f882001-02-23 17:55:21 +00009146 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009147
9148 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9149 ctxt->input->cur = ctxt->input->base + cur;
9150 ctxt->input->end =
9151 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009152#ifdef DEBUG_PUSH
9153 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9154#endif
9155 }
9156
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009157 if (enc != XML_CHAR_ENCODING_NONE) {
9158 xmlSwitchEncoding(ctxt, enc);
9159 }
9160
Owen Taylor3473f882001-02-23 17:55:21 +00009161 return(ctxt);
9162}
9163
9164/**
9165 * xmlCreateIOParserCtxt:
9166 * @sax: a SAX handler
9167 * @user_data: The user data returned on SAX callbacks
9168 * @ioread: an I/O read function
9169 * @ioclose: an I/O close function
9170 * @ioctx: an I/O handler
9171 * @enc: the charset encoding if known
9172 *
9173 * Create a parser context for using the XML parser with an existing
9174 * I/O stream
9175 *
9176 * Returns the new parser context or NULL
9177 */
9178xmlParserCtxtPtr
9179xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9180 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9181 void *ioctx, xmlCharEncoding enc) {
9182 xmlParserCtxtPtr ctxt;
9183 xmlParserInputPtr inputStream;
9184 xmlParserInputBufferPtr buf;
9185
9186 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9187 if (buf == NULL) return(NULL);
9188
9189 ctxt = xmlNewParserCtxt();
9190 if (ctxt == NULL) {
9191 xmlFree(buf);
9192 return(NULL);
9193 }
9194 if (sax != NULL) {
9195 if (ctxt->sax != &xmlDefaultSAXHandler)
9196 xmlFree(ctxt->sax);
9197 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9198 if (ctxt->sax == NULL) {
9199 xmlFree(buf);
9200 xmlFree(ctxt);
9201 return(NULL);
9202 }
9203 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9204 if (user_data != NULL)
9205 ctxt->userData = user_data;
9206 }
9207
9208 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9209 if (inputStream == NULL) {
9210 xmlFreeParserCtxt(ctxt);
9211 return(NULL);
9212 }
9213 inputPush(ctxt, inputStream);
9214
9215 return(ctxt);
9216}
9217
9218/************************************************************************
9219 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009220 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009221 * *
9222 ************************************************************************/
9223
9224/**
9225 * xmlIOParseDTD:
9226 * @sax: the SAX handler block or NULL
9227 * @input: an Input Buffer
9228 * @enc: the charset encoding if known
9229 *
9230 * Load and parse a DTD
9231 *
9232 * Returns the resulting xmlDtdPtr or NULL in case of error.
9233 * @input will be freed at parsing end.
9234 */
9235
9236xmlDtdPtr
9237xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9238 xmlCharEncoding enc) {
9239 xmlDtdPtr ret = NULL;
9240 xmlParserCtxtPtr ctxt;
9241 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009242 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009243
9244 if (input == NULL)
9245 return(NULL);
9246
9247 ctxt = xmlNewParserCtxt();
9248 if (ctxt == NULL) {
9249 return(NULL);
9250 }
9251
9252 /*
9253 * Set-up the SAX context
9254 */
9255 if (sax != NULL) {
9256 if (ctxt->sax != NULL)
9257 xmlFree(ctxt->sax);
9258 ctxt->sax = sax;
9259 ctxt->userData = NULL;
9260 }
9261
9262 /*
9263 * generate a parser input from the I/O handler
9264 */
9265
9266 pinput = xmlNewIOInputStream(ctxt, input, enc);
9267 if (pinput == NULL) {
9268 if (sax != NULL) ctxt->sax = NULL;
9269 xmlFreeParserCtxt(ctxt);
9270 return(NULL);
9271 }
9272
9273 /*
9274 * plug some encoding conversion routines here.
9275 */
9276 xmlPushInput(ctxt, pinput);
9277
9278 pinput->filename = NULL;
9279 pinput->line = 1;
9280 pinput->col = 1;
9281 pinput->base = ctxt->input->cur;
9282 pinput->cur = ctxt->input->cur;
9283 pinput->free = NULL;
9284
9285 /*
9286 * let's parse that entity knowing it's an external subset.
9287 */
9288 ctxt->inSubset = 2;
9289 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9290 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9291 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009292
9293 if (enc == XML_CHAR_ENCODING_NONE) {
9294 /*
9295 * Get the 4 first bytes and decode the charset
9296 * if enc != XML_CHAR_ENCODING_NONE
9297 * plug some encoding conversion routines.
9298 */
9299 start[0] = RAW;
9300 start[1] = NXT(1);
9301 start[2] = NXT(2);
9302 start[3] = NXT(3);
9303 enc = xmlDetectCharEncoding(start, 4);
9304 if (enc != XML_CHAR_ENCODING_NONE) {
9305 xmlSwitchEncoding(ctxt, enc);
9306 }
9307 }
9308
Owen Taylor3473f882001-02-23 17:55:21 +00009309 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9310
9311 if (ctxt->myDoc != NULL) {
9312 if (ctxt->wellFormed) {
9313 ret = ctxt->myDoc->extSubset;
9314 ctxt->myDoc->extSubset = NULL;
9315 } else {
9316 ret = NULL;
9317 }
9318 xmlFreeDoc(ctxt->myDoc);
9319 ctxt->myDoc = NULL;
9320 }
9321 if (sax != NULL) ctxt->sax = NULL;
9322 xmlFreeParserCtxt(ctxt);
9323
9324 return(ret);
9325}
9326
9327/**
9328 * xmlSAXParseDTD:
9329 * @sax: the SAX handler block
9330 * @ExternalID: a NAME* containing the External ID of the DTD
9331 * @SystemID: a NAME* containing the URL to the DTD
9332 *
9333 * Load and parse an external subset.
9334 *
9335 * Returns the resulting xmlDtdPtr or NULL in case of error.
9336 */
9337
9338xmlDtdPtr
9339xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9340 const xmlChar *SystemID) {
9341 xmlDtdPtr ret = NULL;
9342 xmlParserCtxtPtr ctxt;
9343 xmlParserInputPtr input = NULL;
9344 xmlCharEncoding enc;
9345
9346 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9347
9348 ctxt = xmlNewParserCtxt();
9349 if (ctxt == NULL) {
9350 return(NULL);
9351 }
9352
9353 /*
9354 * Set-up the SAX context
9355 */
9356 if (sax != NULL) {
9357 if (ctxt->sax != NULL)
9358 xmlFree(ctxt->sax);
9359 ctxt->sax = sax;
9360 ctxt->userData = NULL;
9361 }
9362
9363 /*
9364 * Ask the Entity resolver to load the damn thing
9365 */
9366
9367 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9368 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9369 if (input == NULL) {
9370 if (sax != NULL) ctxt->sax = NULL;
9371 xmlFreeParserCtxt(ctxt);
9372 return(NULL);
9373 }
9374
9375 /*
9376 * plug some encoding conversion routines here.
9377 */
9378 xmlPushInput(ctxt, input);
9379 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9380 xmlSwitchEncoding(ctxt, enc);
9381
9382 if (input->filename == NULL)
9383 input->filename = (char *) xmlStrdup(SystemID);
9384 input->line = 1;
9385 input->col = 1;
9386 input->base = ctxt->input->cur;
9387 input->cur = ctxt->input->cur;
9388 input->free = NULL;
9389
9390 /*
9391 * let's parse that entity knowing it's an external subset.
9392 */
9393 ctxt->inSubset = 2;
9394 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9395 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9396 ExternalID, SystemID);
9397 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9398
9399 if (ctxt->myDoc != NULL) {
9400 if (ctxt->wellFormed) {
9401 ret = ctxt->myDoc->extSubset;
9402 ctxt->myDoc->extSubset = NULL;
9403 } else {
9404 ret = NULL;
9405 }
9406 xmlFreeDoc(ctxt->myDoc);
9407 ctxt->myDoc = NULL;
9408 }
9409 if (sax != NULL) ctxt->sax = NULL;
9410 xmlFreeParserCtxt(ctxt);
9411
9412 return(ret);
9413}
9414
9415/**
9416 * xmlParseDTD:
9417 * @ExternalID: a NAME* containing the External ID of the DTD
9418 * @SystemID: a NAME* containing the URL to the DTD
9419 *
9420 * Load and parse an external subset.
9421 *
9422 * Returns the resulting xmlDtdPtr or NULL in case of error.
9423 */
9424
9425xmlDtdPtr
9426xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9427 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9428}
9429
/************************************************************************
 *                                                                      *
 *                Front ends when parsing an Entity                     *
 *                                                                      *
 ************************************************************************/
9435
9436/**
Owen Taylor3473f882001-02-23 17:55:21 +00009437 * xmlParseCtxtExternalEntity:
9438 * @ctx: the existing parsing context
9439 * @URL: the URL for the entity to load
9440 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009441 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009442 *
9443 * Parse an external general entity within an existing parsing context
9444 * An external general parsed entity is well-formed if it matches the
9445 * production labeled extParsedEnt.
9446 *
9447 * [78] extParsedEnt ::= TextDecl? content
9448 *
9449 * Returns 0 if the entity is well formed, -1 in case of args problem and
9450 * the parser error code otherwise
9451 */
9452
9453int
9454xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009455 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009456 xmlParserCtxtPtr ctxt;
9457 xmlDocPtr newDoc;
9458 xmlSAXHandlerPtr oldsax = NULL;
9459 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009460 xmlChar start[4];
9461 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009462
9463 if (ctx->depth > 40) {
9464 return(XML_ERR_ENTITY_LOOP);
9465 }
9466
Daniel Veillardcda96922001-08-21 10:56:31 +00009467 if (lst != NULL)
9468 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009469 if ((URL == NULL) && (ID == NULL))
9470 return(-1);
9471 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9472 return(-1);
9473
9474
9475 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9476 if (ctxt == NULL) return(-1);
9477 ctxt->userData = ctxt;
9478 oldsax = ctxt->sax;
9479 ctxt->sax = ctx->sax;
9480 newDoc = xmlNewDoc(BAD_CAST "1.0");
9481 if (newDoc == NULL) {
9482 xmlFreeParserCtxt(ctxt);
9483 return(-1);
9484 }
9485 if (ctx->myDoc != NULL) {
9486 newDoc->intSubset = ctx->myDoc->intSubset;
9487 newDoc->extSubset = ctx->myDoc->extSubset;
9488 }
9489 if (ctx->myDoc->URL != NULL) {
9490 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9491 }
9492 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9493 if (newDoc->children == NULL) {
9494 ctxt->sax = oldsax;
9495 xmlFreeParserCtxt(ctxt);
9496 newDoc->intSubset = NULL;
9497 newDoc->extSubset = NULL;
9498 xmlFreeDoc(newDoc);
9499 return(-1);
9500 }
9501 nodePush(ctxt, newDoc->children);
9502 if (ctx->myDoc == NULL) {
9503 ctxt->myDoc = newDoc;
9504 } else {
9505 ctxt->myDoc = ctx->myDoc;
9506 newDoc->children->doc = ctx->myDoc;
9507 }
9508
Daniel Veillard87a764e2001-06-20 17:41:10 +00009509 /*
9510 * Get the 4 first bytes and decode the charset
9511 * if enc != XML_CHAR_ENCODING_NONE
9512 * plug some encoding conversion routines.
9513 */
9514 GROW
9515 start[0] = RAW;
9516 start[1] = NXT(1);
9517 start[2] = NXT(2);
9518 start[3] = NXT(3);
9519 enc = xmlDetectCharEncoding(start, 4);
9520 if (enc != XML_CHAR_ENCODING_NONE) {
9521 xmlSwitchEncoding(ctxt, enc);
9522 }
9523
Owen Taylor3473f882001-02-23 17:55:21 +00009524 /*
9525 * Parse a possible text declaration first
9526 */
Owen Taylor3473f882001-02-23 17:55:21 +00009527 if ((RAW == '<') && (NXT(1) == '?') &&
9528 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9529 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9530 xmlParseTextDecl(ctxt);
9531 }
9532
9533 /*
9534 * Doing validity checking on chunk doesn't make sense
9535 */
9536 ctxt->instate = XML_PARSER_CONTENT;
9537 ctxt->validate = ctx->validate;
9538 ctxt->loadsubset = ctx->loadsubset;
9539 ctxt->depth = ctx->depth + 1;
9540 ctxt->replaceEntities = ctx->replaceEntities;
9541 if (ctxt->validate) {
9542 ctxt->vctxt.error = ctx->vctxt.error;
9543 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009544 } else {
9545 ctxt->vctxt.error = NULL;
9546 ctxt->vctxt.warning = NULL;
9547 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009548 ctxt->vctxt.nodeTab = NULL;
9549 ctxt->vctxt.nodeNr = 0;
9550 ctxt->vctxt.nodeMax = 0;
9551 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009552
9553 xmlParseContent(ctxt);
9554
9555 if ((RAW == '<') && (NXT(1) == '/')) {
9556 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9558 ctxt->sax->error(ctxt->userData,
9559 "chunk is not well balanced\n");
9560 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009561 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009562 } else if (RAW != 0) {
9563 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9565 ctxt->sax->error(ctxt->userData,
9566 "extra content at the end of well balanced chunk\n");
9567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009569 }
9570 if (ctxt->node != newDoc->children) {
9571 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9573 ctxt->sax->error(ctxt->userData,
9574 "chunk is not well balanced\n");
9575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009577 }
9578
9579 if (!ctxt->wellFormed) {
9580 if (ctxt->errNo == 0)
9581 ret = 1;
9582 else
9583 ret = ctxt->errNo;
9584 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009585 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009586 xmlNodePtr cur;
9587
9588 /*
9589 * Return the newly created nodeset after unlinking it from
9590 * they pseudo parent.
9591 */
9592 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009593 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009594 while (cur != NULL) {
9595 cur->parent = NULL;
9596 cur = cur->next;
9597 }
9598 newDoc->children->children = NULL;
9599 }
9600 ret = 0;
9601 }
9602 ctxt->sax = oldsax;
9603 xmlFreeParserCtxt(ctxt);
9604 newDoc->intSubset = NULL;
9605 newDoc->extSubset = NULL;
9606 xmlFreeDoc(newDoc);
9607
9608 return(ret);
9609}
9610
9611/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009612 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009613 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009614 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009615 * @sax: the SAX handler bloc (possibly NULL)
9616 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9617 * @depth: Used for loop detection, use 0
9618 * @URL: the URL for the entity to load
9619 * @ID: the System ID for the entity to load
9620 * @list: the return value for the set of parsed nodes
9621 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009622 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009623 *
9624 * Returns 0 if the entity is well formed, -1 in case of args problem and
9625 * the parser error code otherwise
9626 */
9627
Daniel Veillard257d9102001-05-08 10:41:44 +00009628static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009629xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9630 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009631 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009632 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009633 xmlParserCtxtPtr ctxt;
9634 xmlDocPtr newDoc;
9635 xmlSAXHandlerPtr oldsax = NULL;
9636 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009637 xmlChar start[4];
9638 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009639
9640 if (depth > 40) {
9641 return(XML_ERR_ENTITY_LOOP);
9642 }
9643
9644
9645
9646 if (list != NULL)
9647 *list = NULL;
9648 if ((URL == NULL) && (ID == NULL))
9649 return(-1);
9650 if (doc == NULL) /* @@ relax but check for dereferences */
9651 return(-1);
9652
9653
9654 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9655 if (ctxt == NULL) return(-1);
9656 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009657 if (oldctxt != NULL) {
9658 ctxt->_private = oldctxt->_private;
9659 ctxt->loadsubset = oldctxt->loadsubset;
9660 ctxt->validate = oldctxt->validate;
9661 ctxt->external = oldctxt->external;
9662 } else {
9663 /*
9664 * Doing validity checking on chunk without context
9665 * doesn't make sense
9666 */
9667 ctxt->_private = NULL;
9668 ctxt->validate = 0;
9669 ctxt->external = 2;
9670 ctxt->loadsubset = 0;
9671 }
Owen Taylor3473f882001-02-23 17:55:21 +00009672 if (sax != NULL) {
9673 oldsax = ctxt->sax;
9674 ctxt->sax = sax;
9675 if (user_data != NULL)
9676 ctxt->userData = user_data;
9677 }
9678 newDoc = xmlNewDoc(BAD_CAST "1.0");
9679 if (newDoc == NULL) {
9680 xmlFreeParserCtxt(ctxt);
9681 return(-1);
9682 }
9683 if (doc != NULL) {
9684 newDoc->intSubset = doc->intSubset;
9685 newDoc->extSubset = doc->extSubset;
9686 }
9687 if (doc->URL != NULL) {
9688 newDoc->URL = xmlStrdup(doc->URL);
9689 }
9690 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9691 if (newDoc->children == NULL) {
9692 if (sax != NULL)
9693 ctxt->sax = oldsax;
9694 xmlFreeParserCtxt(ctxt);
9695 newDoc->intSubset = NULL;
9696 newDoc->extSubset = NULL;
9697 xmlFreeDoc(newDoc);
9698 return(-1);
9699 }
9700 nodePush(ctxt, newDoc->children);
9701 if (doc == NULL) {
9702 ctxt->myDoc = newDoc;
9703 } else {
9704 ctxt->myDoc = doc;
9705 newDoc->children->doc = doc;
9706 }
9707
Daniel Veillard87a764e2001-06-20 17:41:10 +00009708 /*
9709 * Get the 4 first bytes and decode the charset
9710 * if enc != XML_CHAR_ENCODING_NONE
9711 * plug some encoding conversion routines.
9712 */
9713 GROW;
9714 start[0] = RAW;
9715 start[1] = NXT(1);
9716 start[2] = NXT(2);
9717 start[3] = NXT(3);
9718 enc = xmlDetectCharEncoding(start, 4);
9719 if (enc != XML_CHAR_ENCODING_NONE) {
9720 xmlSwitchEncoding(ctxt, enc);
9721 }
9722
Owen Taylor3473f882001-02-23 17:55:21 +00009723 /*
9724 * Parse a possible text declaration first
9725 */
Owen Taylor3473f882001-02-23 17:55:21 +00009726 if ((RAW == '<') && (NXT(1) == '?') &&
9727 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9728 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9729 xmlParseTextDecl(ctxt);
9730 }
9731
Owen Taylor3473f882001-02-23 17:55:21 +00009732 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009733 ctxt->depth = depth;
9734
9735 xmlParseContent(ctxt);
9736
Daniel Veillard561b7f82002-03-20 21:55:57 +00009737 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009738 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9740 ctxt->sax->error(ctxt->userData,
9741 "chunk is not well balanced\n");
9742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009744 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009745 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9747 ctxt->sax->error(ctxt->userData,
9748 "extra content at the end of well balanced chunk\n");
9749 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009751 }
9752 if (ctxt->node != newDoc->children) {
9753 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9755 ctxt->sax->error(ctxt->userData,
9756 "chunk is not well balanced\n");
9757 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009758 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009759 }
9760
9761 if (!ctxt->wellFormed) {
9762 if (ctxt->errNo == 0)
9763 ret = 1;
9764 else
9765 ret = ctxt->errNo;
9766 } else {
9767 if (list != NULL) {
9768 xmlNodePtr cur;
9769
9770 /*
9771 * Return the newly created nodeset after unlinking it from
9772 * they pseudo parent.
9773 */
9774 cur = newDoc->children->children;
9775 *list = cur;
9776 while (cur != NULL) {
9777 cur->parent = NULL;
9778 cur = cur->next;
9779 }
9780 newDoc->children->children = NULL;
9781 }
9782 ret = 0;
9783 }
9784 if (sax != NULL)
9785 ctxt->sax = oldsax;
9786 xmlFreeParserCtxt(ctxt);
9787 newDoc->intSubset = NULL;
9788 newDoc->extSubset = NULL;
9789 xmlFreeDoc(newDoc);
9790
9791 return(ret);
9792}
9793
9794/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009795 * xmlParseExternalEntity:
9796 * @doc: the document the chunk pertains to
9797 * @sax: the SAX handler bloc (possibly NULL)
9798 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9799 * @depth: Used for loop detection, use 0
9800 * @URL: the URL for the entity to load
9801 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009802 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009803 *
9804 * Parse an external general entity
9805 * An external general parsed entity is well-formed if it matches the
9806 * production labeled extParsedEnt.
9807 *
9808 * [78] extParsedEnt ::= TextDecl? content
9809 *
9810 * Returns 0 if the entity is well formed, -1 in case of args problem and
9811 * the parser error code otherwise
9812 */
9813
9814int
9815xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009816 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009817 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009818 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009819}
9820
9821/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009822 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009823 * @doc: the document the chunk pertains to
9824 * @sax: the SAX handler bloc (possibly NULL)
9825 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9826 * @depth: Used for loop detection, use 0
9827 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009828 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009829 *
9830 * Parse a well-balanced chunk of an XML document
9831 * called by the parser
9832 * The allowed sequence for the Well Balanced Chunk is the one defined by
9833 * the content production in the XML grammar:
9834 *
9835 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9836 *
9837 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9838 * the parser error code otherwise
9839 */
9840
9841int
9842xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009843 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009844 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9845 depth, string, lst, 0 );
9846}
9847
9848/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009849 * xmlParseBalancedChunkMemoryInternal:
9850 * @oldctxt: the existing parsing context
9851 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9852 * @user_data: the user data field for the parser context
9853 * @lst: the return value for the set of parsed nodes
9854 *
9855 *
9856 * Parse a well-balanced chunk of an XML document
9857 * called by the parser
9858 * The allowed sequence for the Well Balanced Chunk is the one defined by
9859 * the content production in the XML grammar:
9860 *
9861 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9862 *
9863 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9864 * the parser error code otherwise
9865 *
9866 * In case recover is set to 1, the nodelist will not be empty even if
9867 * the parsed chunk is not well balanced.
9868 */
9869static int
9870xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9871 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9872 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009873 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009874 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009875 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009876 int size;
9877 int ret = 0;
9878
9879 if (oldctxt->depth > 40) {
9880 return(XML_ERR_ENTITY_LOOP);
9881 }
9882
9883
9884 if (lst != NULL)
9885 *lst = NULL;
9886 if (string == NULL)
9887 return(-1);
9888
9889 size = xmlStrlen(string);
9890
9891 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9892 if (ctxt == NULL) return(-1);
9893 if (user_data != NULL)
9894 ctxt->userData = user_data;
9895 else
9896 ctxt->userData = ctxt;
9897
9898 oldsax = ctxt->sax;
9899 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009900 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009901 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009902 newDoc = xmlNewDoc(BAD_CAST "1.0");
9903 if (newDoc == NULL) {
9904 ctxt->sax = oldsax;
9905 xmlFreeParserCtxt(ctxt);
9906 return(-1);
9907 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009908 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009909 } else {
9910 ctxt->myDoc = oldctxt->myDoc;
9911 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009912 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009913 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009914 BAD_CAST "pseudoroot", NULL);
9915 if (ctxt->myDoc->children == NULL) {
9916 ctxt->sax = oldsax;
9917 xmlFreeParserCtxt(ctxt);
9918 if (newDoc != NULL)
9919 xmlFreeDoc(newDoc);
9920 return(-1);
9921 }
9922 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009923 ctxt->instate = XML_PARSER_CONTENT;
9924 ctxt->depth = oldctxt->depth + 1;
9925
9926 /*
9927 * Doing validity checking on chunk doesn't make sense
9928 */
9929 ctxt->validate = 0;
9930 ctxt->loadsubset = oldctxt->loadsubset;
9931
Daniel Veillard68e9e742002-11-16 15:35:11 +00009932 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009933 if ((RAW == '<') && (NXT(1) == '/')) {
9934 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9936 ctxt->sax->error(ctxt->userData,
9937 "chunk is not well balanced\n");
9938 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009940 } else if (RAW != 0) {
9941 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9943 ctxt->sax->error(ctxt->userData,
9944 "extra content at the end of well balanced chunk\n");
9945 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009946 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009947 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009948 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009949 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9951 ctxt->sax->error(ctxt->userData,
9952 "chunk is not well balanced\n");
9953 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009954 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009955 }
9956
9957 if (!ctxt->wellFormed) {
9958 if (ctxt->errNo == 0)
9959 ret = 1;
9960 else
9961 ret = ctxt->errNo;
9962 } else {
9963 ret = 0;
9964 }
9965
9966 if ((lst != NULL) && (ret == 0)) {
9967 xmlNodePtr cur;
9968
9969 /*
9970 * Return the newly created nodeset after unlinking it from
9971 * they pseudo parent.
9972 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009973 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009974 *lst = cur;
9975 while (cur != NULL) {
9976 cur->parent = NULL;
9977 cur = cur->next;
9978 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009979 ctxt->myDoc->children->children = NULL;
9980 }
9981 if (ctxt->myDoc != NULL) {
9982 xmlFreeNode(ctxt->myDoc->children);
9983 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009984 }
9985
9986 ctxt->sax = oldsax;
9987 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009988 if (newDoc != NULL)
9989 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009990
9991 return(ret);
9992}
9993
9994/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009995 * xmlParseBalancedChunkMemoryRecover:
9996 * @doc: the document the chunk pertains to
9997 * @sax: the SAX handler bloc (possibly NULL)
9998 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9999 * @depth: Used for loop detection, use 0
10000 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10001 * @lst: the return value for the set of parsed nodes
10002 * @recover: return nodes even if the data is broken (use 0)
10003 *
10004 *
10005 * Parse a well-balanced chunk of an XML document
10006 * called by the parser
10007 * The allowed sequence for the Well Balanced Chunk is the one defined by
10008 * the content production in the XML grammar:
10009 *
10010 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10011 *
10012 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10013 * the parser error code otherwise
10014 *
10015 * In case recover is set to 1, the nodelist will not be empty even if
10016 * the parsed chunk is not well balanced.
10017 */
10018int
10019xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10020 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10021 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010022 xmlParserCtxtPtr ctxt;
10023 xmlDocPtr newDoc;
10024 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010025 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010026 int size;
10027 int ret = 0;
10028
10029 if (depth > 40) {
10030 return(XML_ERR_ENTITY_LOOP);
10031 }
10032
10033
Daniel Veillardcda96922001-08-21 10:56:31 +000010034 if (lst != NULL)
10035 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010036 if (string == NULL)
10037 return(-1);
10038
10039 size = xmlStrlen(string);
10040
10041 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10042 if (ctxt == NULL) return(-1);
10043 ctxt->userData = ctxt;
10044 if (sax != NULL) {
10045 oldsax = ctxt->sax;
10046 ctxt->sax = sax;
10047 if (user_data != NULL)
10048 ctxt->userData = user_data;
10049 }
10050 newDoc = xmlNewDoc(BAD_CAST "1.0");
10051 if (newDoc == NULL) {
10052 xmlFreeParserCtxt(ctxt);
10053 return(-1);
10054 }
10055 if (doc != NULL) {
10056 newDoc->intSubset = doc->intSubset;
10057 newDoc->extSubset = doc->extSubset;
10058 }
10059 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10060 if (newDoc->children == NULL) {
10061 if (sax != NULL)
10062 ctxt->sax = oldsax;
10063 xmlFreeParserCtxt(ctxt);
10064 newDoc->intSubset = NULL;
10065 newDoc->extSubset = NULL;
10066 xmlFreeDoc(newDoc);
10067 return(-1);
10068 }
10069 nodePush(ctxt, newDoc->children);
10070 if (doc == NULL) {
10071 ctxt->myDoc = newDoc;
10072 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010073 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010074 newDoc->children->doc = doc;
10075 }
10076 ctxt->instate = XML_PARSER_CONTENT;
10077 ctxt->depth = depth;
10078
10079 /*
10080 * Doing validity checking on chunk doesn't make sense
10081 */
10082 ctxt->validate = 0;
10083 ctxt->loadsubset = 0;
10084
Daniel Veillardb39bc392002-10-26 19:29:51 +000010085 if ( doc != NULL ){
10086 content = doc->children;
10087 doc->children = NULL;
10088 xmlParseContent(ctxt);
10089 doc->children = content;
10090 }
10091 else {
10092 xmlParseContent(ctxt);
10093 }
Owen Taylor3473f882001-02-23 17:55:21 +000010094 if ((RAW == '<') && (NXT(1) == '/')) {
10095 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10097 ctxt->sax->error(ctxt->userData,
10098 "chunk is not well balanced\n");
10099 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010100 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010101 } else if (RAW != 0) {
10102 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10104 ctxt->sax->error(ctxt->userData,
10105 "extra content at the end of well balanced chunk\n");
10106 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010107 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010108 }
10109 if (ctxt->node != newDoc->children) {
10110 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10112 ctxt->sax->error(ctxt->userData,
10113 "chunk is not well balanced\n");
10114 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010115 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010116 }
10117
10118 if (!ctxt->wellFormed) {
10119 if (ctxt->errNo == 0)
10120 ret = 1;
10121 else
10122 ret = ctxt->errNo;
10123 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010124 ret = 0;
10125 }
10126
10127 if (lst != NULL && (ret == 0 || recover == 1)) {
10128 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010129
10130 /*
10131 * Return the newly created nodeset after unlinking it from
10132 * they pseudo parent.
10133 */
10134 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010135 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010136 while (cur != NULL) {
10137 cur->parent = NULL;
10138 cur = cur->next;
10139 }
10140 newDoc->children->children = NULL;
10141 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010142
Owen Taylor3473f882001-02-23 17:55:21 +000010143 if (sax != NULL)
10144 ctxt->sax = oldsax;
10145 xmlFreeParserCtxt(ctxt);
10146 newDoc->intSubset = NULL;
10147 newDoc->extSubset = NULL;
10148 xmlFreeDoc(newDoc);
10149
10150 return(ret);
10151}
10152
10153/**
10154 * xmlSAXParseEntity:
10155 * @sax: the SAX handler block
10156 * @filename: the filename
10157 *
10158 * parse an XML external entity out of context and build a tree.
10159 * It use the given SAX function block to handle the parsing callback.
10160 * If sax is NULL, fallback to the default DOM tree building routines.
10161 *
10162 * [78] extParsedEnt ::= TextDecl? content
10163 *
10164 * This correspond to a "Well Balanced" chunk
10165 *
10166 * Returns the resulting document tree
10167 */
10168
10169xmlDocPtr
10170xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10171 xmlDocPtr ret;
10172 xmlParserCtxtPtr ctxt;
10173 char *directory = NULL;
10174
10175 ctxt = xmlCreateFileParserCtxt(filename);
10176 if (ctxt == NULL) {
10177 return(NULL);
10178 }
10179 if (sax != NULL) {
10180 if (ctxt->sax != NULL)
10181 xmlFree(ctxt->sax);
10182 ctxt->sax = sax;
10183 ctxt->userData = NULL;
10184 }
10185
10186 if ((ctxt->directory == NULL) && (directory == NULL))
10187 directory = xmlParserGetDirectory(filename);
10188
10189 xmlParseExtParsedEnt(ctxt);
10190
10191 if (ctxt->wellFormed)
10192 ret = ctxt->myDoc;
10193 else {
10194 ret = NULL;
10195 xmlFreeDoc(ctxt->myDoc);
10196 ctxt->myDoc = NULL;
10197 }
10198 if (sax != NULL)
10199 ctxt->sax = NULL;
10200 xmlFreeParserCtxt(ctxt);
10201
10202 return(ret);
10203}
10204
10205/**
10206 * xmlParseEntity:
10207 * @filename: the filename
10208 *
10209 * parse an XML external entity out of context and build a tree.
10210 *
10211 * [78] extParsedEnt ::= TextDecl? content
10212 *
10213 * This correspond to a "Well Balanced" chunk
10214 *
10215 * Returns the resulting document tree
10216 */
10217
10218xmlDocPtr
10219xmlParseEntity(const char *filename) {
10220 return(xmlSAXParseEntity(NULL, filename));
10221}
10222
10223/**
10224 * xmlCreateEntityParserCtxt:
10225 * @URL: the entity URL
10226 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010227 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010228 *
10229 * Create a parser context for an external entity
10230 * Automatic support for ZLIB/Compress compressed document is provided
10231 * by default if found at compile-time.
10232 *
10233 * Returns the new parser context or NULL
10234 */
10235xmlParserCtxtPtr
10236xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10237 const xmlChar *base) {
10238 xmlParserCtxtPtr ctxt;
10239 xmlParserInputPtr inputStream;
10240 char *directory = NULL;
10241 xmlChar *uri;
10242
10243 ctxt = xmlNewParserCtxt();
10244 if (ctxt == NULL) {
10245 return(NULL);
10246 }
10247
10248 uri = xmlBuildURI(URL, base);
10249
10250 if (uri == NULL) {
10251 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10252 if (inputStream == NULL) {
10253 xmlFreeParserCtxt(ctxt);
10254 return(NULL);
10255 }
10256
10257 inputPush(ctxt, inputStream);
10258
10259 if ((ctxt->directory == NULL) && (directory == NULL))
10260 directory = xmlParserGetDirectory((char *)URL);
10261 if ((ctxt->directory == NULL) && (directory != NULL))
10262 ctxt->directory = directory;
10263 } else {
10264 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10265 if (inputStream == NULL) {
10266 xmlFree(uri);
10267 xmlFreeParserCtxt(ctxt);
10268 return(NULL);
10269 }
10270
10271 inputPush(ctxt, inputStream);
10272
10273 if ((ctxt->directory == NULL) && (directory == NULL))
10274 directory = xmlParserGetDirectory((char *)uri);
10275 if ((ctxt->directory == NULL) && (directory != NULL))
10276 ctxt->directory = directory;
10277 xmlFree(uri);
10278 }
10279
10280 return(ctxt);
10281}
10282
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from a file                     *
 *                                                                      *
 ************************************************************************/
10288
10289/**
10290 * xmlCreateFileParserCtxt:
10291 * @filename: the filename
10292 *
10293 * Create a parser context for a file content.
10294 * Automatic support for ZLIB/Compress compressed document is provided
10295 * by default if found at compile-time.
10296 *
10297 * Returns the new parser context or NULL
10298 */
10299xmlParserCtxtPtr
10300xmlCreateFileParserCtxt(const char *filename)
10301{
10302 xmlParserCtxtPtr ctxt;
10303 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010304 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010305 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010306
Owen Taylor3473f882001-02-23 17:55:21 +000010307 ctxt = xmlNewParserCtxt();
10308 if (ctxt == NULL) {
10309 if (xmlDefaultSAXHandler.error != NULL) {
10310 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10311 }
10312 return(NULL);
10313 }
10314
Daniel Veillardf4862f02002-09-10 11:13:43 +000010315 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10316 if (normalized == NULL) {
10317 xmlFreeParserCtxt(ctxt);
10318 return(NULL);
10319 }
10320 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010321 if (inputStream == NULL) {
10322 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010323 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010324 return(NULL);
10325 }
10326
Owen Taylor3473f882001-02-23 17:55:21 +000010327 inputPush(ctxt, inputStream);
10328 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010329 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010330 if ((ctxt->directory == NULL) && (directory != NULL))
10331 ctxt->directory = directory;
10332
Daniel Veillardf4862f02002-09-10 11:13:43 +000010333 xmlFree(normalized);
10334
Owen Taylor3473f882001-02-23 17:55:21 +000010335 return(ctxt);
10336}
10337
10338/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010339 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010340 * @sax: the SAX handler block
10341 * @filename: the filename
10342 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10343 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010344 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010345 *
10346 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10347 * compressed document is provided by default if found at compile-time.
10348 * It use the given SAX function block to handle the parsing callback.
10349 * If sax is NULL, fallback to the default DOM tree building routines.
10350 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010351 * User data (void *) is stored within the parser context in the
10352 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010353 *
Owen Taylor3473f882001-02-23 17:55:21 +000010354 * Returns the resulting document tree
10355 */
10356
10357xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010358xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10359 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010360 xmlDocPtr ret;
10361 xmlParserCtxtPtr ctxt;
10362 char *directory = NULL;
10363
Daniel Veillard635ef722001-10-29 11:48:19 +000010364 xmlInitParser();
10365
Owen Taylor3473f882001-02-23 17:55:21 +000010366 ctxt = xmlCreateFileParserCtxt(filename);
10367 if (ctxt == NULL) {
10368 return(NULL);
10369 }
10370 if (sax != NULL) {
10371 if (ctxt->sax != NULL)
10372 xmlFree(ctxt->sax);
10373 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010374 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010375 if (data!=NULL) {
10376 ctxt->_private=data;
10377 }
Owen Taylor3473f882001-02-23 17:55:21 +000010378
10379 if ((ctxt->directory == NULL) && (directory == NULL))
10380 directory = xmlParserGetDirectory(filename);
10381 if ((ctxt->directory == NULL) && (directory != NULL))
10382 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10383
Daniel Veillarddad3f682002-11-17 16:47:27 +000010384 ctxt->recovery = recovery;
10385
Owen Taylor3473f882001-02-23 17:55:21 +000010386 xmlParseDocument(ctxt);
10387
10388 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10389 else {
10390 ret = NULL;
10391 xmlFreeDoc(ctxt->myDoc);
10392 ctxt->myDoc = NULL;
10393 }
10394 if (sax != NULL)
10395 ctxt->sax = NULL;
10396 xmlFreeParserCtxt(ctxt);
10397
10398 return(ret);
10399}
10400
10401/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010402 * xmlSAXParseFile:
10403 * @sax: the SAX handler block
10404 * @filename: the filename
10405 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10406 * documents
10407 *
10408 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10409 * compressed document is provided by default if found at compile-time.
10410 * It use the given SAX function block to handle the parsing callback.
10411 * If sax is NULL, fallback to the default DOM tree building routines.
10412 *
10413 * Returns the resulting document tree
10414 */
10415
10416xmlDocPtr
10417xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10418 int recovery) {
10419 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10420}
10421
10422/**
Owen Taylor3473f882001-02-23 17:55:21 +000010423 * xmlRecoverDoc:
10424 * @cur: a pointer to an array of xmlChar
10425 *
10426 * parse an XML in-memory document and build a tree.
10427 * In the case the document is not Well Formed, a tree is built anyway
10428 *
10429 * Returns the resulting document tree
10430 */
10431
10432xmlDocPtr
10433xmlRecoverDoc(xmlChar *cur) {
10434 return(xmlSAXParseDoc(NULL, cur, 1));
10435}
10436
10437/**
10438 * xmlParseFile:
10439 * @filename: the filename
10440 *
10441 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10442 * compressed document is provided by default if found at compile-time.
10443 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010444 * Returns the resulting document tree if the file was wellformed,
10445 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010446 */
10447
10448xmlDocPtr
10449xmlParseFile(const char *filename) {
10450 return(xmlSAXParseFile(NULL, filename, 0));
10451}
10452
10453/**
10454 * xmlRecoverFile:
10455 * @filename: the filename
10456 *
10457 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10458 * compressed document is provided by default if found at compile-time.
10459 * In the case the document is not Well Formed, a tree is built anyway
10460 *
10461 * Returns the resulting document tree
10462 */
10463
10464xmlDocPtr
10465xmlRecoverFile(const char *filename) {
10466 return(xmlSAXParseFile(NULL, filename, 1));
10467}
10468
10469
10470/**
10471 * xmlSetupParserForBuffer:
10472 * @ctxt: an XML parser context
10473 * @buffer: a xmlChar * buffer
10474 * @filename: a file name
10475 *
10476 * Setup the parser context to parse a new buffer; Clears any prior
10477 * contents from the parser context. The buffer parameter must not be
10478 * NULL, but the filename parameter can be
10479 */
10480void
10481xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10482 const char* filename)
10483{
10484 xmlParserInputPtr input;
10485
10486 input = xmlNewInputStream(ctxt);
10487 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010488 xmlGenericError(xmlGenericErrorContext,
10489 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010490 xmlFree(ctxt);
10491 return;
10492 }
10493
10494 xmlClearParserCtxt(ctxt);
10495 if (filename != NULL)
10496 input->filename = xmlMemStrdup(filename);
10497 input->base = buffer;
10498 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010499 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010500 inputPush(ctxt, input);
10501}
10502
10503/**
10504 * xmlSAXUserParseFile:
10505 * @sax: a SAX handler
10506 * @user_data: The user data returned on SAX callbacks
10507 * @filename: a file name
10508 *
10509 * parse an XML file and call the given SAX handler routines.
10510 * Automatic support for ZLIB/Compress compressed document is provided
10511 *
10512 * Returns 0 in case of success or a error number otherwise
10513 */
10514int
10515xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10516 const char *filename) {
10517 int ret = 0;
10518 xmlParserCtxtPtr ctxt;
10519
10520 ctxt = xmlCreateFileParserCtxt(filename);
10521 if (ctxt == NULL) return -1;
10522 if (ctxt->sax != &xmlDefaultSAXHandler)
10523 xmlFree(ctxt->sax);
10524 ctxt->sax = sax;
10525 if (user_data != NULL)
10526 ctxt->userData = user_data;
10527
10528 xmlParseDocument(ctxt);
10529
10530 if (ctxt->wellFormed)
10531 ret = 0;
10532 else {
10533 if (ctxt->errNo != 0)
10534 ret = ctxt->errNo;
10535 else
10536 ret = -1;
10537 }
10538 if (sax != NULL)
10539 ctxt->sax = NULL;
10540 xmlFreeParserCtxt(ctxt);
10541
10542 return ret;
10543}
10544
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from memory                     *
 *                                                                      *
 ************************************************************************/
10550
10551/**
10552 * xmlCreateMemoryParserCtxt:
10553 * @buffer: a pointer to a char array
10554 * @size: the size of the array
10555 *
10556 * Create a parser context for an XML in-memory document.
10557 *
10558 * Returns the new parser context or NULL
10559 */
10560xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010561xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010562 xmlParserCtxtPtr ctxt;
10563 xmlParserInputPtr input;
10564 xmlParserInputBufferPtr buf;
10565
10566 if (buffer == NULL)
10567 return(NULL);
10568 if (size <= 0)
10569 return(NULL);
10570
10571 ctxt = xmlNewParserCtxt();
10572 if (ctxt == NULL)
10573 return(NULL);
10574
10575 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010576 if (buf == NULL) {
10577 xmlFreeParserCtxt(ctxt);
10578 return(NULL);
10579 }
Owen Taylor3473f882001-02-23 17:55:21 +000010580
10581 input = xmlNewInputStream(ctxt);
10582 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010583 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010584 xmlFreeParserCtxt(ctxt);
10585 return(NULL);
10586 }
10587
10588 input->filename = NULL;
10589 input->buf = buf;
10590 input->base = input->buf->buffer->content;
10591 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010592 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010593
10594 inputPush(ctxt, input);
10595 return(ctxt);
10596}
10597
10598/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010599 * xmlSAXParseMemoryWithData:
10600 * @sax: the SAX handler block
10601 * @buffer: an pointer to a char array
10602 * @size: the size of the array
10603 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10604 * documents
10605 * @data: the userdata
10606 *
10607 * parse an XML in-memory block and use the given SAX function block
10608 * to handle the parsing callback. If sax is NULL, fallback to the default
10609 * DOM tree building routines.
10610 *
10611 * User data (void *) is stored within the parser context in the
10612 * context's _private member, so it is available nearly everywhere in libxml
10613 *
10614 * Returns the resulting document tree
10615 */
10616
10617xmlDocPtr
10618xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10619 int size, int recovery, void *data) {
10620 xmlDocPtr ret;
10621 xmlParserCtxtPtr ctxt;
10622
10623 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10624 if (ctxt == NULL) return(NULL);
10625 if (sax != NULL) {
10626 if (ctxt->sax != NULL)
10627 xmlFree(ctxt->sax);
10628 ctxt->sax = sax;
10629 }
10630 if (data!=NULL) {
10631 ctxt->_private=data;
10632 }
10633
10634 xmlParseDocument(ctxt);
10635
10636 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10637 else {
10638 ret = NULL;
10639 xmlFreeDoc(ctxt->myDoc);
10640 ctxt->myDoc = NULL;
10641 }
10642 if (sax != NULL)
10643 ctxt->sax = NULL;
10644 xmlFreeParserCtxt(ctxt);
10645
10646 return(ret);
10647}
10648
10649/**
Owen Taylor3473f882001-02-23 17:55:21 +000010650 * xmlSAXParseMemory:
10651 * @sax: the SAX handler block
10652 * @buffer: an pointer to a char array
10653 * @size: the size of the array
10654 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10655 * documents
10656 *
10657 * parse an XML in-memory block and use the given SAX function block
10658 * to handle the parsing callback. If sax is NULL, fallback to the default
10659 * DOM tree building routines.
10660 *
10661 * Returns the resulting document tree
10662 */
10663xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010664xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10665 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010666 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010667}
10668
10669/**
10670 * xmlParseMemory:
10671 * @buffer: an pointer to a char array
10672 * @size: the size of the array
10673 *
10674 * parse an XML in-memory block and build a tree.
10675 *
10676 * Returns the resulting document tree
10677 */
10678
Daniel Veillard50822cb2001-07-26 20:05:51 +000010679xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010680 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10681}
10682
10683/**
10684 * xmlRecoverMemory:
10685 * @buffer: an pointer to a char array
10686 * @size: the size of the array
10687 *
10688 * parse an XML in-memory block and build a tree.
10689 * In the case the document is not Well Formed, a tree is built anyway
10690 *
10691 * Returns the resulting document tree
10692 */
10693
Daniel Veillard50822cb2001-07-26 20:05:51 +000010694xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010695 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10696}
10697
10698/**
10699 * xmlSAXUserParseMemory:
10700 * @sax: a SAX handler
10701 * @user_data: The user data returned on SAX callbacks
10702 * @buffer: an in-memory XML document input
10703 * @size: the length of the XML document in bytes
10704 *
10705 * A better SAX parsing routine.
10706 * parse an XML in-memory buffer and call the given SAX handler routines.
10707 *
10708 * Returns 0 in case of success or a error number otherwise
10709 */
10710int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010711 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010712 int ret = 0;
10713 xmlParserCtxtPtr ctxt;
10714 xmlSAXHandlerPtr oldsax = NULL;
10715
Daniel Veillard9e923512002-08-14 08:48:52 +000010716 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010717 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10718 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010719 oldsax = ctxt->sax;
10720 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010721 if (user_data != NULL)
10722 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010723
10724 xmlParseDocument(ctxt);
10725
10726 if (ctxt->wellFormed)
10727 ret = 0;
10728 else {
10729 if (ctxt->errNo != 0)
10730 ret = ctxt->errNo;
10731 else
10732 ret = -1;
10733 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010734 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010735 xmlFreeParserCtxt(ctxt);
10736
10737 return ret;
10738}
10739
10740/**
10741 * xmlCreateDocParserCtxt:
10742 * @cur: a pointer to an array of xmlChar
10743 *
10744 * Creates a parser context for an XML in-memory document.
10745 *
10746 * Returns the new parser context or NULL
10747 */
10748xmlParserCtxtPtr
10749xmlCreateDocParserCtxt(xmlChar *cur) {
10750 int len;
10751
10752 if (cur == NULL)
10753 return(NULL);
10754 len = xmlStrlen(cur);
10755 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10756}
10757
10758/**
10759 * xmlSAXParseDoc:
10760 * @sax: the SAX handler block
10761 * @cur: a pointer to an array of xmlChar
10762 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10763 * documents
10764 *
10765 * parse an XML in-memory document and build a tree.
10766 * It use the given SAX function block to handle the parsing callback.
10767 * If sax is NULL, fallback to the default DOM tree building routines.
10768 *
10769 * Returns the resulting document tree
10770 */
10771
10772xmlDocPtr
10773xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10774 xmlDocPtr ret;
10775 xmlParserCtxtPtr ctxt;
10776
10777 if (cur == NULL) return(NULL);
10778
10779
10780 ctxt = xmlCreateDocParserCtxt(cur);
10781 if (ctxt == NULL) return(NULL);
10782 if (sax != NULL) {
10783 ctxt->sax = sax;
10784 ctxt->userData = NULL;
10785 }
10786
10787 xmlParseDocument(ctxt);
10788 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10789 else {
10790 ret = NULL;
10791 xmlFreeDoc(ctxt->myDoc);
10792 ctxt->myDoc = NULL;
10793 }
10794 if (sax != NULL)
10795 ctxt->sax = NULL;
10796 xmlFreeParserCtxt(ctxt);
10797
10798 return(ret);
10799}
10800
10801/**
10802 * xmlParseDoc:
10803 * @cur: a pointer to an array of xmlChar
10804 *
10805 * parse an XML in-memory document and build a tree.
10806 *
10807 * Returns the resulting document tree
10808 */
10809
10810xmlDocPtr
10811xmlParseDoc(xmlChar *cur) {
10812 return(xmlSAXParseDoc(NULL, cur, 0));
10813}
10814
/************************************************************************
 *                                                                      *
 *      Specific function to keep track of entities references          *
 *      and used by the XSLT debugger                                   *
 *                                                                      *
 ************************************************************************/
10821
10822static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10823
10824/**
10825 * xmlAddEntityReference:
10826 * @ent : A valid entity
10827 * @firstNode : A valid first node for children of entity
10828 * @lastNode : A valid last node of children entity
10829 *
10830 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10831 */
10832static void
10833xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10834 xmlNodePtr lastNode)
10835{
10836 if (xmlEntityRefFunc != NULL) {
10837 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10838 }
10839}
10840
10841
10842/**
10843 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010844 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010845 *
10846 * Set the function to call call back when a xml reference has been made
10847 */
10848void
10849xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10850{
10851 xmlEntityRefFunc = func;
10852}
Owen Taylor3473f882001-02-23 17:55:21 +000010853
/************************************************************************
 *                                                                      *
 *                              Miscellaneous                           *
 *                                                                      *
 ************************************************************************/
10859
10860#ifdef LIBXML_XPATH_ENABLED
10861#include <libxml/xpath.h>
10862#endif
10863
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010864extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010865static int xmlParserInitialized = 0;
10866
10867/**
10868 * xmlInitParser:
10869 *
10870 * Initialization function for the XML parser.
10871 * This is not reentrant. Call once before processing in case of
10872 * use in multithreaded programs.
10873 */
10874
10875void
10876xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010877 if (xmlParserInitialized != 0)
10878 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010879
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010880 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10881 (xmlGenericError == NULL))
10882 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010883 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010884 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010885 xmlInitCharEncodingHandlers();
10886 xmlInitializePredefinedEntities();
10887 xmlDefaultSAXHandlerInit();
10888 xmlRegisterDefaultInputCallbacks();
10889 xmlRegisterDefaultOutputCallbacks();
10890#ifdef LIBXML_HTML_ENABLED
10891 htmlInitAutoClose();
10892 htmlDefaultSAXHandlerInit();
10893#endif
10894#ifdef LIBXML_XPATH_ENABLED
10895 xmlXPathInit();
10896#endif
10897 xmlParserInitialized = 1;
10898}
10899
10900/**
10901 * xmlCleanupParser:
10902 *
10903 * Cleanup function for the XML parser. It tries to reclaim all
10904 * parsing related global memory allocated for the parser processing.
10905 * It doesn't deallocate any document related memory. Calling this
10906 * function should not prevent reusing the parser.
10907 */
10908
10909void
10910xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010911 xmlCleanupCharEncodingHandlers();
10912 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010913#ifdef LIBXML_CATALOG_ENABLED
10914 xmlCatalogCleanup();
10915#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010916 xmlCleanupThreads();
10917 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010918}