blob: 66908264fca945b5d88eaf063df48d9771b788d8 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * XML-prefixed processing-instruction targets allowed by the W3C
 * specifications.  The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
303/*
304 * Macros for accessing the content. Those should be used only by the parser,
305 * and not exported.
306 *
307 * Dirty macros, i.e. one often need to make assumption on the context to
308 * use them
309 *
310 * CUR_PTR return the current pointer to the xmlChar to be parsed.
311 * To be used with extreme caution since operations consuming
312 * characters may move the input buffer to a different location !
313 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
314 * This should be used internally by the parser
315 * only to compare to ASCII values otherwise it would break when
316 * running with UTF-8 encoding.
317 * RAW same as CUR but in the input buffer, bypass any token
318 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
 *           only to compare against ASCII based substrings.
321 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
322 * strings within the parser.
323 *
324 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
325 *
326 * NEXT Skip to the next character, this does the proper decoding
327 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000328 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000329 * CUR_CHAR(l) returns the current unicode character (int), set l
330 * to the number of xmlChars used for the encoding [0-5].
331 * CUR_SCHAR same but operate on a string instead of the context
332 * COPY_BUF copy the current unicode char to the target buffer, increment
333 * the index
334 * GROW, SHRINK handling of input buffers
335 */
336
/*
 * Direct accessors on the current input of the parser context; all of
 * them expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the input cursor and ctxt->nbChars by val, then
 * hand a '%' found at the new position to xmlParserHandlePEReference()
 * and, if the cursor sits on a 0 byte and xmlParserInputGrow() returns
 * <= 0, call xmlPopInput().
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Shorthands for the character-level helpers; ctxt must be in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it in ctxt->nbChars,
 * and call xmlParserInputGrow() if the cursor lands on a 0 byte.
 * The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 {                                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): update the line/column counters for the character under the
 * cursor, advance the cursor by l xmlChars and hand a '%' found at the
 * new position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

/* Current character of the input (resp. of string s); l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a character of length 1 is stored directly, any other is
 * written through xmlCopyCharMultiByte().
 * The expansion is a bare if/else without its final ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);                     \
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to the new size.
 * It can only be expanded inside a function returning a pointer: when the
 * reallocation fails, the previous block is released (a failed realloc
 * leaves it allocated, so overwriting the pointer would leak it), an
 * error is reported through xmlGenericError and the enclosing function
 * returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (growBuffer_tmp == NULL) {                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        xmlGenericError(xmlGenericErrorContext, "realloc failed");      \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
1772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1773 ctxt->sax->error(ctxt->userData,
1774 "Name %s is not XML Namespace compliant\n",
1775 name);
1776 }
1777 }
1778 cur++;
1779
Owen Taylor3473f882001-02-23 17:55:21 +00001780 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1781 buf[len++] = c;
1782 c = *cur++;
1783 }
1784 if (len >= max) {
1785 /*
1786 * Okay someone managed to make a huge name, so he's ready to pay
1787 * for the processing speed.
1788 */
1789 max = len * 2;
1790
1791 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1792 if (buffer == NULL) {
1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
1795 "xmlSplitQName: out of memory\n");
1796 return(NULL);
1797 }
1798 memcpy(buffer, buf, len);
1799 while (c != 0) { /* tested bigname2.xml */
1800 if (len + 10 > max) {
1801 max *= 2;
1802 buffer = (xmlChar *) xmlRealloc(buffer,
1803 max * sizeof(xmlChar));
1804 if (buffer == NULL) {
1805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806 ctxt->sax->error(ctxt->userData,
1807 "xmlSplitQName: out of memory\n");
1808 return(NULL);
1809 }
1810 }
1811 buffer[len++] = c;
1812 c = *cur++;
1813 }
1814 buffer[len] = 0;
1815 }
1816
1817 if (buffer == NULL)
1818 ret = xmlStrndup(buf, len);
1819 else {
1820 ret = buffer;
1821 }
1822 }
1823
1824 return(ret);
1825}
1826
1827/************************************************************************
1828 * *
1829 * The parser itself *
1830 * Relates to http://www.w3.org/TR/REC-xml *
1831 * *
1832 ************************************************************************/
1833
Daniel Veillard76d66f42001-05-16 21:05:17 +00001834static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001835/**
1836 * xmlParseName:
1837 * @ctxt: an XML parser context
1838 *
1839 * parse an XML name.
1840 *
1841 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1842 * CombiningChar | Extender
1843 *
1844 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1845 *
1846 * [6] Names ::= Name (S Name)*
1847 *
1848 * Returns the Name parsed or NULL
1849 */
1850
1851xmlChar *
1852xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001853 const xmlChar *in;
1854 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001855 int count = 0;
1856
1857 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001858
1859 /*
1860 * Accelerator for simple ASCII names
1861 */
1862 in = ctxt->input->cur;
1863 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1864 ((*in >= 0x41) && (*in <= 0x5A)) ||
1865 (*in == '_') || (*in == ':')) {
1866 in++;
1867 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1868 ((*in >= 0x41) && (*in <= 0x5A)) ||
1869 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001870 (*in == '_') || (*in == '-') ||
1871 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001872 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001874 count = in - ctxt->input->cur;
1875 ret = xmlStrndup(ctxt->input->cur, count);
1876 ctxt->input->cur = in;
1877 return(ret);
1878 }
1879 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001880 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001881}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001882
Daniel Veillard46de64e2002-05-29 08:21:33 +00001883/**
1884 * xmlParseNameAndCompare:
1885 * @ctxt: an XML parser context
1886 *
1887 * parse an XML name and compares for match
1888 * (specialized for endtag parsing)
1889 *
1890 *
1891 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1892 * and the name for mismatch
1893 */
1894
Daniel Veillardf4862f02002-09-10 11:13:43 +00001895static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001896xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1897 const xmlChar *cmp = other;
1898 const xmlChar *in;
1899 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001900
1901 GROW;
1902
1903 in = ctxt->input->cur;
1904 while (*in != 0 && *in == *cmp) {
1905 ++in;
1906 ++cmp;
1907 }
1908 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1909 /* success */
1910 ctxt->input->cur = in;
1911 return (xmlChar*) 1;
1912 }
1913 /* failure (or end of input buffer), check with full function */
1914 ret = xmlParseName (ctxt);
1915 if (ret != 0 && xmlStrEqual (ret, other)) {
1916 xmlFree (ret);
1917 return (xmlChar*) 1;
1918 }
1919 return ret;
1920}
1921
Daniel Veillard76d66f42001-05-16 21:05:17 +00001922static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001923xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1924 xmlChar buf[XML_MAX_NAMELEN + 5];
1925 int len = 0, l;
1926 int c;
1927 int count = 0;
1928
1929 /*
1930 * Handler for more complex cases
1931 */
1932 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001933 c = CUR_CHAR(l);
1934 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1935 (!IS_LETTER(c) && (c != '_') &&
1936 (c != ':'))) {
1937 return(NULL);
1938 }
1939
1940 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1941 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1942 (c == '.') || (c == '-') ||
1943 (c == '_') || (c == ':') ||
1944 (IS_COMBINING(c)) ||
1945 (IS_EXTENDER(c)))) {
1946 if (count++ > 100) {
1947 count = 0;
1948 GROW;
1949 }
1950 COPY_BUF(l,buf,len,c);
1951 NEXTL(l);
1952 c = CUR_CHAR(l);
1953 if (len >= XML_MAX_NAMELEN) {
1954 /*
1955 * Okay someone managed to make a huge name, so he's ready to pay
1956 * for the processing speed.
1957 */
1958 xmlChar *buffer;
1959 int max = len * 2;
1960
1961 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1962 if (buffer == NULL) {
1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001965 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return(NULL);
1967 }
1968 memcpy(buffer, buf, len);
1969 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1970 (c == '.') || (c == '-') ||
1971 (c == '_') || (c == ':') ||
1972 (IS_COMBINING(c)) ||
1973 (IS_EXTENDER(c))) {
1974 if (count++ > 100) {
1975 count = 0;
1976 GROW;
1977 }
1978 if (len + 10 > max) {
1979 max *= 2;
1980 buffer = (xmlChar *) xmlRealloc(buffer,
1981 max * sizeof(xmlChar));
1982 if (buffer == NULL) {
1983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001985 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001986 return(NULL);
1987 }
1988 }
1989 COPY_BUF(l,buffer,len,c);
1990 NEXTL(l);
1991 c = CUR_CHAR(l);
1992 }
1993 buffer[len] = 0;
1994 return(buffer);
1995 }
1996 }
1997 return(xmlStrndup(buf, len));
1998}
1999
2000/**
2001 * xmlParseStringName:
2002 * @ctxt: an XML parser context
2003 * @str: a pointer to the string pointer (IN/OUT)
2004 *
2005 * parse an XML name.
2006 *
2007 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2008 * CombiningChar | Extender
2009 *
2010 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2011 *
2012 * [6] Names ::= Name (S Name)*
2013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002014 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002015 * is updated to the current location in the string.
2016 */
2017
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002018static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002019xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2020 xmlChar buf[XML_MAX_NAMELEN + 5];
2021 const xmlChar *cur = *str;
2022 int len = 0, l;
2023 int c;
2024
2025 c = CUR_SCHAR(cur, l);
2026 if (!IS_LETTER(c) && (c != '_') &&
2027 (c != ':')) {
2028 return(NULL);
2029 }
2030
2031 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2032 (c == '.') || (c == '-') ||
2033 (c == '_') || (c == ':') ||
2034 (IS_COMBINING(c)) ||
2035 (IS_EXTENDER(c))) {
2036 COPY_BUF(l,buf,len,c);
2037 cur += l;
2038 c = CUR_SCHAR(cur, l);
2039 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2040 /*
2041 * Okay someone managed to make a huge name, so he's ready to pay
2042 * for the processing speed.
2043 */
2044 xmlChar *buffer;
2045 int max = len * 2;
2046
2047 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2048 if (buffer == NULL) {
2049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2050 ctxt->sax->error(ctxt->userData,
2051 "xmlParseStringName: out of memory\n");
2052 return(NULL);
2053 }
2054 memcpy(buffer, buf, len);
2055 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2056 (c == '.') || (c == '-') ||
2057 (c == '_') || (c == ':') ||
2058 (IS_COMBINING(c)) ||
2059 (IS_EXTENDER(c))) {
2060 if (len + 10 > max) {
2061 max *= 2;
2062 buffer = (xmlChar *) xmlRealloc(buffer,
2063 max * sizeof(xmlChar));
2064 if (buffer == NULL) {
2065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2066 ctxt->sax->error(ctxt->userData,
2067 "xmlParseStringName: out of memory\n");
2068 return(NULL);
2069 }
2070 }
2071 COPY_BUF(l,buffer,len,c);
2072 cur += l;
2073 c = CUR_SCHAR(cur, l);
2074 }
2075 buffer[len] = 0;
2076 *str = cur;
2077 return(buffer);
2078 }
2079 }
2080 *str = cur;
2081 return(xmlStrndup(buf, len));
2082}
2083
2084/**
2085 * xmlParseNmtoken:
2086 * @ctxt: an XML parser context
2087 *
2088 * parse an XML Nmtoken.
2089 *
2090 * [7] Nmtoken ::= (NameChar)+
2091 *
2092 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2093 *
2094 * Returns the Nmtoken parsed or NULL
2095 */
2096
2097xmlChar *
2098xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2099 xmlChar buf[XML_MAX_NAMELEN + 5];
2100 int len = 0, l;
2101 int c;
2102 int count = 0;
2103
2104 GROW;
2105 c = CUR_CHAR(l);
2106
2107 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2108 (c == '.') || (c == '-') ||
2109 (c == '_') || (c == ':') ||
2110 (IS_COMBINING(c)) ||
2111 (IS_EXTENDER(c))) {
2112 if (count++ > 100) {
2113 count = 0;
2114 GROW;
2115 }
2116 COPY_BUF(l,buf,len,c);
2117 NEXTL(l);
2118 c = CUR_CHAR(l);
2119 if (len >= XML_MAX_NAMELEN) {
2120 /*
2121 * Okay someone managed to make a huge token, so he's ready to pay
2122 * for the processing speed.
2123 */
2124 xmlChar *buffer;
2125 int max = len * 2;
2126
2127 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2128 if (buffer == NULL) {
2129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2130 ctxt->sax->error(ctxt->userData,
2131 "xmlParseNmtoken: out of memory\n");
2132 return(NULL);
2133 }
2134 memcpy(buffer, buf, len);
2135 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2136 (c == '.') || (c == '-') ||
2137 (c == '_') || (c == ':') ||
2138 (IS_COMBINING(c)) ||
2139 (IS_EXTENDER(c))) {
2140 if (count++ > 100) {
2141 count = 0;
2142 GROW;
2143 }
2144 if (len + 10 > max) {
2145 max *= 2;
2146 buffer = (xmlChar *) xmlRealloc(buffer,
2147 max * sizeof(xmlChar));
2148 if (buffer == NULL) {
2149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2150 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002151 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002152 return(NULL);
2153 }
2154 }
2155 COPY_BUF(l,buffer,len,c);
2156 NEXTL(l);
2157 c = CUR_CHAR(l);
2158 }
2159 buffer[len] = 0;
2160 return(buffer);
2161 }
2162 }
2163 if (len == 0)
2164 return(NULL);
2165 return(xmlStrndup(buf, len));
2166}
2167
2168/**
2169 * xmlParseEntityValue:
2170 * @ctxt: an XML parser context
2171 * @orig: if non-NULL store a copy of the original entity value
2172 *
2173 * parse a value for ENTITY declarations
2174 *
2175 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2176 * "'" ([^%&'] | PEReference | Reference)* "'"
2177 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002178 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002179 */
2180
2181xmlChar *
2182xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2183 xmlChar *buf = NULL;
2184 int len = 0;
2185 int size = XML_PARSER_BUFFER_SIZE;
2186 int c, l;
2187 xmlChar stop;
2188 xmlChar *ret = NULL;
2189 const xmlChar *cur = NULL;
2190 xmlParserInputPtr input;
2191
2192 if (RAW == '"') stop = '"';
2193 else if (RAW == '\'') stop = '\'';
2194 else {
2195 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002200 return(NULL);
2201 }
2202 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2203 if (buf == NULL) {
2204 xmlGenericError(xmlGenericErrorContext,
2205 "malloc of %d byte failed\n", size);
2206 return(NULL);
2207 }
2208
2209 /*
2210 * The content of the entity definition is copied in a buffer.
2211 */
2212
2213 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2214 input = ctxt->input;
2215 GROW;
2216 NEXT;
2217 c = CUR_CHAR(l);
2218 /*
2219 * NOTE: 4.4.5 Included in Literal
2220 * When a parameter entity reference appears in a literal entity
2221 * value, ... a single or double quote character in the replacement
2222 * text is always treated as a normal data character and will not
2223 * terminate the literal.
2224 * In practice it means we stop the loop only when back at parsing
2225 * the initial entity and the quote is found
2226 */
2227 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2228 (ctxt->input != input))) {
2229 if (len + 5 >= size) {
2230 size *= 2;
2231 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2232 if (buf == NULL) {
2233 xmlGenericError(xmlGenericErrorContext,
2234 "realloc of %d byte failed\n", size);
2235 return(NULL);
2236 }
2237 }
2238 COPY_BUF(l,buf,len,c);
2239 NEXTL(l);
2240 /*
2241 * Pop-up of finished entities.
2242 */
2243 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2244 xmlPopInput(ctxt);
2245
2246 GROW;
2247 c = CUR_CHAR(l);
2248 if (c == 0) {
2249 GROW;
2250 c = CUR_CHAR(l);
2251 }
2252 }
2253 buf[len] = 0;
2254
2255 /*
2256 * Raise problem w.r.t. '&' and '%' being used in non-entities
2257 * reference constructs. Note Charref will be handled in
2258 * xmlStringDecodeEntities()
2259 */
2260 cur = buf;
2261 while (*cur != 0) { /* non input consuming */
2262 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2263 xmlChar *name;
2264 xmlChar tmp = *cur;
2265
2266 cur++;
2267 name = xmlParseStringName(ctxt, &cur);
2268 if ((name == NULL) || (*cur != ';')) {
2269 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2271 ctxt->sax->error(ctxt->userData,
2272 "EntityValue: '%c' forbidden except for entities references\n",
2273 tmp);
2274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002277 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2278 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002279 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2281 ctxt->sax->error(ctxt->userData,
2282 "EntityValue: PEReferences forbidden in internal subset\n",
2283 tmp);
2284 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002285 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002286 }
2287 if (name != NULL)
2288 xmlFree(name);
2289 }
2290 cur++;
2291 }
2292
2293 /*
2294 * Then PEReference entities are substituted.
2295 */
2296 if (c != stop) {
2297 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2299 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2300 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002301 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002302 xmlFree(buf);
2303 } else {
2304 NEXT;
2305 /*
2306 * NOTE: 4.4.7 Bypassed
2307 * When a general entity reference appears in the EntityValue in
2308 * an entity declaration, it is bypassed and left as is.
2309 * so XML_SUBSTITUTE_REF is not set here.
2310 */
2311 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2312 0, 0, 0);
2313 if (orig != NULL)
2314 *orig = buf;
2315 else
2316 xmlFree(buf);
2317 }
2318
2319 return(ret);
2320}
2321
2322/**
2323 * xmlParseAttValue:
2324 * @ctxt: an XML parser context
2325 *
2326 * parse a value for an attribute
2327 * Note: the parser won't do substitution of entities here, this
2328 * will be handled later in xmlStringGetNodeList
2329 *
2330 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2331 * "'" ([^<&'] | Reference)* "'"
2332 *
2333 * 3.3.3 Attribute-Value Normalization:
2334 * Before the value of an attribute is passed to the application or
2335 * checked for validity, the XML processor must normalize it as follows:
2336 * - a character reference is processed by appending the referenced
2337 * character to the attribute value
2338 * - an entity reference is processed by recursively processing the
2339 * replacement text of the entity
2340 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2341 * appending #x20 to the normalized value, except that only a single
2342 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2343 * parsed entity or the literal entity value of an internal parsed entity
2344 * - other characters are processed by appending them to the normalized value
2345 * If the declared value is not CDATA, then the XML processor must further
2346 * process the normalized attribute value by discarding any leading and
2347 * trailing space (#x20) characters, and by replacing sequences of space
2348 * (#x20) characters by a single space (#x20) character.
2349 * All attributes for which no declaration has been read should be treated
2350 * by a non-validating parser as if declared CDATA.
2351 *
2352 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2353 */
2354
2355xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002356xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2357
2358xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002359xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2360 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002361 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002362 xmlChar *ret = NULL;
2363 SHRINK;
2364 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002365 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002366 if (*in != '"' && *in != '\'') {
2367 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2369 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2370 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002371 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002372 return(NULL);
2373 }
2374 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2375 limit = *in;
2376 ++in;
2377
2378 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2379 *in != '&' && *in != '<'
2380 ) {
2381 ++in;
2382 }
2383 if (*in != limit) {
2384 return xmlParseAttValueComplex(ctxt);
2385 }
2386 ++in;
2387 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2388 CUR_PTR = in;
2389 return ret;
2390}
2391
Daniel Veillard01c13b52002-12-10 15:19:08 +00002392/**
2393 * xmlParseAttValueComplex:
2394 * @ctxt: an XML parser context
2395 *
2396 * parse a value for an attribute, this is the fallback function
2397 * of xmlParseAttValue() when the attribute parsing requires handling
2398 * of non-ASCII characters.
2399 *
2400 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2401 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002402xmlChar *
2403xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2404 xmlChar limit = 0;
2405 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002406 int len = 0;
2407 int buf_size = 0;
2408 int c, l;
2409 xmlChar *current = NULL;
2410 xmlEntityPtr ent;
2411
2412
2413 SHRINK;
2414 if (NXT(0) == '"') {
2415 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2416 limit = '"';
2417 NEXT;
2418 } else if (NXT(0) == '\'') {
2419 limit = '\'';
2420 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2421 NEXT;
2422 } else {
2423 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2425 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2426 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002427 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002428 return(NULL);
2429 }
2430
2431 /*
2432 * allocate a translation buffer.
2433 */
2434 buf_size = XML_PARSER_BUFFER_SIZE;
2435 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2436 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002437 xmlGenericError(xmlGenericErrorContext,
2438 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441
2442 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002443 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002444 */
2445 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002446 while ((NXT(0) != limit) && /* checked */
2447 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002448 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002450 if (NXT(1) == '#') {
2451 int val = xmlParseCharRef(ctxt);
2452 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002453 if (ctxt->replaceEntities) {
2454 if (len > buf_size - 10) {
2455 growBuffer(buf);
2456 }
2457 buf[len++] = '&';
2458 } else {
2459 /*
2460 * The reparsing will be done in xmlStringGetNodeList()
2461 * called by the attribute() function in SAX.c
2462 */
2463 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002464
Daniel Veillard319a7422001-09-11 09:27:09 +00002465 if (len > buf_size - 10) {
2466 growBuffer(buf);
2467 }
2468 current = &buffer[0];
2469 while (*current != 0) { /* non input consuming */
2470 buf[len++] = *current++;
2471 }
Owen Taylor3473f882001-02-23 17:55:21 +00002472 }
2473 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002474 if (len > buf_size - 10) {
2475 growBuffer(buf);
2476 }
Owen Taylor3473f882001-02-23 17:55:21 +00002477 len += xmlCopyChar(0, &buf[len], val);
2478 }
2479 } else {
2480 ent = xmlParseEntityRef(ctxt);
2481 if ((ent != NULL) &&
2482 (ctxt->replaceEntities != 0)) {
2483 xmlChar *rep;
2484
2485 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2486 rep = xmlStringDecodeEntities(ctxt, ent->content,
2487 XML_SUBSTITUTE_REF, 0, 0, 0);
2488 if (rep != NULL) {
2489 current = rep;
2490 while (*current != 0) { /* non input consuming */
2491 buf[len++] = *current++;
2492 if (len > buf_size - 10) {
2493 growBuffer(buf);
2494 }
2495 }
2496 xmlFree(rep);
2497 }
2498 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002499 if (len > buf_size - 10) {
2500 growBuffer(buf);
2501 }
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (ent->content != NULL)
2503 buf[len++] = ent->content[0];
2504 }
2505 } else if (ent != NULL) {
2506 int i = xmlStrlen(ent->name);
2507 const xmlChar *cur = ent->name;
2508
2509 /*
2510 * This may look absurd but is needed to detect
2511 * entities problems
2512 */
2513 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2514 (ent->content != NULL)) {
2515 xmlChar *rep;
2516 rep = xmlStringDecodeEntities(ctxt, ent->content,
2517 XML_SUBSTITUTE_REF, 0, 0, 0);
2518 if (rep != NULL)
2519 xmlFree(rep);
2520 }
2521
2522 /*
2523 * Just output the reference
2524 */
2525 buf[len++] = '&';
2526 if (len > buf_size - i - 10) {
2527 growBuffer(buf);
2528 }
2529 for (;i > 0;i--)
2530 buf[len++] = *cur++;
2531 buf[len++] = ';';
2532 }
2533 }
2534 } else {
2535 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2536 COPY_BUF(l,buf,len,0x20);
2537 if (len > buf_size - 10) {
2538 growBuffer(buf);
2539 }
2540 } else {
2541 COPY_BUF(l,buf,len,c);
2542 if (len > buf_size - 10) {
2543 growBuffer(buf);
2544 }
2545 }
2546 NEXTL(l);
2547 }
2548 GROW;
2549 c = CUR_CHAR(l);
2550 }
2551 buf[len++] = 0;
2552 if (RAW == '<') {
2553 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2555 ctxt->sax->error(ctxt->userData,
2556 "Unescaped '<' not allowed in attributes values\n");
2557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002559 } else if (RAW != limit) {
2560 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2562 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002565 } else
2566 NEXT;
2567 return(buf);
2568}
2569
2570/**
2571 * xmlParseSystemLiteral:
2572 * @ctxt: an XML parser context
2573 *
2574 * parse an XML Literal
2575 *
2576 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2577 *
2578 * Returns the SystemLiteral parsed or NULL
2579 */
2580
2581xmlChar *
2582xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2583 xmlChar *buf = NULL;
2584 int len = 0;
2585 int size = XML_PARSER_BUFFER_SIZE;
2586 int cur, l;
2587 xmlChar stop;
2588 int state = ctxt->instate;
2589 int count = 0;
2590
2591 SHRINK;
2592 if (RAW == '"') {
2593 NEXT;
2594 stop = '"';
2595 } else if (RAW == '\'') {
2596 NEXT;
2597 stop = '\'';
2598 } else {
2599 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2601 ctxt->sax->error(ctxt->userData,
2602 "SystemLiteral \" or ' expected\n");
2603 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002604 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 return(NULL);
2606 }
2607
2608 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2609 if (buf == NULL) {
2610 xmlGenericError(xmlGenericErrorContext,
2611 "malloc of %d byte failed\n", size);
2612 return(NULL);
2613 }
2614 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2615 cur = CUR_CHAR(l);
2616 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2617 if (len + 5 >= size) {
2618 size *= 2;
2619 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2620 if (buf == NULL) {
2621 xmlGenericError(xmlGenericErrorContext,
2622 "realloc of %d byte failed\n", size);
2623 ctxt->instate = (xmlParserInputState) state;
2624 return(NULL);
2625 }
2626 }
2627 count++;
2628 if (count > 50) {
2629 GROW;
2630 count = 0;
2631 }
2632 COPY_BUF(l,buf,len,cur);
2633 NEXTL(l);
2634 cur = CUR_CHAR(l);
2635 if (cur == 0) {
2636 GROW;
2637 SHRINK;
2638 cur = CUR_CHAR(l);
2639 }
2640 }
2641 buf[len] = 0;
2642 ctxt->instate = (xmlParserInputState) state;
2643 if (!IS_CHAR(cur)) {
2644 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2646 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2647 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002648 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002649 } else {
2650 NEXT;
2651 }
2652 return(buf);
2653}
2654
2655/**
2656 * xmlParsePubidLiteral:
2657 * @ctxt: an XML parser context
2658 *
2659 * parse an XML public literal
2660 *
2661 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2662 *
2663 * Returns the PubidLiteral parsed or NULL.
2664 */
2665
2666xmlChar *
2667xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2668 xmlChar *buf = NULL;
2669 int len = 0;
2670 int size = XML_PARSER_BUFFER_SIZE;
2671 xmlChar cur;
2672 xmlChar stop;
2673 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002674 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002675
2676 SHRINK;
2677 if (RAW == '"') {
2678 NEXT;
2679 stop = '"';
2680 } else if (RAW == '\'') {
2681 NEXT;
2682 stop = '\'';
2683 } else {
2684 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2686 ctxt->sax->error(ctxt->userData,
2687 "SystemLiteral \" or ' expected\n");
2688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 return(NULL);
2691 }
2692 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2693 if (buf == NULL) {
2694 xmlGenericError(xmlGenericErrorContext,
2695 "malloc of %d byte failed\n", size);
2696 return(NULL);
2697 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002698 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002699 cur = CUR;
2700 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2701 if (len + 1 >= size) {
2702 size *= 2;
2703 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2704 if (buf == NULL) {
2705 xmlGenericError(xmlGenericErrorContext,
2706 "realloc of %d byte failed\n", size);
2707 return(NULL);
2708 }
2709 }
2710 buf[len++] = cur;
2711 count++;
2712 if (count > 50) {
2713 GROW;
2714 count = 0;
2715 }
2716 NEXT;
2717 cur = CUR;
2718 if (cur == 0) {
2719 GROW;
2720 SHRINK;
2721 cur = CUR;
2722 }
2723 }
2724 buf[len] = 0;
2725 if (cur != stop) {
2726 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2728 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2729 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002730 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002731 } else {
2732 NEXT;
2733 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002734 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002735 return(buf);
2736}
2737
Daniel Veillard48b2f892001-02-25 16:11:03 +00002738void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002739/**
2740 * xmlParseCharData:
2741 * @ctxt: an XML parser context
2742 * @cdata: int indicating whether we are within a CDATA section
2743 *
2744 * parse a CharData section.
2745 * if we are within a CDATA section ']]>' marks an end of section.
2746 *
2747 * The right angle bracket (>) may be represented using the string "&gt;",
2748 * and must, for compatibility, be escaped using "&gt;" or a character
2749 * reference when it appears in the string "]]>" in content, when that
2750 * string is not marking the end of a CDATA section.
2751 *
2752 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2753 */
2754
2755void
2756xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002757 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002758 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002759 int line = ctxt->input->line;
2760 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761
2762 SHRINK;
2763 GROW;
2764 /*
2765 * Accelerated common case where input don't need to be
2766 * modified before passing it to the handler.
2767 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002768 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002769 in = ctxt->input->cur;
2770 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002771get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002772 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2773 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002774 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002776 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002779 ctxt->input->line++;
2780 in++;
2781 }
2782 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002783 }
2784 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002785 if ((in[1] == ']') && (in[2] == '>')) {
2786 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2788 ctxt->sax->error(ctxt->userData,
2789 "Sequence ']]>' not allowed in content\n");
2790 ctxt->input->cur = in;
2791 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002792 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002793 return;
2794 }
2795 in++;
2796 goto get_more;
2797 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002798 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002799 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002800 if (IS_BLANK(*ctxt->input->cur)) {
2801 const xmlChar *tmp = ctxt->input->cur;
2802 ctxt->input->cur = in;
2803 if (areBlanks(ctxt, tmp, nbchar)) {
2804 if (ctxt->sax->ignorableWhitespace != NULL)
2805 ctxt->sax->ignorableWhitespace(ctxt->userData,
2806 tmp, nbchar);
2807 } else {
2808 if (ctxt->sax->characters != NULL)
2809 ctxt->sax->characters(ctxt->userData,
2810 tmp, nbchar);
2811 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002812 line = ctxt->input->line;
2813 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002814 } else {
2815 if (ctxt->sax->characters != NULL)
2816 ctxt->sax->characters(ctxt->userData,
2817 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002818 line = ctxt->input->line;
2819 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002820 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002821 }
2822 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002823 if (*in == 0xD) {
2824 in++;
2825 if (*in == 0xA) {
2826 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002827 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002828 ctxt->input->line++;
2829 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 in--;
2832 }
2833 if (*in == '<') {
2834 return;
2835 }
2836 if (*in == '&') {
2837 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002838 }
2839 SHRINK;
2840 GROW;
2841 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002842 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002843 nbchar = 0;
2844 }
Daniel Veillard50582112001-03-26 22:52:16 +00002845 ctxt->input->line = line;
2846 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002847 xmlParseCharDataComplex(ctxt, cdata);
2848}
2849
Daniel Veillard01c13b52002-12-10 15:19:08 +00002850/**
2851 * xmlParseCharDataComplex:
2852 * @ctxt: an XML parser context
2853 * @cdata: int indicating whether we are within a CDATA section
2854 *
2855 * parse a CharData section.this is the fallback function
2856 * of xmlParseCharData() when the parsing requires handling
2857 * of non-ASCII characters.
2858 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002859void
2860xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002861 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2862 int nbchar = 0;
2863 int cur, l;
2864 int count = 0;
2865
2866 SHRINK;
2867 GROW;
2868 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002869 while ((cur != '<') && /* checked */
2870 (cur != '&') &&
2871 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002872 if ((cur == ']') && (NXT(1) == ']') &&
2873 (NXT(2) == '>')) {
2874 if (cdata) break;
2875 else {
2876 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2878 ctxt->sax->error(ctxt->userData,
2879 "Sequence ']]>' not allowed in content\n");
2880 /* Should this be relaxed ??? I see a "must here */
2881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002883 }
2884 }
2885 COPY_BUF(l,buf,nbchar,cur);
2886 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2887 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002888 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002889 */
2890 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2891 if (areBlanks(ctxt, buf, nbchar)) {
2892 if (ctxt->sax->ignorableWhitespace != NULL)
2893 ctxt->sax->ignorableWhitespace(ctxt->userData,
2894 buf, nbchar);
2895 } else {
2896 if (ctxt->sax->characters != NULL)
2897 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2898 }
2899 }
2900 nbchar = 0;
2901 }
2902 count++;
2903 if (count > 50) {
2904 GROW;
2905 count = 0;
2906 }
2907 NEXTL(l);
2908 cur = CUR_CHAR(l);
2909 }
2910 if (nbchar != 0) {
2911 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002912 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002913 */
2914 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2915 if (areBlanks(ctxt, buf, nbchar)) {
2916 if (ctxt->sax->ignorableWhitespace != NULL)
2917 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2918 } else {
2919 if (ctxt->sax->characters != NULL)
2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2921 }
2922 }
2923 }
2924}
2925
2926/**
2927 * xmlParseExternalID:
2928 * @ctxt: an XML parser context
2929 * @publicID: a xmlChar** receiving PubidLiteral
2930 * @strict: indicate whether we should restrict parsing to only
2931 * production [75], see NOTE below
2932 *
2933 * Parse an External ID or a Public ID
2934 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002935 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002936 * 'PUBLIC' S PubidLiteral S SystemLiteral
2937 *
2938 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2939 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2942 *
2943 * Returns the function returns SystemLiteral and in the second
2944 * case publicID receives PubidLiteral, is strict is off
2945 * it is possible to return NULL and have publicID set.
2946 */
2947
2948xmlChar *
2949xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2950 xmlChar *URI = NULL;
2951
2952 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002953
2954 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002955 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2956 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2957 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2958 SKIP(6);
2959 if (!IS_BLANK(CUR)) {
2960 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2962 ctxt->sax->error(ctxt->userData,
2963 "Space required after 'SYSTEM'\n");
2964 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002965 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 SKIP_BLANKS;
2968 URI = xmlParseSystemLiteral(ctxt);
2969 if (URI == NULL) {
2970 ctxt->errNo = XML_ERR_URI_REQUIRED;
2971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2972 ctxt->sax->error(ctxt->userData,
2973 "xmlParseExternalID: SYSTEM, no URI\n");
2974 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002975 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002976 }
2977 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2978 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2979 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2980 SKIP(6);
2981 if (!IS_BLANK(CUR)) {
2982 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2984 ctxt->sax->error(ctxt->userData,
2985 "Space required after 'PUBLIC'\n");
2986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 SKIP_BLANKS;
2990 *publicID = xmlParsePubidLiteral(ctxt);
2991 if (*publicID == NULL) {
2992 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2994 ctxt->sax->error(ctxt->userData,
2995 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002998 }
2999 if (strict) {
3000 /*
3001 * We don't handle [83] so "S SystemLiteral" is required.
3002 */
3003 if (!IS_BLANK(CUR)) {
3004 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3006 ctxt->sax->error(ctxt->userData,
3007 "Space required after the Public Identifier\n");
3008 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003010 }
3011 } else {
3012 /*
3013 * We handle [83] so we return immediately, if
3014 * "S SystemLiteral" is not detected. From a purely parsing
3015 * point of view that's a nice mess.
3016 */
3017 const xmlChar *ptr;
3018 GROW;
3019
3020 ptr = CUR_PTR;
3021 if (!IS_BLANK(*ptr)) return(NULL);
3022
3023 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3024 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3025 }
3026 SKIP_BLANKS;
3027 URI = xmlParseSystemLiteral(ctxt);
3028 if (URI == NULL) {
3029 ctxt->errNo = XML_ERR_URI_REQUIRED;
3030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3031 ctxt->sax->error(ctxt->userData,
3032 "xmlParseExternalID: PUBLIC, no URI\n");
3033 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003034 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003035 }
3036 }
3037 return(URI);
3038}
3039
3040/**
3041 * xmlParseComment:
3042 * @ctxt: an XML parser context
3043 *
3044 * Skip an XML (SGML) comment <!-- .... -->
3045 * The spec says that "For compatibility, the string "--" (double-hyphen)
3046 * must not occur within comments. "
3047 *
3048 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3049 */
3050void
3051xmlParseComment(xmlParserCtxtPtr ctxt) {
3052 xmlChar *buf = NULL;
3053 int len;
3054 int size = XML_PARSER_BUFFER_SIZE;
3055 int q, ql;
3056 int r, rl;
3057 int cur, l;
3058 xmlParserInputState state;
3059 xmlParserInputPtr input = ctxt->input;
3060 int count = 0;
3061
3062 /*
3063 * Check that there is a comment right here.
3064 */
3065 if ((RAW != '<') || (NXT(1) != '!') ||
3066 (NXT(2) != '-') || (NXT(3) != '-')) return;
3067
3068 state = ctxt->instate;
3069 ctxt->instate = XML_PARSER_COMMENT;
3070 SHRINK;
3071 SKIP(4);
3072 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3073 if (buf == NULL) {
3074 xmlGenericError(xmlGenericErrorContext,
3075 "malloc of %d byte failed\n", size);
3076 ctxt->instate = state;
3077 return;
3078 }
3079 q = CUR_CHAR(ql);
3080 NEXTL(ql);
3081 r = CUR_CHAR(rl);
3082 NEXTL(rl);
3083 cur = CUR_CHAR(l);
3084 len = 0;
3085 while (IS_CHAR(cur) && /* checked */
3086 ((cur != '>') ||
3087 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003088 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003089 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3091 ctxt->sax->error(ctxt->userData,
3092 "Comment must not contain '--' (double-hyphen)`\n");
3093 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003094 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003095 }
3096 if (len + 5 >= size) {
3097 size *= 2;
3098 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3099 if (buf == NULL) {
3100 xmlGenericError(xmlGenericErrorContext,
3101 "realloc of %d byte failed\n", size);
3102 ctxt->instate = state;
3103 return;
3104 }
3105 }
3106 COPY_BUF(ql,buf,len,q);
3107 q = r;
3108 ql = rl;
3109 r = cur;
3110 rl = l;
3111
3112 count++;
3113 if (count > 50) {
3114 GROW;
3115 count = 0;
3116 }
3117 NEXTL(l);
3118 cur = CUR_CHAR(l);
3119 if (cur == 0) {
3120 SHRINK;
3121 GROW;
3122 cur = CUR_CHAR(l);
3123 }
3124 }
3125 buf[len] = 0;
3126 if (!IS_CHAR(cur)) {
3127 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "Comment not terminated \n<!--%.50s\n", buf);
3131 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003132 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003133 xmlFree(buf);
3134 } else {
3135 if (input != ctxt->input) {
3136 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3138 ctxt->sax->error(ctxt->userData,
3139"Comment doesn't start and stop in the same entity\n");
3140 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003141 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003142 }
3143 NEXT;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3145 (!ctxt->disableSAX))
3146 ctxt->sax->comment(ctxt->userData, buf);
3147 xmlFree(buf);
3148 }
3149 ctxt->instate = state;
3150}
3151
3152/**
3153 * xmlParsePITarget:
3154 * @ctxt: an XML parser context
3155 *
3156 * parse the name of a PI
3157 *
3158 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3159 *
3160 * Returns the PITarget name or NULL
3161 */
3162
3163xmlChar *
3164xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3165 xmlChar *name;
3166
3167 name = xmlParseName(ctxt);
3168 if ((name != NULL) &&
3169 ((name[0] == 'x') || (name[0] == 'X')) &&
3170 ((name[1] == 'm') || (name[1] == 'M')) &&
3171 ((name[2] == 'l') || (name[2] == 'L'))) {
3172 int i;
3173 if ((name[0] == 'x') && (name[1] == 'm') &&
3174 (name[2] == 'l') && (name[3] == 0)) {
3175 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData,
3178 "XML declaration allowed only at the start of the document\n");
3179 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003180 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(name);
3182 } else if (name[3] == 0) {
3183 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3185 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3186 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003187 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003188 return(name);
3189 }
3190 for (i = 0;;i++) {
3191 if (xmlW3CPIs[i] == NULL) break;
3192 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3193 return(name);
3194 }
3195 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3196 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3197 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003198 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003199 }
3200 }
3201 return(name);
3202}
3203
#ifdef LIBXML_CATALOG_ENABLED
/*
 * xmlCatalogPISkipBlanks:
 * @str:  a zero terminated string
 *
 * Returns a pointer to the first character of @str which is not a blank.
 */
static const xmlChar *
xmlCatalogPISkipBlanks(const xmlChar *str) {
    while (IS_BLANK(*str))
        str++;
    return(str);
}

/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must read: catalog, '=', a quoted URL, with optional blanks
 * around each item and nothing else. A syntax error raises the
 * XML_WAR_CATALOG_PI warning, except for a missing '=' which is
 * silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *scan;        /* current position in the PI value */
    const xmlChar *valueStart;  /* first character after the opening quote */
    xmlChar *URL = NULL;
    xmlChar quote;

    scan = xmlCatalogPISkipBlanks(catalog);
    if (xmlStrncmp(scan, BAD_CAST"catalog", 7) != 0)
        goto error;
    scan = xmlCatalogPISkipBlanks(scan + 7);

    /* no '=' after the pseudo attribute name: give up without a warning */
    if (*scan != '=') {
        return;
    }
    scan = xmlCatalogPISkipBlanks(scan + 1);

    quote = *scan;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    scan++;

    /* look for the matching closing quote */
    valueStart = scan;
    for (; (*scan != 0) && (*scan != quote); scan++)
        ;
    if (*scan == 0)
        goto error;
    URL = xmlStrndup(valueStart, scan - valueStart);

    /* only blanks may follow the closing quote */
    scan = xmlCatalogPISkipBlanks(scan + 1);
    if (*scan != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3266
Owen Taylor3473f882001-02-23 17:55:21 +00003267/**
3268 * xmlParsePI:
3269 * @ctxt: an XML parser context
3270 *
3271 * parse an XML Processing Instruction.
3272 *
3273 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3274 *
3275 * The processing is transfered to SAX once parsed.
3276 */
3277
3278void
3279xmlParsePI(xmlParserCtxtPtr ctxt) {
3280 xmlChar *buf = NULL;
3281 int len = 0;
3282 int size = XML_PARSER_BUFFER_SIZE;
3283 int cur, l;
3284 xmlChar *target;
3285 xmlParserInputState state;
3286 int count = 0;
3287
3288 if ((RAW == '<') && (NXT(1) == '?')) {
3289 xmlParserInputPtr input = ctxt->input;
3290 state = ctxt->instate;
3291 ctxt->instate = XML_PARSER_PI;
3292 /*
3293 * this is a Processing Instruction.
3294 */
3295 SKIP(2);
3296 SHRINK;
3297
3298 /*
3299 * Parse the target name and check for special support like
3300 * namespace.
3301 */
3302 target = xmlParsePITarget(ctxt);
3303 if (target != NULL) {
3304 if ((RAW == '?') && (NXT(1) == '>')) {
3305 if (input != ctxt->input) {
3306 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3308 ctxt->sax->error(ctxt->userData,
3309 "PI declaration doesn't start and stop in the same entity\n");
3310 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003311 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003312 }
3313 SKIP(2);
3314
3315 /*
3316 * SAX: PI detected.
3317 */
3318 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3319 (ctxt->sax->processingInstruction != NULL))
3320 ctxt->sax->processingInstruction(ctxt->userData,
3321 target, NULL);
3322 ctxt->instate = state;
3323 xmlFree(target);
3324 return;
3325 }
3326 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3327 if (buf == NULL) {
3328 xmlGenericError(xmlGenericErrorContext,
3329 "malloc of %d byte failed\n", size);
3330 ctxt->instate = state;
3331 return;
3332 }
3333 cur = CUR;
3334 if (!IS_BLANK(cur)) {
3335 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData,
3338 "xmlParsePI: PI %s space expected\n", target);
3339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003341 }
3342 SKIP_BLANKS;
3343 cur = CUR_CHAR(l);
3344 while (IS_CHAR(cur) && /* checked */
3345 ((cur != '?') || (NXT(1) != '>'))) {
3346 if (len + 5 >= size) {
3347 size *= 2;
3348 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3349 if (buf == NULL) {
3350 xmlGenericError(xmlGenericErrorContext,
3351 "realloc of %d byte failed\n", size);
3352 ctxt->instate = state;
3353 return;
3354 }
3355 }
3356 count++;
3357 if (count > 50) {
3358 GROW;
3359 count = 0;
3360 }
3361 COPY_BUF(l,buf,len,cur);
3362 NEXTL(l);
3363 cur = CUR_CHAR(l);
3364 if (cur == 0) {
3365 SHRINK;
3366 GROW;
3367 cur = CUR_CHAR(l);
3368 }
3369 }
3370 buf[len] = 0;
3371 if (cur != '?') {
3372 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3374 ctxt->sax->error(ctxt->userData,
3375 "xmlParsePI: PI %s never end ...\n", target);
3376 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003377 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003378 } else {
3379 if (input != ctxt->input) {
3380 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3382 ctxt->sax->error(ctxt->userData,
3383 "PI declaration doesn't start and stop in the same entity\n");
3384 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003385 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003386 }
3387 SKIP(2);
3388
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003389#ifdef LIBXML_CATALOG_ENABLED
3390 if (((state == XML_PARSER_MISC) ||
3391 (state == XML_PARSER_START)) &&
3392 (xmlStrEqual(target, XML_CATALOG_PI))) {
3393 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3394 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3395 (allow == XML_CATA_ALLOW_ALL))
3396 xmlParseCatalogPI(ctxt, buf);
3397 }
3398#endif
3399
3400
Owen Taylor3473f882001-02-23 17:55:21 +00003401 /*
3402 * SAX: PI detected.
3403 */
3404 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3405 (ctxt->sax->processingInstruction != NULL))
3406 ctxt->sax->processingInstruction(ctxt->userData,
3407 target, buf);
3408 }
3409 xmlFree(buf);
3410 xmlFree(target);
3411 } else {
3412 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData,
3415 "xmlParsePI : no target name\n");
3416 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003417 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003418 }
3419 ctxt->instate = state;
3420 }
3421}
3422
3423/**
3424 * xmlParseNotationDecl:
3425 * @ctxt: an XML parser context
3426 *
3427 * parse a notation declaration
3428 *
3429 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3430 *
3431 * Hence there is actually 3 choices:
3432 * 'PUBLIC' S PubidLiteral
3433 * 'PUBLIC' S PubidLiteral S SystemLiteral
3434 * and 'SYSTEM' S SystemLiteral
3435 *
3436 * See the NOTE on xmlParseExternalID().
3437 */
3438
3439void
3440xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3441 xmlChar *name;
3442 xmlChar *Pubid;
3443 xmlChar *Systemid;
3444
3445 if ((RAW == '<') && (NXT(1) == '!') &&
3446 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3447 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3448 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3449 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3450 xmlParserInputPtr input = ctxt->input;
3451 SHRINK;
3452 SKIP(10);
3453 if (!IS_BLANK(CUR)) {
3454 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3456 ctxt->sax->error(ctxt->userData,
3457 "Space required after '<!NOTATION'\n");
3458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003460 return;
3461 }
3462 SKIP_BLANKS;
3463
Daniel Veillard76d66f42001-05-16 21:05:17 +00003464 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003465 if (name == NULL) {
3466 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3468 ctxt->sax->error(ctxt->userData,
3469 "NOTATION: Name expected here\n");
3470 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003472 return;
3473 }
3474 if (!IS_BLANK(CUR)) {
3475 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3477 ctxt->sax->error(ctxt->userData,
3478 "Space required after the NOTATION name'\n");
3479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003481 return;
3482 }
3483 SKIP_BLANKS;
3484
3485 /*
3486 * Parse the IDs.
3487 */
3488 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3489 SKIP_BLANKS;
3490
3491 if (RAW == '>') {
3492 if (input != ctxt->input) {
3493 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496"Notation declaration doesn't start and stop in the same entity\n");
3497 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003498 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003499 }
3500 NEXT;
3501 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3502 (ctxt->sax->notationDecl != NULL))
3503 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3504 } else {
3505 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508 "'>' required to close NOTATION declaration\n");
3509 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 }
3512 xmlFree(name);
3513 if (Systemid != NULL) xmlFree(Systemid);
3514 if (Pubid != NULL) xmlFree(Pubid);
3515 }
3516}
3517
3518/**
3519 * xmlParseEntityDecl:
3520 * @ctxt: an XML parser context
3521 *
3522 * parse <!ENTITY declarations
3523 *
3524 * [70] EntityDecl ::= GEDecl | PEDecl
3525 *
3526 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3527 *
3528 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3529 *
3530 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3531 *
3532 * [74] PEDef ::= EntityValue | ExternalID
3533 *
3534 * [76] NDataDecl ::= S 'NDATA' S Name
3535 *
3536 * [ VC: Notation Declared ]
3537 * The Name must match the declared name of a notation.
3538 */
3539
3540void
3541xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3542 xmlChar *name = NULL;
3543 xmlChar *value = NULL;
3544 xmlChar *URI = NULL, *literal = NULL;
3545 xmlChar *ndata = NULL;
3546 int isParameter = 0;
3547 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003548 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003549
3550 GROW;
3551 if ((RAW == '<') && (NXT(1) == '!') &&
3552 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3553 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3554 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3555 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003556 SHRINK;
3557 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003558 skipped = SKIP_BLANKS;
3559 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003560 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3562 ctxt->sax->error(ctxt->userData,
3563 "Space required after '<!ENTITY'\n");
3564 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003566 }
Owen Taylor3473f882001-02-23 17:55:21 +00003567
3568 if (RAW == '%') {
3569 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003570 skipped = SKIP_BLANKS;
3571 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003572 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3574 ctxt->sax->error(ctxt->userData,
3575 "Space required after '%'\n");
3576 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003578 }
Owen Taylor3473f882001-02-23 17:55:21 +00003579 isParameter = 1;
3580 }
3581
Daniel Veillard76d66f42001-05-16 21:05:17 +00003582 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003583 if (name == NULL) {
3584 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3586 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 return;
3590 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003591 skipped = SKIP_BLANKS;
3592 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003593 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3595 ctxt->sax->error(ctxt->userData,
3596 "Space required after the entity name\n");
3597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003599 }
Owen Taylor3473f882001-02-23 17:55:21 +00003600
Daniel Veillardf5582f12002-06-11 10:08:16 +00003601 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 /*
3603 * handle the various case of definitions...
3604 */
3605 if (isParameter) {
3606 if ((RAW == '"') || (RAW == '\'')) {
3607 value = xmlParseEntityValue(ctxt, &orig);
3608 if (value) {
3609 if ((ctxt->sax != NULL) &&
3610 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3611 ctxt->sax->entityDecl(ctxt->userData, name,
3612 XML_INTERNAL_PARAMETER_ENTITY,
3613 NULL, NULL, value);
3614 }
3615 } else {
3616 URI = xmlParseExternalID(ctxt, &literal, 1);
3617 if ((URI == NULL) && (literal == NULL)) {
3618 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620 ctxt->sax->error(ctxt->userData,
3621 "Entity value required\n");
3622 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003624 }
3625 if (URI) {
3626 xmlURIPtr uri;
3627
3628 uri = xmlParseURI((const char *) URI);
3629 if (uri == NULL) {
3630 ctxt->errNo = XML_ERR_INVALID_URI;
3631 if ((ctxt->sax != NULL) &&
3632 (!ctxt->disableSAX) &&
3633 (ctxt->sax->error != NULL))
3634 ctxt->sax->error(ctxt->userData,
3635 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003636 /*
3637 * This really ought to be a well formedness error
3638 * but the XML Core WG decided otherwise c.f. issue
3639 * E26 of the XML erratas.
3640 */
Owen Taylor3473f882001-02-23 17:55:21 +00003641 } else {
3642 if (uri->fragment != NULL) {
3643 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3644 if ((ctxt->sax != NULL) &&
3645 (!ctxt->disableSAX) &&
3646 (ctxt->sax->error != NULL))
3647 ctxt->sax->error(ctxt->userData,
3648 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003649 /*
3650 * Okay this is foolish to block those but not
3651 * invalid URIs.
3652 */
Owen Taylor3473f882001-02-23 17:55:21 +00003653 ctxt->wellFormed = 0;
3654 } else {
3655 if ((ctxt->sax != NULL) &&
3656 (!ctxt->disableSAX) &&
3657 (ctxt->sax->entityDecl != NULL))
3658 ctxt->sax->entityDecl(ctxt->userData, name,
3659 XML_EXTERNAL_PARAMETER_ENTITY,
3660 literal, URI, NULL);
3661 }
3662 xmlFreeURI(uri);
3663 }
3664 }
3665 }
3666 } else {
3667 if ((RAW == '"') || (RAW == '\'')) {
3668 value = xmlParseEntityValue(ctxt, &orig);
3669 if ((ctxt->sax != NULL) &&
3670 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3671 ctxt->sax->entityDecl(ctxt->userData, name,
3672 XML_INTERNAL_GENERAL_ENTITY,
3673 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003674 /*
3675 * For expat compatibility in SAX mode.
3676 */
3677 if ((ctxt->myDoc == NULL) ||
3678 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3679 if (ctxt->myDoc == NULL) {
3680 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3681 }
3682 if (ctxt->myDoc->intSubset == NULL)
3683 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3684 BAD_CAST "fake", NULL, NULL);
3685
3686 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3687 NULL, NULL, value);
3688 }
Owen Taylor3473f882001-02-23 17:55:21 +00003689 } else {
3690 URI = xmlParseExternalID(ctxt, &literal, 1);
3691 if ((URI == NULL) && (literal == NULL)) {
3692 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Entity value required\n");
3696 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003697 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 if (URI) {
3700 xmlURIPtr uri;
3701
3702 uri = xmlParseURI((const char *)URI);
3703 if (uri == NULL) {
3704 ctxt->errNo = XML_ERR_INVALID_URI;
3705 if ((ctxt->sax != NULL) &&
3706 (!ctxt->disableSAX) &&
3707 (ctxt->sax->error != NULL))
3708 ctxt->sax->error(ctxt->userData,
3709 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003710 /*
3711 * This really ought to be a well formedness error
3712 * but the XML Core WG decided otherwise c.f. issue
3713 * E26 of the XML erratas.
3714 */
Owen Taylor3473f882001-02-23 17:55:21 +00003715 } else {
3716 if (uri->fragment != NULL) {
3717 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3718 if ((ctxt->sax != NULL) &&
3719 (!ctxt->disableSAX) &&
3720 (ctxt->sax->error != NULL))
3721 ctxt->sax->error(ctxt->userData,
3722 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003723 /*
3724 * Okay this is foolish to block those but not
3725 * invalid URIs.
3726 */
Owen Taylor3473f882001-02-23 17:55:21 +00003727 ctxt->wellFormed = 0;
3728 }
3729 xmlFreeURI(uri);
3730 }
3731 }
3732 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "Space required before 'NDATA'\n");
3737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 SKIP_BLANKS;
3741 if ((RAW == 'N') && (NXT(1) == 'D') &&
3742 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3743 (NXT(4) == 'A')) {
3744 SKIP(5);
3745 if (!IS_BLANK(CUR)) {
3746 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "Space required after 'NDATA'\n");
3750 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003751 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003754 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3756 (ctxt->sax->unparsedEntityDecl != NULL))
3757 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3758 literal, URI, ndata);
3759 } else {
3760 if ((ctxt->sax != NULL) &&
3761 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3762 ctxt->sax->entityDecl(ctxt->userData, name,
3763 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3764 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003765 /*
3766 * For expat compatibility in SAX mode.
3767 * assuming the entity repalcement was asked for
3768 */
3769 if ((ctxt->replaceEntities != 0) &&
3770 ((ctxt->myDoc == NULL) ||
3771 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3772 if (ctxt->myDoc == NULL) {
3773 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3774 }
3775
3776 if (ctxt->myDoc->intSubset == NULL)
3777 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3778 BAD_CAST "fake", NULL, NULL);
3779 entityDecl(ctxt, name,
3780 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3781 literal, URI, NULL);
3782 }
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
3784 }
3785 }
3786 SKIP_BLANKS;
3787 if (RAW != '>') {
3788 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3790 ctxt->sax->error(ctxt->userData,
3791 "xmlParseEntityDecl: entity %s not terminated\n", name);
3792 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003794 } else {
3795 if (input != ctxt->input) {
3796 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3798 ctxt->sax->error(ctxt->userData,
3799"Entity declaration doesn't start and stop in the same entity\n");
3800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003802 }
3803 NEXT;
3804 }
3805 if (orig != NULL) {
3806 /*
3807 * Ugly mechanism to save the raw entity value.
3808 */
3809 xmlEntityPtr cur = NULL;
3810
3811 if (isParameter) {
3812 if ((ctxt->sax != NULL) &&
3813 (ctxt->sax->getParameterEntity != NULL))
3814 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3815 } else {
3816 if ((ctxt->sax != NULL) &&
3817 (ctxt->sax->getEntity != NULL))
3818 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003819 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3820 cur = getEntity(ctxt, name);
3821 }
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 if (cur != NULL) {
3824 if (cur->orig != NULL)
3825 xmlFree(orig);
3826 else
3827 cur->orig = orig;
3828 } else
3829 xmlFree(orig);
3830 }
3831 if (name != NULL) xmlFree(name);
3832 if (value != NULL) xmlFree(value);
3833 if (URI != NULL) xmlFree(URI);
3834 if (literal != NULL) xmlFree(literal);
3835 if (ndata != NULL) xmlFree(ndata);
3836 }
3837}
3838
3839/**
3840 * xmlParseDefaultDecl:
3841 * @ctxt: an XML parser context
3842 * @value: Receive a possible fixed default value for the attribute
3843 *
3844 * Parse an attribute default declaration
3845 *
3846 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3847 *
3848 * [ VC: Required Attribute ]
3849 * if the default declaration is the keyword #REQUIRED, then the
3850 * attribute must be specified for all elements of the type in the
3851 * attribute-list declaration.
3852 *
3853 * [ VC: Attribute Default Legal ]
3854 * The declared default value must meet the lexical constraints of
3855 * the declared attribute type c.f. xmlValidateAttributeDecl()
3856 *
3857 * [ VC: Fixed Attribute Default ]
3858 * if an attribute has a default value declared with the #FIXED
3859 * keyword, instances of that attribute must match the default value.
3860 *
3861 * [ WFC: No < in Attribute Values ]
3862 * handled in xmlParseAttValue()
3863 *
3864 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3865 * or XML_ATTRIBUTE_FIXED.
3866 */
3867
3868int
3869xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3870 int val;
3871 xmlChar *ret;
3872
3873 *value = NULL;
3874 if ((RAW == '#') && (NXT(1) == 'R') &&
3875 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3876 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3877 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3878 (NXT(8) == 'D')) {
3879 SKIP(9);
3880 return(XML_ATTRIBUTE_REQUIRED);
3881 }
3882 if ((RAW == '#') && (NXT(1) == 'I') &&
3883 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3884 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3885 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3886 SKIP(8);
3887 return(XML_ATTRIBUTE_IMPLIED);
3888 }
3889 val = XML_ATTRIBUTE_NONE;
3890 if ((RAW == '#') && (NXT(1) == 'F') &&
3891 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3892 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3893 SKIP(6);
3894 val = XML_ATTRIBUTE_FIXED;
3895 if (!IS_BLANK(CUR)) {
3896 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3898 ctxt->sax->error(ctxt->userData,
3899 "Space required after '#FIXED'\n");
3900 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003901 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003902 }
3903 SKIP_BLANKS;
3904 }
3905 ret = xmlParseAttValue(ctxt);
3906 ctxt->instate = XML_PARSER_DTD;
3907 if (ret == NULL) {
3908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3909 ctxt->sax->error(ctxt->userData,
3910 "Attribute default value declaration error\n");
3911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003913 } else
3914 *value = ret;
3915 return(val);
3916}
3917
3918/**
3919 * xmlParseNotationType:
3920 * @ctxt: an XML parser context
3921 *
3922 * parse an Notation attribute type.
3923 *
3924 * Note: the leading 'NOTATION' S part has already being parsed...
3925 *
3926 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3927 *
3928 * [ VC: Notation Attributes ]
3929 * Values of this type must match one of the notation names included
3930 * in the declaration; all notation names in the declaration must be declared.
3931 *
3932 * Returns: the notation attribute tree built while parsing
3933 */
3934
3935xmlEnumerationPtr
3936xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3937 xmlChar *name;
3938 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3939
3940 if (RAW != '(') {
3941 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "'(' required to start 'NOTATION'\n");
3945 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003946 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003947 return(NULL);
3948 }
3949 SHRINK;
3950 do {
3951 NEXT;
3952 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003953 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003954 if (name == NULL) {
3955 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3957 ctxt->sax->error(ctxt->userData,
3958 "Name expected in NOTATION declaration\n");
3959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003961 return(ret);
3962 }
3963 cur = xmlCreateEnumeration(name);
3964 xmlFree(name);
3965 if (cur == NULL) return(ret);
3966 if (last == NULL) ret = last = cur;
3967 else {
3968 last->next = cur;
3969 last = cur;
3970 }
3971 SKIP_BLANKS;
3972 } while (RAW == '|');
3973 if (RAW != ')') {
3974 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3976 ctxt->sax->error(ctxt->userData,
3977 "')' required to finish NOTATION declaration\n");
3978 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003980 if ((last != NULL) && (last != ret))
3981 xmlFreeEnumeration(last);
3982 return(ret);
3983 }
3984 NEXT;
3985 return(ret);
3986}
3987
3988/**
3989 * xmlParseEnumerationType:
3990 * @ctxt: an XML parser context
3991 *
3992 * parse an Enumeration attribute type.
3993 *
3994 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3995 *
3996 * [ VC: Enumeration ]
3997 * Values of this type must match one of the Nmtoken tokens in
3998 * the declaration
3999 *
4000 * Returns: the enumeration attribute tree built while parsing
4001 */
4002
4003xmlEnumerationPtr
4004xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4005 xmlChar *name;
4006 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4007
4008 if (RAW != '(') {
4009 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4011 ctxt->sax->error(ctxt->userData,
4012 "'(' required to start ATTLIST enumeration\n");
4013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004015 return(NULL);
4016 }
4017 SHRINK;
4018 do {
4019 NEXT;
4020 SKIP_BLANKS;
4021 name = xmlParseNmtoken(ctxt);
4022 if (name == NULL) {
4023 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4025 ctxt->sax->error(ctxt->userData,
4026 "NmToken expected in ATTLIST enumeration\n");
4027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004029 return(ret);
4030 }
4031 cur = xmlCreateEnumeration(name);
4032 xmlFree(name);
4033 if (cur == NULL) return(ret);
4034 if (last == NULL) ret = last = cur;
4035 else {
4036 last->next = cur;
4037 last = cur;
4038 }
4039 SKIP_BLANKS;
4040 } while (RAW == '|');
4041 if (RAW != ')') {
4042 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4044 ctxt->sax->error(ctxt->userData,
4045 "')' required to finish ATTLIST enumeration\n");
4046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004048 return(ret);
4049 }
4050 NEXT;
4051 return(ret);
4052}
4053
4054/**
4055 * xmlParseEnumeratedType:
4056 * @ctxt: an XML parser context
4057 * @tree: the enumeration tree built while parsing
4058 *
4059 * parse an Enumerated attribute type.
4060 *
4061 * [57] EnumeratedType ::= NotationType | Enumeration
4062 *
4063 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4064 *
4065 *
4066 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4067 */
4068
4069int
4070xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4071 if ((RAW == 'N') && (NXT(1) == 'O') &&
4072 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4073 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4074 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4075 SKIP(8);
4076 if (!IS_BLANK(CUR)) {
4077 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4079 ctxt->sax->error(ctxt->userData,
4080 "Space required after 'NOTATION'\n");
4081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004083 return(0);
4084 }
4085 SKIP_BLANKS;
4086 *tree = xmlParseNotationType(ctxt);
4087 if (*tree == NULL) return(0);
4088 return(XML_ATTRIBUTE_NOTATION);
4089 }
4090 *tree = xmlParseEnumerationType(ctxt);
4091 if (*tree == NULL) return(0);
4092 return(XML_ATTRIBUTE_ENUMERATION);
4093}
4094
4095/**
4096 * xmlParseAttributeType:
4097 * @ctxt: an XML parser context
4098 * @tree: the enumeration tree built while parsing
4099 *
4100 * parse the Attribute list def for an element
4101 *
4102 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4103 *
4104 * [55] StringType ::= 'CDATA'
4105 *
4106 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4107 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4108 *
4109 * Validity constraints for attribute values syntax are checked in
4110 * xmlValidateAttributeValue()
4111 *
4112 * [ VC: ID ]
4113 * Values of type ID must match the Name production. A name must not
4114 * appear more than once in an XML document as a value of this type;
4115 * i.e., ID values must uniquely identify the elements which bear them.
4116 *
4117 * [ VC: One ID per Element Type ]
4118 * No element type may have more than one ID attribute specified.
4119 *
4120 * [ VC: ID Attribute Default ]
4121 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4122 *
4123 * [ VC: IDREF ]
4124 * Values of type IDREF must match the Name production, and values
4125 * of type IDREFS must match Names; each IDREF Name must match the value
4126 * of an ID attribute on some element in the XML document; i.e. IDREF
4127 * values must match the value of some ID attribute.
4128 *
4129 * [ VC: Entity Name ]
4130 * Values of type ENTITY must match the Name production, values
4131 * of type ENTITIES must match Names; each Entity Name must match the
4132 * name of an unparsed entity declared in the DTD.
4133 *
4134 * [ VC: Name Token ]
4135 * Values of type NMTOKEN must match the Nmtoken production; values
4136 * of type NMTOKENS must match Nmtokens.
4137 *
4138 * Returns the attribute type
4139 */
4140int
4141xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4142 SHRINK;
4143 if ((RAW == 'C') && (NXT(1) == 'D') &&
4144 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4145 (NXT(4) == 'A')) {
4146 SKIP(5);
4147 return(XML_ATTRIBUTE_CDATA);
4148 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4149 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4150 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4151 SKIP(6);
4152 return(XML_ATTRIBUTE_IDREFS);
4153 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4154 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4155 (NXT(4) == 'F')) {
4156 SKIP(5);
4157 return(XML_ATTRIBUTE_IDREF);
4158 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4159 SKIP(2);
4160 return(XML_ATTRIBUTE_ID);
4161 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4162 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4163 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4164 SKIP(6);
4165 return(XML_ATTRIBUTE_ENTITY);
4166 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4167 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4168 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4169 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4170 SKIP(8);
4171 return(XML_ATTRIBUTE_ENTITIES);
4172 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4173 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4174 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4175 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4176 SKIP(8);
4177 return(XML_ATTRIBUTE_NMTOKENS);
4178 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4179 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4180 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4181 (NXT(6) == 'N')) {
4182 SKIP(7);
4183 return(XML_ATTRIBUTE_NMTOKEN);
4184 }
4185 return(xmlParseEnumeratedType(ctxt, tree));
4186}
4187
4188/**
4189 * xmlParseAttributeListDecl:
4190 * @ctxt: an XML parser context
4191 *
4192 * : parse the Attribute list def for an element
4193 *
4194 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4195 *
4196 * [53] AttDef ::= S Name S AttType S DefaultDecl
4197 *
4198 */
4199void
4200xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4201 xmlChar *elemName;
4202 xmlChar *attrName;
4203 xmlEnumerationPtr tree;
4204
4205 if ((RAW == '<') && (NXT(1) == '!') &&
4206 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4207 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4208 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4209 (NXT(8) == 'T')) {
4210 xmlParserInputPtr input = ctxt->input;
4211
4212 SKIP(9);
4213 if (!IS_BLANK(CUR)) {
4214 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4216 ctxt->sax->error(ctxt->userData,
4217 "Space required after '<!ATTLIST'\n");
4218 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004219 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 }
4221 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004222 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004223 if (elemName == NULL) {
4224 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4226 ctxt->sax->error(ctxt->userData,
4227 "ATTLIST: no name for Element\n");
4228 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004229 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 return;
4231 }
4232 SKIP_BLANKS;
4233 GROW;
4234 while (RAW != '>') {
4235 const xmlChar *check = CUR_PTR;
4236 int type;
4237 int def;
4238 xmlChar *defaultValue = NULL;
4239
4240 GROW;
4241 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004242 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004243 if (attrName == NULL) {
4244 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4246 ctxt->sax->error(ctxt->userData,
4247 "ATTLIST: no name for Attribute\n");
4248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 break;
4251 }
4252 GROW;
4253 if (!IS_BLANK(CUR)) {
4254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4256 ctxt->sax->error(ctxt->userData,
4257 "Space required after the attribute name\n");
4258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004260 if (attrName != NULL)
4261 xmlFree(attrName);
4262 if (defaultValue != NULL)
4263 xmlFree(defaultValue);
4264 break;
4265 }
4266 SKIP_BLANKS;
4267
4268 type = xmlParseAttributeType(ctxt, &tree);
4269 if (type <= 0) {
4270 if (attrName != NULL)
4271 xmlFree(attrName);
4272 if (defaultValue != NULL)
4273 xmlFree(defaultValue);
4274 break;
4275 }
4276
4277 GROW;
4278 if (!IS_BLANK(CUR)) {
4279 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4281 ctxt->sax->error(ctxt->userData,
4282 "Space required after the attribute type\n");
4283 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004284 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004285 if (attrName != NULL)
4286 xmlFree(attrName);
4287 if (defaultValue != NULL)
4288 xmlFree(defaultValue);
4289 if (tree != NULL)
4290 xmlFreeEnumeration(tree);
4291 break;
4292 }
4293 SKIP_BLANKS;
4294
4295 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4296 if (def <= 0) {
4297 if (attrName != NULL)
4298 xmlFree(attrName);
4299 if (defaultValue != NULL)
4300 xmlFree(defaultValue);
4301 if (tree != NULL)
4302 xmlFreeEnumeration(tree);
4303 break;
4304 }
4305
4306 GROW;
4307 if (RAW != '>') {
4308 if (!IS_BLANK(CUR)) {
4309 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4311 ctxt->sax->error(ctxt->userData,
4312 "Space required after the attribute default value\n");
4313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004315 if (attrName != NULL)
4316 xmlFree(attrName);
4317 if (defaultValue != NULL)
4318 xmlFree(defaultValue);
4319 if (tree != NULL)
4320 xmlFreeEnumeration(tree);
4321 break;
4322 }
4323 SKIP_BLANKS;
4324 }
4325 if (check == CUR_PTR) {
4326 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4328 ctxt->sax->error(ctxt->userData,
4329 "xmlParseAttributeListDecl: detected internal error\n");
4330 if (attrName != NULL)
4331 xmlFree(attrName);
4332 if (defaultValue != NULL)
4333 xmlFree(defaultValue);
4334 if (tree != NULL)
4335 xmlFreeEnumeration(tree);
4336 break;
4337 }
4338 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4339 (ctxt->sax->attributeDecl != NULL))
4340 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4341 type, def, defaultValue, tree);
4342 if (attrName != NULL)
4343 xmlFree(attrName);
4344 if (defaultValue != NULL)
4345 xmlFree(defaultValue);
4346 GROW;
4347 }
4348 if (RAW == '>') {
4349 if (input != ctxt->input) {
4350 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352 ctxt->sax->error(ctxt->userData,
4353"Attribute list declaration doesn't start and stop in the same entity\n");
4354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 }
4357 NEXT;
4358 }
4359
4360 xmlFree(elemName);
4361 }
4362}
4363
4364/**
4365 * xmlParseElementMixedContentDecl:
4366 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004367 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004368 *
4369 * parse the declaration for a Mixed Element content
4370 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4371 *
4372 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4373 * '(' S? '#PCDATA' S? ')'
4374 *
4375 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4376 *
4377 * [ VC: No Duplicate Types ]
4378 * The same name must not appear more than once in a single
4379 * mixed-content declaration.
4380 *
4381 * returns: the list of the xmlElementContentPtr describing the element choices
4382 */
4383xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004384xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004385 xmlElementContentPtr ret = NULL, cur = NULL, n;
4386 xmlChar *elem = NULL;
4387
4388 GROW;
4389 if ((RAW == '#') && (NXT(1) == 'P') &&
4390 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4391 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4392 (NXT(6) == 'A')) {
4393 SKIP(7);
4394 SKIP_BLANKS;
4395 SHRINK;
4396 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004397 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4398 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4399 if (ctxt->vctxt.error != NULL)
4400 ctxt->vctxt.error(ctxt->vctxt.userData,
4401"Element content declaration doesn't start and stop in the same entity\n");
4402 ctxt->valid = 0;
4403 }
Owen Taylor3473f882001-02-23 17:55:21 +00004404 NEXT;
4405 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4406 if (RAW == '*') {
4407 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4408 NEXT;
4409 }
4410 return(ret);
4411 }
4412 if ((RAW == '(') || (RAW == '|')) {
4413 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4414 if (ret == NULL) return(NULL);
4415 }
4416 while (RAW == '|') {
4417 NEXT;
4418 if (elem == NULL) {
4419 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4420 if (ret == NULL) return(NULL);
4421 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004422 if (cur != NULL)
4423 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004424 cur = ret;
4425 } else {
4426 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4427 if (n == NULL) return(NULL);
4428 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004429 if (n->c1 != NULL)
4430 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004431 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004432 if (n != NULL)
4433 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 cur = n;
4435 xmlFree(elem);
4436 }
4437 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004438 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 if (elem == NULL) {
4440 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4442 ctxt->sax->error(ctxt->userData,
4443 "xmlParseElementMixedContentDecl : Name expected\n");
4444 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004445 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004446 xmlFreeElementContent(cur);
4447 return(NULL);
4448 }
4449 SKIP_BLANKS;
4450 GROW;
4451 }
4452 if ((RAW == ')') && (NXT(1) == '*')) {
4453 if (elem != NULL) {
4454 cur->c2 = xmlNewElementContent(elem,
4455 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004456 if (cur->c2 != NULL)
4457 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004458 xmlFree(elem);
4459 }
4460 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004461 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4462 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4463 if (ctxt->vctxt.error != NULL)
4464 ctxt->vctxt.error(ctxt->vctxt.userData,
4465"Element content declaration doesn't start and stop in the same entity\n");
4466 ctxt->valid = 0;
4467 }
Owen Taylor3473f882001-02-23 17:55:21 +00004468 SKIP(2);
4469 } else {
4470 if (elem != NULL) xmlFree(elem);
4471 xmlFreeElementContent(ret);
4472 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4474 ctxt->sax->error(ctxt->userData,
4475 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4476 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004477 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004478 return(NULL);
4479 }
4480
4481 } else {
4482 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4484 ctxt->sax->error(ctxt->userData,
4485 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4486 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004488 }
4489 return(ret);
4490}
4491
4492/**
4493 * xmlParseElementChildrenContentDecl:
4494 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004495 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004496 *
4497 * parse the declaration for an element children content model
4498 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4499 *
4500 *
4501 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4502 *
4503 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4504 *
4505 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4506 *
4507 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4508 *
4509 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4510 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004511 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004512 * opening or closing parentheses in a choice, seq, or Mixed
4513 * construct is contained in the replacement text for a parameter
4514 * entity, both must be contained in the same replacement text. For
4515 * interoperability, if a parameter-entity reference appears in a
4516 * choice, seq, or Mixed construct, its replacement text should not
4517 * be empty, and neither the first nor last non-blank character of
4518 * the replacement text should be a connector (| or ,).
4519 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004520 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004521 * hierarchy.
4522 */
4523xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004524xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004525(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004526 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4527 xmlChar *elem;
4528 xmlChar type = 0;
4529
4530 SKIP_BLANKS;
4531 GROW;
4532 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004533 xmlParserInputPtr input = ctxt->input;
4534
Owen Taylor3473f882001-02-23 17:55:21 +00004535 /* Recurse on first child */
4536 NEXT;
4537 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004538 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004539 SKIP_BLANKS;
4540 GROW;
4541 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004542 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004543 if (elem == NULL) {
4544 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4546 ctxt->sax->error(ctxt->userData,
4547 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4548 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004549 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004550 return(NULL);
4551 }
4552 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4553 GROW;
4554 if (RAW == '?') {
4555 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4556 NEXT;
4557 } else if (RAW == '*') {
4558 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4559 NEXT;
4560 } else if (RAW == '+') {
4561 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4562 NEXT;
4563 } else {
4564 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4565 }
4566 xmlFree(elem);
4567 GROW;
4568 }
4569 SKIP_BLANKS;
4570 SHRINK;
4571 while (RAW != ')') {
4572 /*
4573 * Each loop we parse one separator and one element.
4574 */
4575 if (RAW == ',') {
4576 if (type == 0) type = CUR;
4577
4578 /*
4579 * Detect "Name | Name , Name" error
4580 */
4581 else if (type != CUR) {
4582 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4586 type);
4587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004589 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004590 xmlFreeElementContent(last);
4591 if (ret != NULL)
4592 xmlFreeElementContent(ret);
4593 return(NULL);
4594 }
4595 NEXT;
4596
4597 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4598 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004599 if ((last != NULL) && (last != ret))
4600 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004601 xmlFreeElementContent(ret);
4602 return(NULL);
4603 }
4604 if (last == NULL) {
4605 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004606 if (ret != NULL)
4607 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004608 ret = cur = op;
4609 } else {
4610 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004611 if (op != NULL)
4612 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004613 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004614 if (last != NULL)
4615 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 cur =op;
4617 last = NULL;
4618 }
4619 } else if (RAW == '|') {
4620 if (type == 0) type = CUR;
4621
4622 /*
4623 * Detect "Name , Name | Name" error
4624 */
4625 else if (type != CUR) {
4626 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4628 ctxt->sax->error(ctxt->userData,
4629 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4630 type);
4631 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004632 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004633 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004634 xmlFreeElementContent(last);
4635 if (ret != NULL)
4636 xmlFreeElementContent(ret);
4637 return(NULL);
4638 }
4639 NEXT;
4640
4641 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4642 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004643 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004644 xmlFreeElementContent(last);
4645 if (ret != NULL)
4646 xmlFreeElementContent(ret);
4647 return(NULL);
4648 }
4649 if (last == NULL) {
4650 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004651 if (ret != NULL)
4652 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004653 ret = cur = op;
4654 } else {
4655 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004656 if (op != NULL)
4657 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004658 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004659 if (last != NULL)
4660 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 cur =op;
4662 last = NULL;
4663 }
4664 } else {
4665 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4667 ctxt->sax->error(ctxt->userData,
4668 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4669 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004670 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004671 if (ret != NULL)
4672 xmlFreeElementContent(ret);
4673 return(NULL);
4674 }
4675 GROW;
4676 SKIP_BLANKS;
4677 GROW;
4678 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004679 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004680 /* Recurse on second child */
4681 NEXT;
4682 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004683 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004684 SKIP_BLANKS;
4685 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004686 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 if (elem == NULL) {
4688 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4690 ctxt->sax->error(ctxt->userData,
4691 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4692 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004693 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004694 if (ret != NULL)
4695 xmlFreeElementContent(ret);
4696 return(NULL);
4697 }
4698 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4699 xmlFree(elem);
4700 if (RAW == '?') {
4701 last->ocur = XML_ELEMENT_CONTENT_OPT;
4702 NEXT;
4703 } else if (RAW == '*') {
4704 last->ocur = XML_ELEMENT_CONTENT_MULT;
4705 NEXT;
4706 } else if (RAW == '+') {
4707 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4708 NEXT;
4709 } else {
4710 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4711 }
4712 }
4713 SKIP_BLANKS;
4714 GROW;
4715 }
4716 if ((cur != NULL) && (last != NULL)) {
4717 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004718 if (last != NULL)
4719 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004720 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004721 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4722 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4723 if (ctxt->vctxt.error != NULL)
4724 ctxt->vctxt.error(ctxt->vctxt.userData,
4725"Element content declaration doesn't start and stop in the same entity\n");
4726 ctxt->valid = 0;
4727 }
Owen Taylor3473f882001-02-23 17:55:21 +00004728 NEXT;
4729 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004730 if (ret != NULL)
4731 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004732 NEXT;
4733 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004734 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004735 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004736 cur = ret;
4737 /*
4738 * Some normalization:
4739 * (a | b* | c?)* == (a | b | c)*
4740 */
4741 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4742 if ((cur->c1 != NULL) &&
4743 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4744 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4745 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4746 if ((cur->c2 != NULL) &&
4747 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4748 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4749 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4750 cur = cur->c2;
4751 }
4752 }
Owen Taylor3473f882001-02-23 17:55:21 +00004753 NEXT;
4754 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004755 if (ret != NULL) {
4756 int found = 0;
4757
Daniel Veillarde470df72001-04-18 21:41:07 +00004758 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004759 /*
4760 * Some normalization:
4761 * (a | b*)+ == (a | b)*
4762 * (a | b?)+ == (a | b)*
4763 */
4764 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4765 if ((cur->c1 != NULL) &&
4766 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4767 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4768 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4769 found = 1;
4770 }
4771 if ((cur->c2 != NULL) &&
4772 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4773 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4774 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4775 found = 1;
4776 }
4777 cur = cur->c2;
4778 }
4779 if (found)
4780 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4781 }
Owen Taylor3473f882001-02-23 17:55:21 +00004782 NEXT;
4783 }
4784 return(ret);
4785}
4786
4787/**
4788 * xmlParseElementContentDecl:
4789 * @ctxt: an XML parser context
4790 * @name: the name of the element being defined.
4791 * @result: the Element Content pointer will be stored here if any
4792 *
4793 * parse the declaration for an Element content either Mixed or Children,
4794 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4795 *
4796 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4797 *
4798 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4799 */
4800
4801int
4802xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4803 xmlElementContentPtr *result) {
4804
4805 xmlElementContentPtr tree = NULL;
4806 xmlParserInputPtr input = ctxt->input;
4807 int res;
4808
4809 *result = NULL;
4810
4811 if (RAW != '(') {
4812 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4814 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004815 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004816 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004817 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004818 return(-1);
4819 }
4820 NEXT;
4821 GROW;
4822 SKIP_BLANKS;
4823 if ((RAW == '#') && (NXT(1) == 'P') &&
4824 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4825 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4826 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004827 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004828 res = XML_ELEMENT_TYPE_MIXED;
4829 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004830 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_ELEMENT;
4832 }
Owen Taylor3473f882001-02-23 17:55:21 +00004833 SKIP_BLANKS;
4834 *result = tree;
4835 return(res);
4836}
4837
4838/**
4839 * xmlParseElementDecl:
4840 * @ctxt: an XML parser context
4841 *
4842 * parse an Element declaration.
4843 *
4844 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4845 *
4846 * [ VC: Unique Element Type Declaration ]
4847 * No element type may be declared more than once
4848 *
4849 * Returns the type of the element, or -1 in case of error
4850 */
4851int
4852xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4853 xmlChar *name;
4854 int ret = -1;
4855 xmlElementContentPtr content = NULL;
4856
4857 GROW;
4858 if ((RAW == '<') && (NXT(1) == '!') &&
4859 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4860 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4861 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4862 (NXT(8) == 'T')) {
4863 xmlParserInputPtr input = ctxt->input;
4864
4865 SKIP(9);
4866 if (!IS_BLANK(CUR)) {
4867 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4869 ctxt->sax->error(ctxt->userData,
4870 "Space required after 'ELEMENT'\n");
4871 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004872 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004873 }
4874 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004875 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004876 if (name == NULL) {
4877 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4879 ctxt->sax->error(ctxt->userData,
4880 "xmlParseElementDecl: no name for Element\n");
4881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004883 return(-1);
4884 }
4885 while ((RAW == 0) && (ctxt->inputNr > 1))
4886 xmlPopInput(ctxt);
4887 if (!IS_BLANK(CUR)) {
4888 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4890 ctxt->sax->error(ctxt->userData,
4891 "Space required after the element name\n");
4892 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004893 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004894 }
4895 SKIP_BLANKS;
4896 if ((RAW == 'E') && (NXT(1) == 'M') &&
4897 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4898 (NXT(4) == 'Y')) {
4899 SKIP(5);
4900 /*
4901 * Element must always be empty.
4902 */
4903 ret = XML_ELEMENT_TYPE_EMPTY;
4904 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4905 (NXT(2) == 'Y')) {
4906 SKIP(3);
4907 /*
4908 * Element is a generic container.
4909 */
4910 ret = XML_ELEMENT_TYPE_ANY;
4911 } else if (RAW == '(') {
4912 ret = xmlParseElementContentDecl(ctxt, name, &content);
4913 } else {
4914 /*
4915 * [ WFC: PEs in Internal Subset ] error handling.
4916 */
4917 if ((RAW == '%') && (ctxt->external == 0) &&
4918 (ctxt->inputNr == 1)) {
4919 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4921 ctxt->sax->error(ctxt->userData,
4922 "PEReference: forbidden within markup decl in internal subset\n");
4923 } else {
4924 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4928 }
4929 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004930 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004931 if (name != NULL) xmlFree(name);
4932 return(-1);
4933 }
4934
4935 SKIP_BLANKS;
4936 /*
4937 * Pop-up of finished entities.
4938 */
4939 while ((RAW == 0) && (ctxt->inputNr > 1))
4940 xmlPopInput(ctxt);
4941 SKIP_BLANKS;
4942
4943 if (RAW != '>') {
4944 ctxt->errNo = XML_ERR_GT_REQUIRED;
4945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4946 ctxt->sax->error(ctxt->userData,
4947 "xmlParseElementDecl: expected '>' at the end\n");
4948 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004949 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004950 } else {
4951 if (input != ctxt->input) {
4952 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4954 ctxt->sax->error(ctxt->userData,
4955"Element declaration doesn't start and stop in the same entity\n");
4956 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004957 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004958 }
4959
4960 NEXT;
4961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4962 (ctxt->sax->elementDecl != NULL))
4963 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4964 content);
4965 }
4966 if (content != NULL) {
4967 xmlFreeElementContent(content);
4968 }
4969 if (name != NULL) {
4970 xmlFree(name);
4971 }
4972 }
4973 return(ret);
4974}
4975
4976/**
Owen Taylor3473f882001-02-23 17:55:21 +00004977 * xmlParseConditionalSections
4978 * @ctxt: an XML parser context
4979 *
4980 * [61] conditionalSect ::= includeSect | ignoreSect
4981 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4982 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4983 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4984 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4985 */
4986
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004987static void
Owen Taylor3473f882001-02-23 17:55:21 +00004988xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4989 SKIP(3);
4990 SKIP_BLANKS;
4991 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4992 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4993 (NXT(6) == 'E')) {
4994 SKIP(7);
4995 SKIP_BLANKS;
4996 if (RAW != '[') {
4997 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4999 ctxt->sax->error(ctxt->userData,
5000 "XML conditional section '[' expected\n");
5001 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005002 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005003 } else {
5004 NEXT;
5005 }
5006 if (xmlParserDebugEntities) {
5007 if ((ctxt->input != NULL) && (ctxt->input->filename))
5008 xmlGenericError(xmlGenericErrorContext,
5009 "%s(%d): ", ctxt->input->filename,
5010 ctxt->input->line);
5011 xmlGenericError(xmlGenericErrorContext,
5012 "Entering INCLUDE Conditional Section\n");
5013 }
5014
5015 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5016 (NXT(2) != '>'))) {
5017 const xmlChar *check = CUR_PTR;
5018 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005019
5020 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5021 xmlParseConditionalSections(ctxt);
5022 } else if (IS_BLANK(CUR)) {
5023 NEXT;
5024 } else if (RAW == '%') {
5025 xmlParsePEReference(ctxt);
5026 } else
5027 xmlParseMarkupDecl(ctxt);
5028
5029 /*
5030 * Pop-up of finished entities.
5031 */
5032 while ((RAW == 0) && (ctxt->inputNr > 1))
5033 xmlPopInput(ctxt);
5034
Daniel Veillardfdc91562002-07-01 21:52:03 +00005035 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005036 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "Content error in the external subset\n");
5040 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005041 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005042 break;
5043 }
5044 }
5045 if (xmlParserDebugEntities) {
5046 if ((ctxt->input != NULL) && (ctxt->input->filename))
5047 xmlGenericError(xmlGenericErrorContext,
5048 "%s(%d): ", ctxt->input->filename,
5049 ctxt->input->line);
5050 xmlGenericError(xmlGenericErrorContext,
5051 "Leaving INCLUDE Conditional Section\n");
5052 }
5053
5054 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5055 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5056 int state;
5057 int instate;
5058 int depth = 0;
5059
5060 SKIP(6);
5061 SKIP_BLANKS;
5062 if (RAW != '[') {
5063 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5065 ctxt->sax->error(ctxt->userData,
5066 "XML conditional section '[' expected\n");
5067 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005068 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005069 } else {
5070 NEXT;
5071 }
5072 if (xmlParserDebugEntities) {
5073 if ((ctxt->input != NULL) && (ctxt->input->filename))
5074 xmlGenericError(xmlGenericErrorContext,
5075 "%s(%d): ", ctxt->input->filename,
5076 ctxt->input->line);
5077 xmlGenericError(xmlGenericErrorContext,
5078 "Entering IGNORE Conditional Section\n");
5079 }
5080
5081 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005082 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005083 * But disable SAX event generating DTD building in the meantime
5084 */
5085 state = ctxt->disableSAX;
5086 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005087 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005088 ctxt->instate = XML_PARSER_IGNORE;
5089
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005090 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005091 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5092 depth++;
5093 SKIP(3);
5094 continue;
5095 }
5096 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5097 if (--depth >= 0) SKIP(3);
5098 continue;
5099 }
5100 NEXT;
5101 continue;
5102 }
5103
5104 ctxt->disableSAX = state;
5105 ctxt->instate = instate;
5106
5107 if (xmlParserDebugEntities) {
5108 if ((ctxt->input != NULL) && (ctxt->input->filename))
5109 xmlGenericError(xmlGenericErrorContext,
5110 "%s(%d): ", ctxt->input->filename,
5111 ctxt->input->line);
5112 xmlGenericError(xmlGenericErrorContext,
5113 "Leaving IGNORE Conditional Section\n");
5114 }
5115
5116 } else {
5117 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5119 ctxt->sax->error(ctxt->userData,
5120 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5121 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005122 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005123 }
5124
5125 if (RAW == 0)
5126 SHRINK;
5127
5128 if (RAW == 0) {
5129 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5131 ctxt->sax->error(ctxt->userData,
5132 "XML conditional section not closed\n");
5133 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005134 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005135 } else {
5136 SKIP(3);
5137 }
5138}
5139
5140/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005141 * xmlParseMarkupDecl:
5142 * @ctxt: an XML parser context
5143 *
5144 * parse Markup declarations
5145 *
5146 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5147 * NotationDecl | PI | Comment
5148 *
5149 * [ VC: Proper Declaration/PE Nesting ]
5150 * Parameter-entity replacement text must be properly nested with
5151 * markup declarations. That is to say, if either the first character
5152 * or the last character of a markup declaration (markupdecl above) is
5153 * contained in the replacement text for a parameter-entity reference,
5154 * both must be contained in the same replacement text.
5155 *
5156 * [ WFC: PEs in Internal Subset ]
5157 * In the internal DTD subset, parameter-entity references can occur
5158 * only where markup declarations can occur, not within markup declarations.
5159 * (This does not apply to references that occur in external parameter
5160 * entities or to the external subset.)
5161 */
5162void
5163xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5164 GROW;
5165 xmlParseElementDecl(ctxt);
5166 xmlParseAttributeListDecl(ctxt);
5167 xmlParseEntityDecl(ctxt);
5168 xmlParseNotationDecl(ctxt);
5169 xmlParsePI(ctxt);
5170 xmlParseComment(ctxt);
5171 /*
5172 * This is only for internal subset. On external entities,
5173 * the replacement is done before parsing stage
5174 */
5175 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5176 xmlParsePEReference(ctxt);
5177
5178 /*
5179 * Conditional sections are allowed from entities included
5180 * by PE References in the internal subset.
5181 */
5182 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5183 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5184 xmlParseConditionalSections(ctxt);
5185 }
5186 }
5187
5188 ctxt->instate = XML_PARSER_DTD;
5189}
5190
5191/**
5192 * xmlParseTextDecl:
5193 * @ctxt: an XML parser context
5194 *
5195 * parse an XML declaration header for external entities
5196 *
5197 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5198 *
5199 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5200 */
5201
5202void
5203xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5204 xmlChar *version;
5205
5206 /*
5207 * We know that '<?xml' is here.
5208 */
5209 if ((RAW == '<') && (NXT(1) == '?') &&
5210 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5211 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5212 SKIP(5);
5213 } else {
5214 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5216 ctxt->sax->error(ctxt->userData,
5217 "Text declaration '<?xml' required\n");
5218 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005219 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005220
5221 return;
5222 }
5223
5224 if (!IS_BLANK(CUR)) {
5225 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5227 ctxt->sax->error(ctxt->userData,
5228 "Space needed after '<?xml'\n");
5229 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005230 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005231 }
5232 SKIP_BLANKS;
5233
5234 /*
5235 * We may have the VersionInfo here.
5236 */
5237 version = xmlParseVersionInfo(ctxt);
5238 if (version == NULL)
5239 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005240 else {
5241 if (!IS_BLANK(CUR)) {
5242 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5244 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5245 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005246 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005247 }
5248 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005249 ctxt->input->version = version;
5250
5251 /*
5252 * We must have the encoding declaration
5253 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005254 xmlParseEncodingDecl(ctxt);
5255 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5256 /*
5257 * The XML REC instructs us to stop parsing right here
5258 */
5259 return;
5260 }
5261
5262 SKIP_BLANKS;
5263 if ((RAW == '?') && (NXT(1) == '>')) {
5264 SKIP(2);
5265 } else if (RAW == '>') {
5266 /* Deprecated old WD ... */
5267 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5269 ctxt->sax->error(ctxt->userData,
5270 "XML declaration must end-up with '?>'\n");
5271 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005272 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005273 NEXT;
5274 } else {
5275 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5277 ctxt->sax->error(ctxt->userData,
5278 "parsing XML declaration: '?>' expected\n");
5279 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005280 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005281 MOVETO_ENDTAG(CUR_PTR);
5282 NEXT;
5283 }
5284}
5285
5286/**
Owen Taylor3473f882001-02-23 17:55:21 +00005287 * xmlParseExternalSubset:
5288 * @ctxt: an XML parser context
5289 * @ExternalID: the external identifier
5290 * @SystemID: the system identifier (or URL)
5291 *
5292 * parse Markup declarations from an external subset
5293 *
5294 * [30] extSubset ::= textDecl? extSubsetDecl
5295 *
5296 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5297 */
5298void
5299xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5300 const xmlChar *SystemID) {
5301 GROW;
5302 if ((RAW == '<') && (NXT(1) == '?') &&
5303 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5304 (NXT(4) == 'l')) {
5305 xmlParseTextDecl(ctxt);
5306 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5307 /*
5308 * The XML REC instructs us to stop parsing right here
5309 */
5310 ctxt->instate = XML_PARSER_EOF;
5311 return;
5312 }
5313 }
5314 if (ctxt->myDoc == NULL) {
5315 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5316 }
5317 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5318 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5319
5320 ctxt->instate = XML_PARSER_DTD;
5321 ctxt->external = 1;
5322 while (((RAW == '<') && (NXT(1) == '?')) ||
5323 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005324 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005325 const xmlChar *check = CUR_PTR;
5326 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005327
5328 GROW;
5329 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5330 xmlParseConditionalSections(ctxt);
5331 } else if (IS_BLANK(CUR)) {
5332 NEXT;
5333 } else if (RAW == '%') {
5334 xmlParsePEReference(ctxt);
5335 } else
5336 xmlParseMarkupDecl(ctxt);
5337
5338 /*
5339 * Pop-up of finished entities.
5340 */
5341 while ((RAW == 0) && (ctxt->inputNr > 1))
5342 xmlPopInput(ctxt);
5343
Daniel Veillardfdc91562002-07-01 21:52:03 +00005344 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005345 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5347 ctxt->sax->error(ctxt->userData,
5348 "Content error in the external subset\n");
5349 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005350 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005351 break;
5352 }
5353 }
5354
5355 if (RAW != 0) {
5356 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5358 ctxt->sax->error(ctxt->userData,
5359 "Extra content at the end of the document\n");
5360 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005361 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005362 }
5363
5364}
5365
5366/**
5367 * xmlParseReference:
5368 * @ctxt: an XML parser context
5369 *
5370 * parse and handle entity references in content, depending on the SAX
5371 * interface, this may end-up in a call to character() if this is a
5372 * CharRef, a predefined entity, if there is no reference() callback.
5373 * or if the parser was asked to switch to that mode.
5374 *
5375 * [67] Reference ::= EntityRef | CharRef
5376 */
5377void
5378xmlParseReference(xmlParserCtxtPtr ctxt) {
5379 xmlEntityPtr ent;
5380 xmlChar *val;
5381 if (RAW != '&') return;
5382
5383 if (NXT(1) == '#') {
5384 int i = 0;
5385 xmlChar out[10];
5386 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005387 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005388
5389 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5390 /*
5391 * So we are using non-UTF-8 buffers
5392 * Check that the char fit on 8bits, if not
5393 * generate a CharRef.
5394 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005395 if (value <= 0xFF) {
5396 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005397 out[1] = 0;
5398 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5399 (!ctxt->disableSAX))
5400 ctxt->sax->characters(ctxt->userData, out, 1);
5401 } else {
5402 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005403 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005404 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005405 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5407 (!ctxt->disableSAX))
5408 ctxt->sax->reference(ctxt->userData, out);
5409 }
5410 } else {
5411 /*
5412 * Just encode the value in UTF-8
5413 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005414 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005415 out[i] = 0;
5416 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5417 (!ctxt->disableSAX))
5418 ctxt->sax->characters(ctxt->userData, out, i);
5419 }
5420 } else {
5421 ent = xmlParseEntityRef(ctxt);
5422 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005423 if (!ctxt->wellFormed)
5424 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005425 if ((ent->name != NULL) &&
5426 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5427 xmlNodePtr list = NULL;
5428 int ret;
5429
5430
5431 /*
5432 * The first reference to the entity trigger a parsing phase
5433 * where the ent->children is filled with the result from
5434 * the parsing.
5435 */
5436 if (ent->children == NULL) {
5437 xmlChar *value;
5438 value = ent->content;
5439
5440 /*
5441 * Check that this entity is well formed
5442 */
5443 if ((value != NULL) &&
5444 (value[1] == 0) && (value[0] == '<') &&
5445 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5446 /*
5447 * DONE: get definite answer on this !!!
5448 * Lots of entity decls are used to declare a single
5449 * char
5450 * <!ENTITY lt "<">
5451 * Which seems to be valid since
5452 * 2.4: The ampersand character (&) and the left angle
5453 * bracket (<) may appear in their literal form only
5454 * when used ... They are also legal within the literal
5455 * entity value of an internal entity declaration;i
5456 * see "4.3.2 Well-Formed Parsed Entities".
5457 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5458 * Looking at the OASIS test suite and James Clark
5459 * tests, this is broken. However the XML REC uses
5460 * it. Is the XML REC not well-formed ????
5461 * This is a hack to avoid this problem
5462 *
5463 * ANSWER: since lt gt amp .. are already defined,
5464 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005465 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005466 * is lousy but acceptable.
5467 */
5468 list = xmlNewDocText(ctxt->myDoc, value);
5469 if (list != NULL) {
5470 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5471 (ent->children == NULL)) {
5472 ent->children = list;
5473 ent->last = list;
5474 list->parent = (xmlNodePtr) ent;
5475 } else {
5476 xmlFreeNodeList(list);
5477 }
5478 } else if (list != NULL) {
5479 xmlFreeNodeList(list);
5480 }
5481 } else {
5482 /*
5483 * 4.3.2: An internal general parsed entity is well-formed
5484 * if its replacement text matches the production labeled
5485 * content.
5486 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005487
5488 void *user_data;
5489 /*
5490 * This is a bit hackish but this seems the best
5491 * way to make sure both SAX and DOM entity support
5492 * behaves okay.
5493 */
5494 if (ctxt->userData == ctxt)
5495 user_data = NULL;
5496 else
5497 user_data = ctxt->userData;
5498
Owen Taylor3473f882001-02-23 17:55:21 +00005499 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5500 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005501 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5502 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005503 ctxt->depth--;
5504 } else if (ent->etype ==
5505 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5506 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005507 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005508 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005509 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005510 ctxt->depth--;
5511 } else {
5512 ret = -1;
5513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5514 ctxt->sax->error(ctxt->userData,
5515 "Internal: invalid entity type\n");
5516 }
5517 if (ret == XML_ERR_ENTITY_LOOP) {
5518 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5520 ctxt->sax->error(ctxt->userData,
5521 "Detected entity reference loop\n");
5522 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005523 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005524 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005526 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5527 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005528 (ent->children == NULL)) {
5529 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005530 if (ctxt->replaceEntities) {
5531 /*
5532 * Prune it directly in the generated document
5533 * except for single text nodes.
5534 */
5535 if ((list->type == XML_TEXT_NODE) &&
5536 (list->next == NULL)) {
5537 list->parent = (xmlNodePtr) ent;
5538 list = NULL;
5539 } else {
5540 while (list != NULL) {
5541 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005542 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005543 if (list->next == NULL)
5544 ent->last = list;
5545 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005546 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005547 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005548 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5549 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005550 }
5551 } else {
5552 while (list != NULL) {
5553 list->parent = (xmlNodePtr) ent;
5554 if (list->next == NULL)
5555 ent->last = list;
5556 list = list->next;
5557 }
Owen Taylor3473f882001-02-23 17:55:21 +00005558 }
5559 } else {
5560 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005561 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005562 }
5563 } else if (ret > 0) {
5564 ctxt->errNo = ret;
5565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5566 ctxt->sax->error(ctxt->userData,
5567 "Entity value required\n");
5568 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005569 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005570 } else if (list != NULL) {
5571 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005572 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005573 }
5574 }
5575 }
5576 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5577 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5578 /*
5579 * Create a node.
5580 */
5581 ctxt->sax->reference(ctxt->userData, ent->name);
5582 return;
5583 } else if (ctxt->replaceEntities) {
5584 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5585 /*
5586 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005587 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005588 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005589 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005591 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005592 cur = ent->children;
5593 while (cur != NULL) {
5594 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005595 if (firstChild == NULL){
5596 firstChild = new;
5597 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005598 xmlAddChild(ctxt->node, new);
5599 if (cur == ent->last)
5600 break;
5601 cur = cur->next;
5602 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005603 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5604 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005605 } else {
5606 /*
5607 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005608 * node with a possible previous text one which
5609 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005610 */
5611 if (ent->children->type == XML_TEXT_NODE)
5612 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5613 if ((ent->last != ent->children) &&
5614 (ent->last->type == XML_TEXT_NODE))
5615 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5616 xmlAddChildList(ctxt->node, ent->children);
5617 }
5618
Owen Taylor3473f882001-02-23 17:55:21 +00005619 /*
5620 * This is to avoid a nasty side effect, see
5621 * characters() in SAX.c
5622 */
5623 ctxt->nodemem = 0;
5624 ctxt->nodelen = 0;
5625 return;
5626 } else {
5627 /*
5628 * Probably running in SAX mode
5629 */
5630 xmlParserInputPtr input;
5631
5632 input = xmlNewEntityInputStream(ctxt, ent);
5633 xmlPushInput(ctxt, input);
5634 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5635 (RAW == '<') && (NXT(1) == '?') &&
5636 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5637 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5638 xmlParseTextDecl(ctxt);
5639 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5640 /*
5641 * The XML REC instructs us to stop parsing right here
5642 */
5643 ctxt->instate = XML_PARSER_EOF;
5644 return;
5645 }
5646 if (input->standalone == 1) {
5647 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5649 ctxt->sax->error(ctxt->userData,
5650 "external parsed entities cannot be standalone\n");
5651 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005652 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005653 }
5654 }
5655 return;
5656 }
5657 }
5658 } else {
5659 val = ent->content;
5660 if (val == NULL) return;
5661 /*
5662 * inline the entity.
5663 */
5664 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5665 (!ctxt->disableSAX))
5666 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5667 }
5668 }
5669}
5670
5671/**
5672 * xmlParseEntityRef:
5673 * @ctxt: an XML parser context
5674 *
5675 * parse ENTITY references declarations
5676 *
5677 * [68] EntityRef ::= '&' Name ';'
5678 *
5679 * [ WFC: Entity Declared ]
5680 * In a document without any DTD, a document with only an internal DTD
5681 * subset which contains no parameter entity references, or a document
5682 * with "standalone='yes'", the Name given in the entity reference
5683 * must match that in an entity declaration, except that well-formed
5684 * documents need not declare any of the following entities: amp, lt,
5685 * gt, apos, quot. The declaration of a parameter entity must precede
5686 * any reference to it. Similarly, the declaration of a general entity
5687 * must precede any reference to it which appears in a default value in an
5688 * attribute-list declaration. Note that if entities are declared in the
5689 * external subset or in external parameter entities, a non-validating
5690 * processor is not obligated to read and process their declarations;
5691 * for such documents, the rule that an entity must be declared is a
5692 * well-formedness constraint only if standalone='yes'.
5693 *
5694 * [ WFC: Parsed Entity ]
5695 * An entity reference must not contain the name of an unparsed entity
5696 *
5697 * Returns the xmlEntityPtr if found, or NULL otherwise.
5698 */
5699xmlEntityPtr
5700xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5701 xmlChar *name;
5702 xmlEntityPtr ent = NULL;
5703
5704 GROW;
5705
5706 if (RAW == '&') {
5707 NEXT;
5708 name = xmlParseName(ctxt);
5709 if (name == NULL) {
5710 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5712 ctxt->sax->error(ctxt->userData,
5713 "xmlParseEntityRef: no name\n");
5714 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005715 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005716 } else {
5717 if (RAW == ';') {
5718 NEXT;
5719 /*
5720 * Ask first SAX for entity resolution, otherwise try the
5721 * predefined set.
5722 */
5723 if (ctxt->sax != NULL) {
5724 if (ctxt->sax->getEntity != NULL)
5725 ent = ctxt->sax->getEntity(ctxt->userData, name);
5726 if (ent == NULL)
5727 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005728 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5729 ent = getEntity(ctxt, name);
5730 }
Owen Taylor3473f882001-02-23 17:55:21 +00005731 }
5732 /*
5733 * [ WFC: Entity Declared ]
5734 * In a document without any DTD, a document with only an
5735 * internal DTD subset which contains no parameter entity
5736 * references, or a document with "standalone='yes'", the
5737 * Name given in the entity reference must match that in an
5738 * entity declaration, except that well-formed documents
5739 * need not declare any of the following entities: amp, lt,
5740 * gt, apos, quot.
5741 * The declaration of a parameter entity must precede any
5742 * reference to it.
5743 * Similarly, the declaration of a general entity must
5744 * precede any reference to it which appears in a default
5745 * value in an attribute-list declaration. Note that if
5746 * entities are declared in the external subset or in
5747 * external parameter entities, a non-validating processor
5748 * is not obligated to read and process their declarations;
5749 * for such documents, the rule that an entity must be
5750 * declared is a well-formedness constraint only if
5751 * standalone='yes'.
5752 */
5753 if (ent == NULL) {
5754 if ((ctxt->standalone == 1) ||
5755 ((ctxt->hasExternalSubset == 0) &&
5756 (ctxt->hasPErefs == 0))) {
5757 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5759 ctxt->sax->error(ctxt->userData,
5760 "Entity '%s' not defined\n", name);
5761 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005762 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005763 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005764 } else {
5765 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005767 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005768 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005769 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005770 }
5771 }
5772
5773 /*
5774 * [ WFC: Parsed Entity ]
5775 * An entity reference must not contain the name of an
5776 * unparsed entity
5777 */
5778 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5779 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5781 ctxt->sax->error(ctxt->userData,
5782 "Entity reference to unparsed entity %s\n", name);
5783 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005784 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005785 }
5786
5787 /*
5788 * [ WFC: No External Entity References ]
5789 * Attribute values cannot contain direct or indirect
5790 * entity references to external entities.
5791 */
5792 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5793 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5794 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5796 ctxt->sax->error(ctxt->userData,
5797 "Attribute references external entity '%s'\n", name);
5798 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005799 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801 /*
5802 * [ WFC: No < in Attribute Values ]
5803 * The replacement text of any entity referred to directly or
5804 * indirectly in an attribute value (other than "&lt;") must
5805 * not contain a <.
5806 */
5807 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5808 (ent != NULL) &&
5809 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5810 (ent->content != NULL) &&
5811 (xmlStrchr(ent->content, '<'))) {
5812 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5814 ctxt->sax->error(ctxt->userData,
5815 "'<' in entity '%s' is not allowed in attributes values\n", name);
5816 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005817 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005818 }
5819
5820 /*
5821 * Internal check, no parameter entities here ...
5822 */
5823 else {
5824 switch (ent->etype) {
5825 case XML_INTERNAL_PARAMETER_ENTITY:
5826 case XML_EXTERNAL_PARAMETER_ENTITY:
5827 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5829 ctxt->sax->error(ctxt->userData,
5830 "Attempt to reference the parameter entity '%s'\n", name);
5831 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005832 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005833 break;
5834 default:
5835 break;
5836 }
5837 }
5838
5839 /*
5840 * [ WFC: No Recursion ]
5841 * A parsed entity must not contain a recursive reference
5842 * to itself, either directly or indirectly.
5843 * Done somewhere else
5844 */
5845
5846 } else {
5847 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5849 ctxt->sax->error(ctxt->userData,
5850 "xmlParseEntityRef: expecting ';'\n");
5851 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005852 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005853 }
5854 xmlFree(name);
5855 }
5856 }
5857 return(ent);
5858}
5859
5860/**
5861 * xmlParseStringEntityRef:
5862 * @ctxt: an XML parser context
5863 * @str: a pointer to an index in the string
5864 *
5865 * parse ENTITY references declarations, but this version parses it from
5866 * a string value.
5867 *
5868 * [68] EntityRef ::= '&' Name ';'
5869 *
5870 * [ WFC: Entity Declared ]
5871 * In a document without any DTD, a document with only an internal DTD
5872 * subset which contains no parameter entity references, or a document
5873 * with "standalone='yes'", the Name given in the entity reference
5874 * must match that in an entity declaration, except that well-formed
5875 * documents need not declare any of the following entities: amp, lt,
5876 * gt, apos, quot. The declaration of a parameter entity must precede
5877 * any reference to it. Similarly, the declaration of a general entity
5878 * must precede any reference to it which appears in a default value in an
5879 * attribute-list declaration. Note that if entities are declared in the
5880 * external subset or in external parameter entities, a non-validating
5881 * processor is not obligated to read and process their declarations;
5882 * for such documents, the rule that an entity must be declared is a
5883 * well-formedness constraint only if standalone='yes'.
5884 *
5885 * [ WFC: Parsed Entity ]
5886 * An entity reference must not contain the name of an unparsed entity
5887 *
5888 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5889 * is updated to the current location in the string.
5890 */
5891xmlEntityPtr
5892xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5893 xmlChar *name;
5894 const xmlChar *ptr;
5895 xmlChar cur;
5896 xmlEntityPtr ent = NULL;
5897
5898 if ((str == NULL) || (*str == NULL))
5899 return(NULL);
5900 ptr = *str;
5901 cur = *ptr;
5902 if (cur == '&') {
5903 ptr++;
5904 cur = *ptr;
5905 name = xmlParseStringName(ctxt, &ptr);
5906 if (name == NULL) {
5907 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5909 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005910 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005913 } else {
5914 if (*ptr == ';') {
5915 ptr++;
5916 /*
5917 * Ask first SAX for entity resolution, otherwise try the
5918 * predefined set.
5919 */
5920 if (ctxt->sax != NULL) {
5921 if (ctxt->sax->getEntity != NULL)
5922 ent = ctxt->sax->getEntity(ctxt->userData, name);
5923 if (ent == NULL)
5924 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005925 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5926 ent = getEntity(ctxt, name);
5927 }
Owen Taylor3473f882001-02-23 17:55:21 +00005928 }
5929 /*
5930 * [ WFC: Entity Declared ]
5931 * In a document without any DTD, a document with only an
5932 * internal DTD subset which contains no parameter entity
5933 * references, or a document with "standalone='yes'", the
5934 * Name given in the entity reference must match that in an
5935 * entity declaration, except that well-formed documents
5936 * need not declare any of the following entities: amp, lt,
5937 * gt, apos, quot.
5938 * The declaration of a parameter entity must precede any
5939 * reference to it.
5940 * Similarly, the declaration of a general entity must
5941 * precede any reference to it which appears in a default
5942 * value in an attribute-list declaration. Note that if
5943 * entities are declared in the external subset or in
5944 * external parameter entities, a non-validating processor
5945 * is not obligated to read and process their declarations;
5946 * for such documents, the rule that an entity must be
5947 * declared is a well-formedness constraint only if
5948 * standalone='yes'.
5949 */
5950 if (ent == NULL) {
5951 if ((ctxt->standalone == 1) ||
5952 ((ctxt->hasExternalSubset == 0) &&
5953 (ctxt->hasPErefs == 0))) {
5954 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5956 ctxt->sax->error(ctxt->userData,
5957 "Entity '%s' not defined\n", name);
5958 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005959 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005960 } else {
5961 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5962 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5963 ctxt->sax->warning(ctxt->userData,
5964 "Entity '%s' not defined\n", name);
5965 }
5966 }
5967
5968 /*
5969 * [ WFC: Parsed Entity ]
5970 * An entity reference must not contain the name of an
5971 * unparsed entity
5972 */
5973 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5974 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5976 ctxt->sax->error(ctxt->userData,
5977 "Entity reference to unparsed entity %s\n", name);
5978 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005980 }
5981
5982 /*
5983 * [ WFC: No External Entity References ]
5984 * Attribute values cannot contain direct or indirect
5985 * entity references to external entities.
5986 */
5987 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5988 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5989 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5991 ctxt->sax->error(ctxt->userData,
5992 "Attribute references external entity '%s'\n", name);
5993 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005994 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005995 }
5996 /*
5997 * [ WFC: No < in Attribute Values ]
5998 * The replacement text of any entity referred to directly or
5999 * indirectly in an attribute value (other than "&lt;") must
6000 * not contain a <.
6001 */
6002 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6003 (ent != NULL) &&
6004 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6005 (ent->content != NULL) &&
6006 (xmlStrchr(ent->content, '<'))) {
6007 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6009 ctxt->sax->error(ctxt->userData,
6010 "'<' in entity '%s' is not allowed in attributes values\n", name);
6011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006013 }
6014
6015 /*
6016 * Internal check, no parameter entities here ...
6017 */
6018 else {
6019 switch (ent->etype) {
6020 case XML_INTERNAL_PARAMETER_ENTITY:
6021 case XML_EXTERNAL_PARAMETER_ENTITY:
6022 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6024 ctxt->sax->error(ctxt->userData,
6025 "Attempt to reference the parameter entity '%s'\n", name);
6026 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006027 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006028 break;
6029 default:
6030 break;
6031 }
6032 }
6033
6034 /*
6035 * [ WFC: No Recursion ]
6036 * A parsed entity must not contain a recursive reference
6037 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006038 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006039 */
6040
6041 } else {
6042 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6044 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006045 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006048 }
6049 xmlFree(name);
6050 }
6051 }
6052 *str = ptr;
6053 return(ent);
6054}
6055
6056/**
6057 * xmlParsePEReference:
6058 * @ctxt: an XML parser context
6059 *
6060 * parse PEReference declarations
6061 * The entity content is handled directly by pushing it's content as
6062 * a new input stream.
6063 *
6064 * [69] PEReference ::= '%' Name ';'
6065 *
6066 * [ WFC: No Recursion ]
6067 * A parsed entity must not contain a recursive
6068 * reference to itself, either directly or indirectly.
6069 *
6070 * [ WFC: Entity Declared ]
6071 * In a document without any DTD, a document with only an internal DTD
6072 * subset which contains no parameter entity references, or a document
6073 * with "standalone='yes'", ... ... The declaration of a parameter
6074 * entity must precede any reference to it...
6075 *
6076 * [ VC: Entity Declared ]
6077 * In a document with an external subset or external parameter entities
6078 * with "standalone='no'", ... ... The declaration of a parameter entity
6079 * must precede any reference to it...
6080 *
6081 * [ WFC: In DTD ]
6082 * Parameter-entity references may only appear in the DTD.
6083 * NOTE: misleading but this is handled.
6084 */
6085void
6086xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6087 xmlChar *name;
6088 xmlEntityPtr entity = NULL;
6089 xmlParserInputPtr input;
6090
6091 if (RAW == '%') {
6092 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006093 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006094 if (name == NULL) {
6095 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6097 ctxt->sax->error(ctxt->userData,
6098 "xmlParsePEReference: no name\n");
6099 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006100 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006101 } else {
6102 if (RAW == ';') {
6103 NEXT;
6104 if ((ctxt->sax != NULL) &&
6105 (ctxt->sax->getParameterEntity != NULL))
6106 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6107 name);
6108 if (entity == NULL) {
6109 /*
6110 * [ WFC: Entity Declared ]
6111 * In a document without any DTD, a document with only an
6112 * internal DTD subset which contains no parameter entity
6113 * references, or a document with "standalone='yes'", ...
6114 * ... The declaration of a parameter entity must precede
6115 * any reference to it...
6116 */
6117 if ((ctxt->standalone == 1) ||
6118 ((ctxt->hasExternalSubset == 0) &&
6119 (ctxt->hasPErefs == 0))) {
6120 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6121 if ((!ctxt->disableSAX) &&
6122 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6123 ctxt->sax->error(ctxt->userData,
6124 "PEReference: %%%s; not found\n", name);
6125 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006126 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006127 } else {
6128 /*
6129 * [ VC: Entity Declared ]
6130 * In a document with an external subset or external
6131 * parameter entities with "standalone='no'", ...
6132 * ... The declaration of a parameter entity must precede
6133 * any reference to it...
6134 */
6135 if ((!ctxt->disableSAX) &&
6136 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6137 ctxt->sax->warning(ctxt->userData,
6138 "PEReference: %%%s; not found\n", name);
6139 ctxt->valid = 0;
6140 }
6141 } else {
6142 /*
6143 * Internal checking in case the entity quest barfed
6144 */
6145 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6146 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6147 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6148 ctxt->sax->warning(ctxt->userData,
6149 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006150 } else if (ctxt->input->free != deallocblankswrapper) {
6151 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6152 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 } else {
6154 /*
6155 * TODO !!!
6156 * handle the extra spaces added before and after
6157 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6158 */
6159 input = xmlNewEntityInputStream(ctxt, entity);
6160 xmlPushInput(ctxt, input);
6161 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6162 (RAW == '<') && (NXT(1) == '?') &&
6163 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6164 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6165 xmlParseTextDecl(ctxt);
6166 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6167 /*
6168 * The XML REC instructs us to stop parsing
6169 * right here
6170 */
6171 ctxt->instate = XML_PARSER_EOF;
6172 xmlFree(name);
6173 return;
6174 }
6175 }
Owen Taylor3473f882001-02-23 17:55:21 +00006176 }
6177 }
6178 ctxt->hasPErefs = 1;
6179 } else {
6180 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6181 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6182 ctxt->sax->error(ctxt->userData,
6183 "xmlParsePEReference: expecting ';'\n");
6184 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006185 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006186 }
6187 xmlFree(name);
6188 }
6189 }
6190}
6191
6192/**
6193 * xmlParseStringPEReference:
6194 * @ctxt: an XML parser context
6195 * @str: a pointer to an index in the string
6196 *
6197 * parse PEReference declarations
6198 *
6199 * [69] PEReference ::= '%' Name ';'
6200 *
6201 * [ WFC: No Recursion ]
6202 * A parsed entity must not contain a recursive
6203 * reference to itself, either directly or indirectly.
6204 *
6205 * [ WFC: Entity Declared ]
6206 * In a document without any DTD, a document with only an internal DTD
6207 * subset which contains no parameter entity references, or a document
6208 * with "standalone='yes'", ... ... The declaration of a parameter
6209 * entity must precede any reference to it...
6210 *
6211 * [ VC: Entity Declared ]
6212 * In a document with an external subset or external parameter entities
6213 * with "standalone='no'", ... ... The declaration of a parameter entity
6214 * must precede any reference to it...
6215 *
6216 * [ WFC: In DTD ]
6217 * Parameter-entity references may only appear in the DTD.
6218 * NOTE: misleading but this is handled.
6219 *
6220 * Returns the string of the entity content.
6221 * str is updated to the current value of the index
6222 */
6223xmlEntityPtr
6224xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6225 const xmlChar *ptr;
6226 xmlChar cur;
6227 xmlChar *name;
6228 xmlEntityPtr entity = NULL;
6229
6230 if ((str == NULL) || (*str == NULL)) return(NULL);
6231 ptr = *str;
6232 cur = *ptr;
6233 if (cur == '%') {
6234 ptr++;
6235 cur = *ptr;
6236 name = xmlParseStringName(ctxt, &ptr);
6237 if (name == NULL) {
6238 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6240 ctxt->sax->error(ctxt->userData,
6241 "xmlParseStringPEReference: no name\n");
6242 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006243 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006244 } else {
6245 cur = *ptr;
6246 if (cur == ';') {
6247 ptr++;
6248 cur = *ptr;
6249 if ((ctxt->sax != NULL) &&
6250 (ctxt->sax->getParameterEntity != NULL))
6251 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6252 name);
6253 if (entity == NULL) {
6254 /*
6255 * [ WFC: Entity Declared ]
6256 * In a document without any DTD, a document with only an
6257 * internal DTD subset which contains no parameter entity
6258 * references, or a document with "standalone='yes'", ...
6259 * ... The declaration of a parameter entity must precede
6260 * any reference to it...
6261 */
6262 if ((ctxt->standalone == 1) ||
6263 ((ctxt->hasExternalSubset == 0) &&
6264 (ctxt->hasPErefs == 0))) {
6265 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6267 ctxt->sax->error(ctxt->userData,
6268 "PEReference: %%%s; not found\n", name);
6269 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006270 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006271 } else {
6272 /*
6273 * [ VC: Entity Declared ]
6274 * In a document with an external subset or external
6275 * parameter entities with "standalone='no'", ...
6276 * ... The declaration of a parameter entity must
6277 * precede any reference to it...
6278 */
6279 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6280 ctxt->sax->warning(ctxt->userData,
6281 "PEReference: %%%s; not found\n", name);
6282 ctxt->valid = 0;
6283 }
6284 } else {
6285 /*
6286 * Internal checking in case the entity quest barfed
6287 */
6288 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6289 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6290 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6291 ctxt->sax->warning(ctxt->userData,
6292 "Internal: %%%s; is not a parameter entity\n", name);
6293 }
6294 }
6295 ctxt->hasPErefs = 1;
6296 } else {
6297 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6299 ctxt->sax->error(ctxt->userData,
6300 "xmlParseStringPEReference: expecting ';'\n");
6301 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006302 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006303 }
6304 xmlFree(name);
6305 }
6306 }
6307 *str = ptr;
6308 return(entity);
6309}
6310
6311/**
6312 * xmlParseDocTypeDecl:
6313 * @ctxt: an XML parser context
6314 *
6315 * parse a DOCTYPE declaration
6316 *
6317 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6318 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6319 *
6320 * [ VC: Root Element Type ]
6321 * The Name in the document type declaration must match the element
6322 * type of the root element.
6323 */
6324
6325void
6326xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6327 xmlChar *name = NULL;
6328 xmlChar *ExternalID = NULL;
6329 xmlChar *URI = NULL;
6330
6331 /*
6332 * We know that '<!DOCTYPE' has been detected.
6333 */
6334 SKIP(9);
6335
6336 SKIP_BLANKS;
6337
6338 /*
6339 * Parse the DOCTYPE name.
6340 */
6341 name = xmlParseName(ctxt);
6342 if (name == NULL) {
6343 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6345 ctxt->sax->error(ctxt->userData,
6346 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6347 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006348 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006349 }
6350 ctxt->intSubName = name;
6351
6352 SKIP_BLANKS;
6353
6354 /*
6355 * Check for SystemID and ExternalID
6356 */
6357 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6358
6359 if ((URI != NULL) || (ExternalID != NULL)) {
6360 ctxt->hasExternalSubset = 1;
6361 }
6362 ctxt->extSubURI = URI;
6363 ctxt->extSubSystem = ExternalID;
6364
6365 SKIP_BLANKS;
6366
6367 /*
6368 * Create and update the internal subset.
6369 */
6370 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6371 (!ctxt->disableSAX))
6372 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6373
6374 /*
6375 * Is there any internal subset declarations ?
6376 * they are handled separately in xmlParseInternalSubset()
6377 */
6378 if (RAW == '[')
6379 return;
6380
6381 /*
6382 * We should be at the end of the DOCTYPE declaration.
6383 */
6384 if (RAW != '>') {
6385 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006387 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006388 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006389 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006390 }
6391 NEXT;
6392}
6393
6394/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006395 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006396 * @ctxt: an XML parser context
6397 *
6398 * parse the internal subset declaration
6399 *
6400 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6401 */
6402
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006403static void
Owen Taylor3473f882001-02-23 17:55:21 +00006404xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6405 /*
6406 * Is there any DTD definition ?
6407 */
6408 if (RAW == '[') {
6409 ctxt->instate = XML_PARSER_DTD;
6410 NEXT;
6411 /*
6412 * Parse the succession of Markup declarations and
6413 * PEReferences.
6414 * Subsequence (markupdecl | PEReference | S)*
6415 */
6416 while (RAW != ']') {
6417 const xmlChar *check = CUR_PTR;
6418 int cons = ctxt->input->consumed;
6419
6420 SKIP_BLANKS;
6421 xmlParseMarkupDecl(ctxt);
6422 xmlParsePEReference(ctxt);
6423
6424 /*
6425 * Pop-up of finished entities.
6426 */
6427 while ((RAW == 0) && (ctxt->inputNr > 1))
6428 xmlPopInput(ctxt);
6429
6430 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6431 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6433 ctxt->sax->error(ctxt->userData,
6434 "xmlParseInternalSubset: error detected in Markup declaration\n");
6435 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006436 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006437 break;
6438 }
6439 }
6440 if (RAW == ']') {
6441 NEXT;
6442 SKIP_BLANKS;
6443 }
6444 }
6445
6446 /*
6447 * We should be at the end of the DOCTYPE declaration.
6448 */
6449 if (RAW != '>') {
6450 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006452 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006453 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006454 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006455 }
6456 NEXT;
6457}
6458
6459/**
6460 * xmlParseAttribute:
6461 * @ctxt: an XML parser context
6462 * @value: a xmlChar ** used to store the value of the attribute
6463 *
6464 * parse an attribute
6465 *
6466 * [41] Attribute ::= Name Eq AttValue
6467 *
6468 * [ WFC: No External Entity References ]
6469 * Attribute values cannot contain direct or indirect entity references
6470 * to external entities.
6471 *
6472 * [ WFC: No < in Attribute Values ]
6473 * The replacement text of any entity referred to directly or indirectly in
6474 * an attribute value (other than "&lt;") must not contain a <.
6475 *
6476 * [ VC: Attribute Value Type ]
6477 * The attribute must have been declared; the value must be of the type
6478 * declared for it.
6479 *
6480 * [25] Eq ::= S? '=' S?
6481 *
6482 * With namespace:
6483 *
6484 * [NS 11] Attribute ::= QName Eq AttValue
6485 *
6486 * Also the case QName == xmlns:??? is handled independently as a namespace
6487 * definition.
6488 *
6489 * Returns the attribute name, and the value in *value.
6490 */
6491
6492xmlChar *
6493xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6494 xmlChar *name, *val;
6495
6496 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006497 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006498 name = xmlParseName(ctxt);
6499 if (name == NULL) {
6500 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6502 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6503 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006504 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006505 return(NULL);
6506 }
6507
6508 /*
6509 * read the value
6510 */
6511 SKIP_BLANKS;
6512 if (RAW == '=') {
6513 NEXT;
6514 SKIP_BLANKS;
6515 val = xmlParseAttValue(ctxt);
6516 ctxt->instate = XML_PARSER_CONTENT;
6517 } else {
6518 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6520 ctxt->sax->error(ctxt->userData,
6521 "Specification mandate value for attribute %s\n", name);
6522 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006523 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006524 xmlFree(name);
6525 return(NULL);
6526 }
6527
6528 /*
6529 * Check that xml:lang conforms to the specification
6530 * No more registered as an error, just generate a warning now
6531 * since this was deprecated in XML second edition
6532 */
6533 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6534 if (!xmlCheckLanguageID(val)) {
6535 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6536 ctxt->sax->warning(ctxt->userData,
6537 "Malformed value for xml:lang : %s\n", val);
6538 }
6539 }
6540
6541 /*
6542 * Check that xml:space conforms to the specification
6543 */
6544 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6545 if (xmlStrEqual(val, BAD_CAST "default"))
6546 *(ctxt->space) = 0;
6547 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6548 *(ctxt->space) = 1;
6549 else {
6550 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6552 ctxt->sax->error(ctxt->userData,
6553"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6554 val);
6555 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006556 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006557 }
6558 }
6559
6560 *value = val;
6561 return(name);
6562}
6563
6564/**
6565 * xmlParseStartTag:
6566 * @ctxt: an XML parser context
6567 *
6568 * parse a start of tag either for rule element or
6569 * EmptyElement. In both case we don't parse the tag closing chars.
6570 *
6571 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6572 *
6573 * [ WFC: Unique Att Spec ]
6574 * No attribute name may appear more than once in the same start-tag or
6575 * empty-element tag.
6576 *
6577 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6578 *
6579 * [ WFC: Unique Att Spec ]
6580 * No attribute name may appear more than once in the same start-tag or
6581 * empty-element tag.
6582 *
6583 * With namespace:
6584 *
6585 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6586 *
6587 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6588 *
6589 * Returns the element name parsed
6590 */
6591
6592xmlChar *
6593xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6594 xmlChar *name;
6595 xmlChar *attname;
6596 xmlChar *attvalue;
6597 const xmlChar **atts = NULL;
6598 int nbatts = 0;
6599 int maxatts = 0;
6600 int i;
6601
6602 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006603 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006604
6605 name = xmlParseName(ctxt);
6606 if (name == NULL) {
6607 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6609 ctxt->sax->error(ctxt->userData,
6610 "xmlParseStartTag: invalid element name\n");
6611 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006612 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006613 return(NULL);
6614 }
6615
6616 /*
6617 * Now parse the attributes, it ends up with the ending
6618 *
6619 * (S Attribute)* S?
6620 */
6621 SKIP_BLANKS;
6622 GROW;
6623
Daniel Veillard21a0f912001-02-25 19:54:14 +00006624 while ((RAW != '>') &&
6625 ((RAW != '/') || (NXT(1) != '>')) &&
6626 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006627 const xmlChar *q = CUR_PTR;
6628 int cons = ctxt->input->consumed;
6629
6630 attname = xmlParseAttribute(ctxt, &attvalue);
6631 if ((attname != NULL) && (attvalue != NULL)) {
6632 /*
6633 * [ WFC: Unique Att Spec ]
6634 * No attribute name may appear more than once in the same
6635 * start-tag or empty-element tag.
6636 */
6637 for (i = 0; i < nbatts;i += 2) {
6638 if (xmlStrEqual(atts[i], attname)) {
6639 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6641 ctxt->sax->error(ctxt->userData,
6642 "Attribute %s redefined\n",
6643 attname);
6644 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006645 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006646 xmlFree(attname);
6647 xmlFree(attvalue);
6648 goto failed;
6649 }
6650 }
6651
6652 /*
6653 * Add the pair to atts
6654 */
6655 if (atts == NULL) {
6656 maxatts = 10;
6657 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6658 if (atts == NULL) {
6659 xmlGenericError(xmlGenericErrorContext,
6660 "malloc of %ld byte failed\n",
6661 maxatts * (long)sizeof(xmlChar *));
6662 return(NULL);
6663 }
6664 } else if (nbatts + 4 > maxatts) {
6665 maxatts *= 2;
6666 atts = (const xmlChar **) xmlRealloc((void *) atts,
6667 maxatts * sizeof(xmlChar *));
6668 if (atts == NULL) {
6669 xmlGenericError(xmlGenericErrorContext,
6670 "realloc of %ld byte failed\n",
6671 maxatts * (long)sizeof(xmlChar *));
6672 return(NULL);
6673 }
6674 }
6675 atts[nbatts++] = attname;
6676 atts[nbatts++] = attvalue;
6677 atts[nbatts] = NULL;
6678 atts[nbatts + 1] = NULL;
6679 } else {
6680 if (attname != NULL)
6681 xmlFree(attname);
6682 if (attvalue != NULL)
6683 xmlFree(attvalue);
6684 }
6685
6686failed:
6687
6688 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6689 break;
6690 if (!IS_BLANK(RAW)) {
6691 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6693 ctxt->sax->error(ctxt->userData,
6694 "attributes construct error\n");
6695 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006696 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006697 }
6698 SKIP_BLANKS;
6699 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6700 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6702 ctxt->sax->error(ctxt->userData,
6703 "xmlParseStartTag: problem parsing attributes\n");
6704 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006705 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006706 break;
6707 }
6708 GROW;
6709 }
6710
6711 /*
6712 * SAX: Start of Element !
6713 */
6714 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6715 (!ctxt->disableSAX))
6716 ctxt->sax->startElement(ctxt->userData, name, atts);
6717
6718 if (atts != NULL) {
6719 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6720 xmlFree((void *) atts);
6721 }
6722 return(name);
6723}
6724
6725/**
6726 * xmlParseEndTag:
6727 * @ctxt: an XML parser context
6728 *
6729 * parse an end of tag
6730 *
6731 * [42] ETag ::= '</' Name S? '>'
6732 *
6733 * With namespace
6734 *
6735 * [NS 9] ETag ::= '</' QName S? '>'
6736 */
6737
6738void
6739xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6740 xmlChar *name;
6741 xmlChar *oldname;
6742
6743 GROW;
6744 if ((RAW != '<') || (NXT(1) != '/')) {
6745 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6747 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6748 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006750 return;
6751 }
6752 SKIP(2);
6753
Daniel Veillard46de64e2002-05-29 08:21:33 +00006754 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006755
6756 /*
6757 * We should definitely be at the ending "S? '>'" part
6758 */
6759 GROW;
6760 SKIP_BLANKS;
6761 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6762 ctxt->errNo = XML_ERR_GT_REQUIRED;
6763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6764 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6765 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006766 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006767 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006768 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006769
6770 /*
6771 * [ WFC: Element Type Match ]
6772 * The Name in an element's end-tag must match the element type in the
6773 * start-tag.
6774 *
6775 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006776 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006777 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006779 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006780 ctxt->sax->error(ctxt->userData,
6781 "Opening and ending tag mismatch: %s and %s\n",
6782 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006783 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006784 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006785 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006786 }
6787
6788 }
6789 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006790 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6791#if 0
6792 else {
6793 /*
6794 * Recover in case of one missing close
6795 */
6796 if ((ctxt->nameNr > 2) &&
6797 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6798 namePop(ctxt);
6799 spacePop(ctxt);
6800 }
6801 }
6802#endif
6803 if (name != NULL)
6804 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006805 }
6806
6807 /*
6808 * SAX: End of Tag
6809 */
6810 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6811 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006812 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006813
Owen Taylor3473f882001-02-23 17:55:21 +00006814 oldname = namePop(ctxt);
6815 spacePop(ctxt);
6816 if (oldname != NULL) {
6817#ifdef DEBUG_STACK
6818 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6819#endif
6820 xmlFree(oldname);
6821 }
6822 return;
6823}
6824
6825/**
6826 * xmlParseCDSect:
6827 * @ctxt: an XML parser context
6828 *
6829 * Parse escaped pure raw content.
6830 *
6831 * [18] CDSect ::= CDStart CData CDEnd
6832 *
6833 * [19] CDStart ::= '<![CDATA['
6834 *
6835 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6836 *
6837 * [21] CDEnd ::= ']]>'
6838 */
6839void
6840xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6841 xmlChar *buf = NULL;
6842 int len = 0;
6843 int size = XML_PARSER_BUFFER_SIZE;
6844 int r, rl;
6845 int s, sl;
6846 int cur, l;
6847 int count = 0;
6848
6849 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6850 (NXT(2) == '[') && (NXT(3) == 'C') &&
6851 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6852 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6853 (NXT(8) == '[')) {
6854 SKIP(9);
6855 } else
6856 return;
6857
6858 ctxt->instate = XML_PARSER_CDATA_SECTION;
6859 r = CUR_CHAR(rl);
6860 if (!IS_CHAR(r)) {
6861 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6863 ctxt->sax->error(ctxt->userData,
6864 "CData section not finished\n");
6865 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006866 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006867 ctxt->instate = XML_PARSER_CONTENT;
6868 return;
6869 }
6870 NEXTL(rl);
6871 s = CUR_CHAR(sl);
6872 if (!IS_CHAR(s)) {
6873 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6875 ctxt->sax->error(ctxt->userData,
6876 "CData section not finished\n");
6877 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006878 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006879 ctxt->instate = XML_PARSER_CONTENT;
6880 return;
6881 }
6882 NEXTL(sl);
6883 cur = CUR_CHAR(l);
6884 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6885 if (buf == NULL) {
6886 xmlGenericError(xmlGenericErrorContext,
6887 "malloc of %d byte failed\n", size);
6888 return;
6889 }
6890 while (IS_CHAR(cur) &&
6891 ((r != ']') || (s != ']') || (cur != '>'))) {
6892 if (len + 5 >= size) {
6893 size *= 2;
6894 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6895 if (buf == NULL) {
6896 xmlGenericError(xmlGenericErrorContext,
6897 "realloc of %d byte failed\n", size);
6898 return;
6899 }
6900 }
6901 COPY_BUF(rl,buf,len,r);
6902 r = s;
6903 rl = sl;
6904 s = cur;
6905 sl = l;
6906 count++;
6907 if (count > 50) {
6908 GROW;
6909 count = 0;
6910 }
6911 NEXTL(l);
6912 cur = CUR_CHAR(l);
6913 }
6914 buf[len] = 0;
6915 ctxt->instate = XML_PARSER_CONTENT;
6916 if (cur != '>') {
6917 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919 ctxt->sax->error(ctxt->userData,
6920 "CData section not finished\n%.50s\n", buf);
6921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006923 xmlFree(buf);
6924 return;
6925 }
6926 NEXTL(l);
6927
6928 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006929 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006930 */
6931 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6932 if (ctxt->sax->cdataBlock != NULL)
6933 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006934 else if (ctxt->sax->characters != NULL)
6935 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006936 }
6937 xmlFree(buf);
6938}
6939
6940/**
6941 * xmlParseContent:
6942 * @ctxt: an XML parser context
6943 *
6944 * Parse a content:
6945 *
6946 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6947 */
6948
6949void
6950xmlParseContent(xmlParserCtxtPtr ctxt) {
6951 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006952 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006953 ((RAW != '<') || (NXT(1) != '/'))) {
6954 const xmlChar *test = CUR_PTR;
6955 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006956 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006957
6958 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006959 * First case : a Processing Instruction.
6960 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006961 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006962 xmlParsePI(ctxt);
6963 }
6964
6965 /*
6966 * Second case : a CDSection
6967 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006968 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006969 (NXT(2) == '[') && (NXT(3) == 'C') &&
6970 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6971 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6972 (NXT(8) == '[')) {
6973 xmlParseCDSect(ctxt);
6974 }
6975
6976 /*
6977 * Third case : a comment
6978 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006979 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006980 (NXT(2) == '-') && (NXT(3) == '-')) {
6981 xmlParseComment(ctxt);
6982 ctxt->instate = XML_PARSER_CONTENT;
6983 }
6984
6985 /*
6986 * Fourth case : a sub-element.
6987 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006988 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006989 xmlParseElement(ctxt);
6990 }
6991
6992 /*
6993 * Fifth case : a reference. If if has not been resolved,
6994 * parsing returns it's Name, create the node
6995 */
6996
Daniel Veillard21a0f912001-02-25 19:54:14 +00006997 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006998 xmlParseReference(ctxt);
6999 }
7000
7001 /*
7002 * Last case, text. Note that References are handled directly.
7003 */
7004 else {
7005 xmlParseCharData(ctxt, 0);
7006 }
7007
7008 GROW;
7009 /*
7010 * Pop-up of finished entities.
7011 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007012 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007013 xmlPopInput(ctxt);
7014 SHRINK;
7015
Daniel Veillardfdc91562002-07-01 21:52:03 +00007016 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007017 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7019 ctxt->sax->error(ctxt->userData,
7020 "detected an error in element content\n");
7021 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007022 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007023 ctxt->instate = XML_PARSER_EOF;
7024 break;
7025 }
7026 }
7027}
7028
7029/**
7030 * xmlParseElement:
7031 * @ctxt: an XML parser context
7032 *
7033 * parse an XML element, this is highly recursive
7034 *
7035 * [39] element ::= EmptyElemTag | STag content ETag
7036 *
7037 * [ WFC: Element Type Match ]
7038 * The Name in an element's end-tag must match the element type in the
7039 * start-tag.
7040 *
7041 * [ VC: Element Valid ]
7042 * An element is valid if there is a declaration matching elementdecl
7043 * where the Name matches the element type and one of the following holds:
7044 * - The declaration matches EMPTY and the element has no content.
7045 * - The declaration matches children and the sequence of child elements
7046 * belongs to the language generated by the regular expression in the
7047 * content model, with optional white space (characters matching the
7048 * nonterminal S) between each pair of child elements.
7049 * - The declaration matches Mixed and the content consists of character
7050 * data and child elements whose types match names in the content model.
7051 * - The declaration matches ANY, and the types of any child elements have
7052 * been declared.
7053 */
7054
7055void
7056xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007057 xmlChar *name;
7058 xmlChar *oldname;
7059 xmlParserNodeInfo node_info;
7060 xmlNodePtr ret;
7061
7062 /* Capture start position */
7063 if (ctxt->record_info) {
7064 node_info.begin_pos = ctxt->input->consumed +
7065 (CUR_PTR - ctxt->input->base);
7066 node_info.begin_line = ctxt->input->line;
7067 }
7068
7069 if (ctxt->spaceNr == 0)
7070 spacePush(ctxt, -1);
7071 else
7072 spacePush(ctxt, *ctxt->space);
7073
7074 name = xmlParseStartTag(ctxt);
7075 if (name == NULL) {
7076 spacePop(ctxt);
7077 return;
7078 }
7079 namePush(ctxt, name);
7080 ret = ctxt->node;
7081
7082 /*
7083 * [ VC: Root Element Type ]
7084 * The Name in the document type declaration must match the element
7085 * type of the root element.
7086 */
7087 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7088 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7089 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7090
7091 /*
7092 * Check for an Empty Element.
7093 */
7094 if ((RAW == '/') && (NXT(1) == '>')) {
7095 SKIP(2);
7096 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7097 (!ctxt->disableSAX))
7098 ctxt->sax->endElement(ctxt->userData, name);
7099 oldname = namePop(ctxt);
7100 spacePop(ctxt);
7101 if (oldname != NULL) {
7102#ifdef DEBUG_STACK
7103 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7104#endif
7105 xmlFree(oldname);
7106 }
7107 if ( ret != NULL && ctxt->record_info ) {
7108 node_info.end_pos = ctxt->input->consumed +
7109 (CUR_PTR - ctxt->input->base);
7110 node_info.end_line = ctxt->input->line;
7111 node_info.node = ret;
7112 xmlParserAddNodeInfo(ctxt, &node_info);
7113 }
7114 return;
7115 }
7116 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007117 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007118 } else {
7119 ctxt->errNo = XML_ERR_GT_REQUIRED;
7120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7121 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007122 "Couldn't find end of Start Tag %s\n",
7123 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007124 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007125 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007126
7127 /*
7128 * end of parsing of this node.
7129 */
7130 nodePop(ctxt);
7131 oldname = namePop(ctxt);
7132 spacePop(ctxt);
7133 if (oldname != NULL) {
7134#ifdef DEBUG_STACK
7135 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7136#endif
7137 xmlFree(oldname);
7138 }
7139
7140 /*
7141 * Capture end position and add node
7142 */
7143 if ( ret != NULL && ctxt->record_info ) {
7144 node_info.end_pos = ctxt->input->consumed +
7145 (CUR_PTR - ctxt->input->base);
7146 node_info.end_line = ctxt->input->line;
7147 node_info.node = ret;
7148 xmlParserAddNodeInfo(ctxt, &node_info);
7149 }
7150 return;
7151 }
7152
7153 /*
7154 * Parse the content of the element:
7155 */
7156 xmlParseContent(ctxt);
7157 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007158 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7160 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007161 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007162 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007163 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007164
7165 /*
7166 * end of parsing of this node.
7167 */
7168 nodePop(ctxt);
7169 oldname = namePop(ctxt);
7170 spacePop(ctxt);
7171 if (oldname != NULL) {
7172#ifdef DEBUG_STACK
7173 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7174#endif
7175 xmlFree(oldname);
7176 }
7177 return;
7178 }
7179
7180 /*
7181 * parse the end of tag: '</' should be here.
7182 */
7183 xmlParseEndTag(ctxt);
7184
7185 /*
7186 * Capture end position and add node
7187 */
7188 if ( ret != NULL && ctxt->record_info ) {
7189 node_info.end_pos = ctxt->input->consumed +
7190 (CUR_PTR - ctxt->input->base);
7191 node_info.end_line = ctxt->input->line;
7192 node_info.node = ret;
7193 xmlParserAddNodeInfo(ctxt, &node_info);
7194 }
7195}
7196
7197/**
7198 * xmlParseVersionNum:
7199 * @ctxt: an XML parser context
7200 *
7201 * parse the XML version value.
7202 *
7203 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7204 *
7205 * Returns the string giving the XML version number, or NULL
7206 */
7207xmlChar *
7208xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7209 xmlChar *buf = NULL;
7210 int len = 0;
7211 int size = 10;
7212 xmlChar cur;
7213
7214 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7215 if (buf == NULL) {
7216 xmlGenericError(xmlGenericErrorContext,
7217 "malloc of %d byte failed\n", size);
7218 return(NULL);
7219 }
7220 cur = CUR;
7221 while (((cur >= 'a') && (cur <= 'z')) ||
7222 ((cur >= 'A') && (cur <= 'Z')) ||
7223 ((cur >= '0') && (cur <= '9')) ||
7224 (cur == '_') || (cur == '.') ||
7225 (cur == ':') || (cur == '-')) {
7226 if (len + 1 >= size) {
7227 size *= 2;
7228 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7229 if (buf == NULL) {
7230 xmlGenericError(xmlGenericErrorContext,
7231 "realloc of %d byte failed\n", size);
7232 return(NULL);
7233 }
7234 }
7235 buf[len++] = cur;
7236 NEXT;
7237 cur=CUR;
7238 }
7239 buf[len] = 0;
7240 return(buf);
7241}
7242
7243/**
7244 * xmlParseVersionInfo:
7245 * @ctxt: an XML parser context
7246 *
7247 * parse the XML version.
7248 *
7249 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7250 *
7251 * [25] Eq ::= S? '=' S?
7252 *
7253 * Returns the version string, e.g. "1.0"
7254 */
7255
7256xmlChar *
7257xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7258 xmlChar *version = NULL;
7259 const xmlChar *q;
7260
7261 if ((RAW == 'v') && (NXT(1) == 'e') &&
7262 (NXT(2) == 'r') && (NXT(3) == 's') &&
7263 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7264 (NXT(6) == 'n')) {
7265 SKIP(7);
7266 SKIP_BLANKS;
7267 if (RAW != '=') {
7268 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7270 ctxt->sax->error(ctxt->userData,
7271 "xmlParseVersionInfo : expected '='\n");
7272 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007273 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007274 return(NULL);
7275 }
7276 NEXT;
7277 SKIP_BLANKS;
7278 if (RAW == '"') {
7279 NEXT;
7280 q = CUR_PTR;
7281 version = xmlParseVersionNum(ctxt);
7282 if (RAW != '"') {
7283 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7285 ctxt->sax->error(ctxt->userData,
7286 "String not closed\n%.50s\n", q);
7287 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007288 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007289 } else
7290 NEXT;
7291 } else if (RAW == '\''){
7292 NEXT;
7293 q = CUR_PTR;
7294 version = xmlParseVersionNum(ctxt);
7295 if (RAW != '\'') {
7296 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7298 ctxt->sax->error(ctxt->userData,
7299 "String not closed\n%.50s\n", q);
7300 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007301 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007302 } else
7303 NEXT;
7304 } else {
7305 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7307 ctxt->sax->error(ctxt->userData,
7308 "xmlParseVersionInfo : expected ' or \"\n");
7309 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007310 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007311 }
7312 }
7313 return(version);
7314}
7315
7316/**
7317 * xmlParseEncName:
7318 * @ctxt: an XML parser context
7319 *
7320 * parse the XML encoding name
7321 *
7322 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7323 *
7324 * Returns the encoding name value or NULL
7325 */
7326xmlChar *
7327xmlParseEncName(xmlParserCtxtPtr ctxt) {
7328 xmlChar *buf = NULL;
7329 int len = 0;
7330 int size = 10;
7331 xmlChar cur;
7332
7333 cur = CUR;
7334 if (((cur >= 'a') && (cur <= 'z')) ||
7335 ((cur >= 'A') && (cur <= 'Z'))) {
7336 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7337 if (buf == NULL) {
7338 xmlGenericError(xmlGenericErrorContext,
7339 "malloc of %d byte failed\n", size);
7340 return(NULL);
7341 }
7342
7343 buf[len++] = cur;
7344 NEXT;
7345 cur = CUR;
7346 while (((cur >= 'a') && (cur <= 'z')) ||
7347 ((cur >= 'A') && (cur <= 'Z')) ||
7348 ((cur >= '0') && (cur <= '9')) ||
7349 (cur == '.') || (cur == '_') ||
7350 (cur == '-')) {
7351 if (len + 1 >= size) {
7352 size *= 2;
7353 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7354 if (buf == NULL) {
7355 xmlGenericError(xmlGenericErrorContext,
7356 "realloc of %d byte failed\n", size);
7357 return(NULL);
7358 }
7359 }
7360 buf[len++] = cur;
7361 NEXT;
7362 cur = CUR;
7363 if (cur == 0) {
7364 SHRINK;
7365 GROW;
7366 cur = CUR;
7367 }
7368 }
7369 buf[len] = 0;
7370 } else {
7371 ctxt->errNo = XML_ERR_ENCODING_NAME;
7372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7373 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7374 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007375 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007376 }
7377 return(buf);
7378}
7379
7380/**
7381 * xmlParseEncodingDecl:
7382 * @ctxt: an XML parser context
7383 *
7384 * parse the XML encoding declaration
7385 *
7386 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7387 *
7388 * this setups the conversion filters.
7389 *
7390 * Returns the encoding value or NULL
7391 */
7392
7393xmlChar *
7394xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7395 xmlChar *encoding = NULL;
7396 const xmlChar *q;
7397
7398 SKIP_BLANKS;
7399 if ((RAW == 'e') && (NXT(1) == 'n') &&
7400 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7401 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7402 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7403 SKIP(8);
7404 SKIP_BLANKS;
7405 if (RAW != '=') {
7406 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7408 ctxt->sax->error(ctxt->userData,
7409 "xmlParseEncodingDecl : expected '='\n");
7410 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007411 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007412 return(NULL);
7413 }
7414 NEXT;
7415 SKIP_BLANKS;
7416 if (RAW == '"') {
7417 NEXT;
7418 q = CUR_PTR;
7419 encoding = xmlParseEncName(ctxt);
7420 if (RAW != '"') {
7421 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7423 ctxt->sax->error(ctxt->userData,
7424 "String not closed\n%.50s\n", q);
7425 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007426 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007427 } else
7428 NEXT;
7429 } else if (RAW == '\''){
7430 NEXT;
7431 q = CUR_PTR;
7432 encoding = xmlParseEncName(ctxt);
7433 if (RAW != '\'') {
7434 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7436 ctxt->sax->error(ctxt->userData,
7437 "String not closed\n%.50s\n", q);
7438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007440 } else
7441 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007442 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007443 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7445 ctxt->sax->error(ctxt->userData,
7446 "xmlParseEncodingDecl : expected ' or \"\n");
7447 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007448 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007449 }
7450 if (encoding != NULL) {
7451 xmlCharEncoding enc;
7452 xmlCharEncodingHandlerPtr handler;
7453
7454 if (ctxt->input->encoding != NULL)
7455 xmlFree((xmlChar *) ctxt->input->encoding);
7456 ctxt->input->encoding = encoding;
7457
7458 enc = xmlParseCharEncoding((const char *) encoding);
7459 /*
7460 * registered set of known encodings
7461 */
7462 if (enc != XML_CHAR_ENCODING_ERROR) {
7463 xmlSwitchEncoding(ctxt, enc);
7464 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007465 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007466 xmlFree(encoding);
7467 return(NULL);
7468 }
7469 } else {
7470 /*
7471 * fallback for unknown encodings
7472 */
7473 handler = xmlFindCharEncodingHandler((const char *) encoding);
7474 if (handler != NULL) {
7475 xmlSwitchToEncoding(ctxt, handler);
7476 } else {
7477 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7479 ctxt->sax->error(ctxt->userData,
7480 "Unsupported encoding %s\n", encoding);
7481 return(NULL);
7482 }
7483 }
7484 }
7485 }
7486 return(encoding);
7487}
7488
7489/**
7490 * xmlParseSDDecl:
7491 * @ctxt: an XML parser context
7492 *
7493 * parse the XML standalone declaration
7494 *
7495 * [32] SDDecl ::= S 'standalone' Eq
7496 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7497 *
7498 * [ VC: Standalone Document Declaration ]
7499 * TODO The standalone document declaration must have the value "no"
7500 * if any external markup declarations contain declarations of:
7501 * - attributes with default values, if elements to which these
7502 * attributes apply appear in the document without specifications
7503 * of values for these attributes, or
7504 * - entities (other than amp, lt, gt, apos, quot), if references
7505 * to those entities appear in the document, or
7506 * - attributes with values subject to normalization, where the
7507 * attribute appears in the document with a value which will change
7508 * as a result of normalization, or
7509 * - element types with element content, if white space occurs directly
7510 * within any instance of those types.
7511 *
7512 * Returns 1 if standalone, 0 otherwise
7513 */
7514
7515int
7516xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7517 int standalone = -1;
7518
7519 SKIP_BLANKS;
7520 if ((RAW == 's') && (NXT(1) == 't') &&
7521 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7522 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7523 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7524 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7525 SKIP(10);
7526 SKIP_BLANKS;
7527 if (RAW != '=') {
7528 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7530 ctxt->sax->error(ctxt->userData,
7531 "XML standalone declaration : expected '='\n");
7532 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007533 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007534 return(standalone);
7535 }
7536 NEXT;
7537 SKIP_BLANKS;
7538 if (RAW == '\''){
7539 NEXT;
7540 if ((RAW == 'n') && (NXT(1) == 'o')) {
7541 standalone = 0;
7542 SKIP(2);
7543 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7544 (NXT(2) == 's')) {
7545 standalone = 1;
7546 SKIP(3);
7547 } else {
7548 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7550 ctxt->sax->error(ctxt->userData,
7551 "standalone accepts only 'yes' or 'no'\n");
7552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007554 }
7555 if (RAW != '\'') {
7556 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7558 ctxt->sax->error(ctxt->userData, "String not closed\n");
7559 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007560 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007561 } else
7562 NEXT;
7563 } else if (RAW == '"'){
7564 NEXT;
7565 if ((RAW == 'n') && (NXT(1) == 'o')) {
7566 standalone = 0;
7567 SKIP(2);
7568 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7569 (NXT(2) == 's')) {
7570 standalone = 1;
7571 SKIP(3);
7572 } else {
7573 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7575 ctxt->sax->error(ctxt->userData,
7576 "standalone accepts only 'yes' or 'no'\n");
7577 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007578 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007579 }
7580 if (RAW != '"') {
7581 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7583 ctxt->sax->error(ctxt->userData, "String not closed\n");
7584 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007585 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007586 } else
7587 NEXT;
7588 } else {
7589 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7591 ctxt->sax->error(ctxt->userData,
7592 "Standalone value not found\n");
7593 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007594 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007595 }
7596 }
7597 return(standalone);
7598}
7599
7600/**
7601 * xmlParseXMLDecl:
7602 * @ctxt: an XML parser context
7603 *
7604 * parse an XML declaration header
7605 *
7606 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7607 */
7608
7609void
7610xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7611 xmlChar *version;
7612
7613 /*
7614 * We know that '<?xml' is here.
7615 */
7616 SKIP(5);
7617
7618 if (!IS_BLANK(RAW)) {
7619 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7621 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7622 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007624 }
7625 SKIP_BLANKS;
7626
7627 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007628 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007629 */
7630 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007631 if (version == NULL) {
7632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7633 ctxt->sax->error(ctxt->userData,
7634 "Malformed declaration expecting version\n");
7635 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007636 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007637 } else {
7638 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7639 /*
7640 * TODO: Blueberry should be detected here
7641 */
7642 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7643 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7644 version);
7645 }
7646 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007647 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007648 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007649 }
Owen Taylor3473f882001-02-23 17:55:21 +00007650
7651 /*
7652 * We may have the encoding declaration
7653 */
7654 if (!IS_BLANK(RAW)) {
7655 if ((RAW == '?') && (NXT(1) == '>')) {
7656 SKIP(2);
7657 return;
7658 }
7659 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7661 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007664 }
7665 xmlParseEncodingDecl(ctxt);
7666 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7667 /*
7668 * The XML REC instructs us to stop parsing right here
7669 */
7670 return;
7671 }
7672
7673 /*
7674 * We may have the standalone status.
7675 */
7676 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7677 if ((RAW == '?') && (NXT(1) == '>')) {
7678 SKIP(2);
7679 return;
7680 }
7681 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7683 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7684 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007685 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007686 }
7687 SKIP_BLANKS;
7688 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7689
7690 SKIP_BLANKS;
7691 if ((RAW == '?') && (NXT(1) == '>')) {
7692 SKIP(2);
7693 } else if (RAW == '>') {
7694 /* Deprecated old WD ... */
7695 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7697 ctxt->sax->error(ctxt->userData,
7698 "XML declaration must end-up with '?>'\n");
7699 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007700 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007701 NEXT;
7702 } else {
7703 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7705 ctxt->sax->error(ctxt->userData,
7706 "parsing XML declaration: '?>' expected\n");
7707 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007708 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007709 MOVETO_ENDTAG(CUR_PTR);
7710 NEXT;
7711 }
7712}
7713
7714/**
7715 * xmlParseMisc:
7716 * @ctxt: an XML parser context
7717 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007718 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007719 *
7720 * [27] Misc ::= Comment | PI | S
7721 */
7722
7723void
7724xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007725 while (((RAW == '<') && (NXT(1) == '?')) ||
7726 ((RAW == '<') && (NXT(1) == '!') &&
7727 (NXT(2) == '-') && (NXT(3) == '-')) ||
7728 IS_BLANK(CUR)) {
7729 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007730 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007731 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007732 NEXT;
7733 } else
7734 xmlParseComment(ctxt);
7735 }
7736}
7737
7738/**
7739 * xmlParseDocument:
7740 * @ctxt: an XML parser context
7741 *
7742 * parse an XML document (and build a tree if using the standard SAX
7743 * interface).
7744 *
7745 * [1] document ::= prolog element Misc*
7746 *
7747 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7748 *
7749 * Returns 0, -1 in case of error. the parser context is augmented
7750 * as a result of the parsing.
7751 */
7752
7753int
7754xmlParseDocument(xmlParserCtxtPtr ctxt) {
7755 xmlChar start[4];
7756 xmlCharEncoding enc;
7757
7758 xmlInitParser();
7759
7760 GROW;
7761
7762 /*
7763 * SAX: beginning of the document processing.
7764 */
7765 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7766 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7767
Daniel Veillard50f34372001-08-03 12:06:36 +00007768 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007769 /*
7770 * Get the 4 first bytes and decode the charset
7771 * if enc != XML_CHAR_ENCODING_NONE
7772 * plug some encoding conversion routines.
7773 */
7774 start[0] = RAW;
7775 start[1] = NXT(1);
7776 start[2] = NXT(2);
7777 start[3] = NXT(3);
7778 enc = xmlDetectCharEncoding(start, 4);
7779 if (enc != XML_CHAR_ENCODING_NONE) {
7780 xmlSwitchEncoding(ctxt, enc);
7781 }
Owen Taylor3473f882001-02-23 17:55:21 +00007782 }
7783
7784
7785 if (CUR == 0) {
7786 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7788 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7789 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007790 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007791 }
7792
7793 /*
7794 * Check for the XMLDecl in the Prolog.
7795 */
7796 GROW;
7797 if ((RAW == '<') && (NXT(1) == '?') &&
7798 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7799 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7800
7801 /*
7802 * Note that we will switch encoding on the fly.
7803 */
7804 xmlParseXMLDecl(ctxt);
7805 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7806 /*
7807 * The XML REC instructs us to stop parsing right here
7808 */
7809 return(-1);
7810 }
7811 ctxt->standalone = ctxt->input->standalone;
7812 SKIP_BLANKS;
7813 } else {
7814 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7815 }
7816 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7817 ctxt->sax->startDocument(ctxt->userData);
7818
7819 /*
7820 * The Misc part of the Prolog
7821 */
7822 GROW;
7823 xmlParseMisc(ctxt);
7824
7825 /*
7826 * Then possibly doc type declaration(s) and more Misc
7827 * (doctypedecl Misc*)?
7828 */
7829 GROW;
7830 if ((RAW == '<') && (NXT(1) == '!') &&
7831 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7832 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7833 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7834 (NXT(8) == 'E')) {
7835
7836 ctxt->inSubset = 1;
7837 xmlParseDocTypeDecl(ctxt);
7838 if (RAW == '[') {
7839 ctxt->instate = XML_PARSER_DTD;
7840 xmlParseInternalSubset(ctxt);
7841 }
7842
7843 /*
7844 * Create and update the external subset.
7845 */
7846 ctxt->inSubset = 2;
7847 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7848 (!ctxt->disableSAX))
7849 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7850 ctxt->extSubSystem, ctxt->extSubURI);
7851 ctxt->inSubset = 0;
7852
7853
7854 ctxt->instate = XML_PARSER_PROLOG;
7855 xmlParseMisc(ctxt);
7856 }
7857
7858 /*
7859 * Time to start parsing the tree itself
7860 */
7861 GROW;
7862 if (RAW != '<') {
7863 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7865 ctxt->sax->error(ctxt->userData,
7866 "Start tag expected, '<' not found\n");
7867 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007868 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007869 ctxt->instate = XML_PARSER_EOF;
7870 } else {
7871 ctxt->instate = XML_PARSER_CONTENT;
7872 xmlParseElement(ctxt);
7873 ctxt->instate = XML_PARSER_EPILOG;
7874
7875
7876 /*
7877 * The Misc part at the end
7878 */
7879 xmlParseMisc(ctxt);
7880
Daniel Veillard561b7f82002-03-20 21:55:57 +00007881 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007882 ctxt->errNo = XML_ERR_DOCUMENT_END;
7883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7884 ctxt->sax->error(ctxt->userData,
7885 "Extra content at the end of the document\n");
7886 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007887 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007888 }
7889 ctxt->instate = XML_PARSER_EOF;
7890 }
7891
7892 /*
7893 * SAX: end of the document processing.
7894 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007895 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007896 ctxt->sax->endDocument(ctxt->userData);
7897
Daniel Veillard5997aca2002-03-18 18:36:20 +00007898 /*
7899 * Remove locally kept entity definitions if the tree was not built
7900 */
7901 if ((ctxt->myDoc != NULL) &&
7902 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7903 xmlFreeDoc(ctxt->myDoc);
7904 ctxt->myDoc = NULL;
7905 }
7906
Daniel Veillardc7612992002-02-17 22:47:37 +00007907 if (! ctxt->wellFormed) {
7908 ctxt->valid = 0;
7909 return(-1);
7910 }
Owen Taylor3473f882001-02-23 17:55:21 +00007911 return(0);
7912}
7913
7914/**
7915 * xmlParseExtParsedEnt:
7916 * @ctxt: an XML parser context
7917 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007918 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007919 * An external general parsed entity is well-formed if it matches the
7920 * production labeled extParsedEnt.
7921 *
7922 * [78] extParsedEnt ::= TextDecl? content
7923 *
7924 * Returns 0, -1 in case of error. the parser context is augmented
7925 * as a result of the parsing.
7926 */
7927
7928int
7929xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7930 xmlChar start[4];
7931 xmlCharEncoding enc;
7932
7933 xmlDefaultSAXHandlerInit();
7934
7935 GROW;
7936
7937 /*
7938 * SAX: beginning of the document processing.
7939 */
7940 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7941 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7942
7943 /*
7944 * Get the 4 first bytes and decode the charset
7945 * if enc != XML_CHAR_ENCODING_NONE
7946 * plug some encoding conversion routines.
7947 */
7948 start[0] = RAW;
7949 start[1] = NXT(1);
7950 start[2] = NXT(2);
7951 start[3] = NXT(3);
7952 enc = xmlDetectCharEncoding(start, 4);
7953 if (enc != XML_CHAR_ENCODING_NONE) {
7954 xmlSwitchEncoding(ctxt, enc);
7955 }
7956
7957
7958 if (CUR == 0) {
7959 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7961 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7962 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007963 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007964 }
7965
7966 /*
7967 * Check for the XMLDecl in the Prolog.
7968 */
7969 GROW;
7970 if ((RAW == '<') && (NXT(1) == '?') &&
7971 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7972 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7973
7974 /*
7975 * Note that we will switch encoding on the fly.
7976 */
7977 xmlParseXMLDecl(ctxt);
7978 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7979 /*
7980 * The XML REC instructs us to stop parsing right here
7981 */
7982 return(-1);
7983 }
7984 SKIP_BLANKS;
7985 } else {
7986 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7987 }
7988 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7989 ctxt->sax->startDocument(ctxt->userData);
7990
7991 /*
7992 * Doing validity checking on chunk doesn't make sense
7993 */
7994 ctxt->instate = XML_PARSER_CONTENT;
7995 ctxt->validate = 0;
7996 ctxt->loadsubset = 0;
7997 ctxt->depth = 0;
7998
7999 xmlParseContent(ctxt);
8000
8001 if ((RAW == '<') && (NXT(1) == '/')) {
8002 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8004 ctxt->sax->error(ctxt->userData,
8005 "chunk is not well balanced\n");
8006 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008008 } else if (RAW != 0) {
8009 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8011 ctxt->sax->error(ctxt->userData,
8012 "extra content at the end of well balanced chunk\n");
8013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008015 }
8016
8017 /*
8018 * SAX: end of the document processing.
8019 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008020 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008021 ctxt->sax->endDocument(ctxt->userData);
8022
8023 if (! ctxt->wellFormed) return(-1);
8024 return(0);
8025}
8026
8027/************************************************************************
8028 * *
8029 * Progressive parsing interfaces *
8030 * *
8031 ************************************************************************/
8032
8033/**
8034 * xmlParseLookupSequence:
8035 * @ctxt: an XML parser context
8036 * @first: the first char to lookup
8037 * @next: the next char to lookup or zero
8038 * @third: the next char to lookup or zero
8039 *
8040 * Try to find if a sequence (first, next, third) or just (first next) or
8041 * (first) is available in the input stream.
8042 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8043 * to avoid rescanning sequences of bytes, it DOES change the state of the
8044 * parser, do not use liberally.
8045 *
8046 * Returns the index to the current parsing point if the full sequence
8047 * is available, -1 otherwise.
8048 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008049static int
Owen Taylor3473f882001-02-23 17:55:21 +00008050xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8051 xmlChar next, xmlChar third) {
8052 int base, len;
8053 xmlParserInputPtr in;
8054 const xmlChar *buf;
8055
8056 in = ctxt->input;
8057 if (in == NULL) return(-1);
8058 base = in->cur - in->base;
8059 if (base < 0) return(-1);
8060 if (ctxt->checkIndex > base)
8061 base = ctxt->checkIndex;
8062 if (in->buf == NULL) {
8063 buf = in->base;
8064 len = in->length;
8065 } else {
8066 buf = in->buf->buffer->content;
8067 len = in->buf->buffer->use;
8068 }
8069 /* take into account the sequence length */
8070 if (third) len -= 2;
8071 else if (next) len --;
8072 for (;base < len;base++) {
8073 if (buf[base] == first) {
8074 if (third != 0) {
8075 if ((buf[base + 1] != next) ||
8076 (buf[base + 2] != third)) continue;
8077 } else if (next != 0) {
8078 if (buf[base + 1] != next) continue;
8079 }
8080 ctxt->checkIndex = 0;
8081#ifdef DEBUG_PUSH
8082 if (next == 0)
8083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: lookup '%c' found at %d\n",
8085 first, base);
8086 else if (third == 0)
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: lookup '%c%c' found at %d\n",
8089 first, next, base);
8090 else
8091 xmlGenericError(xmlGenericErrorContext,
8092 "PP: lookup '%c%c%c' found at %d\n",
8093 first, next, third, base);
8094#endif
8095 return(base - (in->cur - in->base));
8096 }
8097 }
8098 ctxt->checkIndex = base;
8099#ifdef DEBUG_PUSH
8100 if (next == 0)
8101 xmlGenericError(xmlGenericErrorContext,
8102 "PP: lookup '%c' failed\n", first);
8103 else if (third == 0)
8104 xmlGenericError(xmlGenericErrorContext,
8105 "PP: lookup '%c%c' failed\n", first, next);
8106 else
8107 xmlGenericError(xmlGenericErrorContext,
8108 "PP: lookup '%c%c%c' failed\n", first, next, third);
8109#endif
8110 return(-1);
8111}
8112
8113/**
8114 * xmlParseTryOrFinish:
8115 * @ctxt: an XML parser context
8116 * @terminate: last chunk indicator
8117 *
8118 * Try to progress on parsing
8119 *
8120 * Returns zero if no parsing was possible
8121 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008122static int
Owen Taylor3473f882001-02-23 17:55:21 +00008123xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8124 int ret = 0;
8125 int avail;
8126 xmlChar cur, next;
8127
8128#ifdef DEBUG_PUSH
8129 switch (ctxt->instate) {
8130 case XML_PARSER_EOF:
8131 xmlGenericError(xmlGenericErrorContext,
8132 "PP: try EOF\n"); break;
8133 case XML_PARSER_START:
8134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: try START\n"); break;
8136 case XML_PARSER_MISC:
8137 xmlGenericError(xmlGenericErrorContext,
8138 "PP: try MISC\n");break;
8139 case XML_PARSER_COMMENT:
8140 xmlGenericError(xmlGenericErrorContext,
8141 "PP: try COMMENT\n");break;
8142 case XML_PARSER_PROLOG:
8143 xmlGenericError(xmlGenericErrorContext,
8144 "PP: try PROLOG\n");break;
8145 case XML_PARSER_START_TAG:
8146 xmlGenericError(xmlGenericErrorContext,
8147 "PP: try START_TAG\n");break;
8148 case XML_PARSER_CONTENT:
8149 xmlGenericError(xmlGenericErrorContext,
8150 "PP: try CONTENT\n");break;
8151 case XML_PARSER_CDATA_SECTION:
8152 xmlGenericError(xmlGenericErrorContext,
8153 "PP: try CDATA_SECTION\n");break;
8154 case XML_PARSER_END_TAG:
8155 xmlGenericError(xmlGenericErrorContext,
8156 "PP: try END_TAG\n");break;
8157 case XML_PARSER_ENTITY_DECL:
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: try ENTITY_DECL\n");break;
8160 case XML_PARSER_ENTITY_VALUE:
8161 xmlGenericError(xmlGenericErrorContext,
8162 "PP: try ENTITY_VALUE\n");break;
8163 case XML_PARSER_ATTRIBUTE_VALUE:
8164 xmlGenericError(xmlGenericErrorContext,
8165 "PP: try ATTRIBUTE_VALUE\n");break;
8166 case XML_PARSER_DTD:
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: try DTD\n");break;
8169 case XML_PARSER_EPILOG:
8170 xmlGenericError(xmlGenericErrorContext,
8171 "PP: try EPILOG\n");break;
8172 case XML_PARSER_PI:
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: try PI\n");break;
8175 case XML_PARSER_IGNORE:
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: try IGNORE\n");break;
8178 }
8179#endif
8180
8181 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008182 SHRINK;
8183
Owen Taylor3473f882001-02-23 17:55:21 +00008184 /*
8185 * Pop-up of finished entities.
8186 */
8187 while ((RAW == 0) && (ctxt->inputNr > 1))
8188 xmlPopInput(ctxt);
8189
8190 if (ctxt->input ==NULL) break;
8191 if (ctxt->input->buf == NULL)
8192 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008193 else {
8194 /*
8195 * If we are operating on converted input, try to flush
8196 * remainng chars to avoid them stalling in the non-converted
8197 * buffer.
8198 */
8199 if ((ctxt->input->buf->raw != NULL) &&
8200 (ctxt->input->buf->raw->use > 0)) {
8201 int base = ctxt->input->base -
8202 ctxt->input->buf->buffer->content;
8203 int current = ctxt->input->cur - ctxt->input->base;
8204
8205 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8206 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8207 ctxt->input->cur = ctxt->input->base + current;
8208 ctxt->input->end =
8209 &ctxt->input->buf->buffer->content[
8210 ctxt->input->buf->buffer->use];
8211 }
8212 avail = ctxt->input->buf->buffer->use -
8213 (ctxt->input->cur - ctxt->input->base);
8214 }
Owen Taylor3473f882001-02-23 17:55:21 +00008215 if (avail < 1)
8216 goto done;
8217 switch (ctxt->instate) {
8218 case XML_PARSER_EOF:
8219 /*
8220 * Document parsing is done !
8221 */
8222 goto done;
8223 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008224 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8225 xmlChar start[4];
8226 xmlCharEncoding enc;
8227
8228 /*
8229 * Very first chars read from the document flow.
8230 */
8231 if (avail < 4)
8232 goto done;
8233
8234 /*
8235 * Get the 4 first bytes and decode the charset
8236 * if enc != XML_CHAR_ENCODING_NONE
8237 * plug some encoding conversion routines.
8238 */
8239 start[0] = RAW;
8240 start[1] = NXT(1);
8241 start[2] = NXT(2);
8242 start[3] = NXT(3);
8243 enc = xmlDetectCharEncoding(start, 4);
8244 if (enc != XML_CHAR_ENCODING_NONE) {
8245 xmlSwitchEncoding(ctxt, enc);
8246 }
8247 break;
8248 }
Owen Taylor3473f882001-02-23 17:55:21 +00008249
8250 cur = ctxt->input->cur[0];
8251 next = ctxt->input->cur[1];
8252 if (cur == 0) {
8253 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8254 ctxt->sax->setDocumentLocator(ctxt->userData,
8255 &xmlDefaultSAXLocator);
8256 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8258 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8259 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008260 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008261 ctxt->instate = XML_PARSER_EOF;
8262#ifdef DEBUG_PUSH
8263 xmlGenericError(xmlGenericErrorContext,
8264 "PP: entering EOF\n");
8265#endif
8266 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8267 ctxt->sax->endDocument(ctxt->userData);
8268 goto done;
8269 }
8270 if ((cur == '<') && (next == '?')) {
8271 /* PI or XML decl */
8272 if (avail < 5) return(ret);
8273 if ((!terminate) &&
8274 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8275 return(ret);
8276 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8277 ctxt->sax->setDocumentLocator(ctxt->userData,
8278 &xmlDefaultSAXLocator);
8279 if ((ctxt->input->cur[2] == 'x') &&
8280 (ctxt->input->cur[3] == 'm') &&
8281 (ctxt->input->cur[4] == 'l') &&
8282 (IS_BLANK(ctxt->input->cur[5]))) {
8283 ret += 5;
8284#ifdef DEBUG_PUSH
8285 xmlGenericError(xmlGenericErrorContext,
8286 "PP: Parsing XML Decl\n");
8287#endif
8288 xmlParseXMLDecl(ctxt);
8289 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8290 /*
8291 * The XML REC instructs us to stop parsing right
8292 * here
8293 */
8294 ctxt->instate = XML_PARSER_EOF;
8295 return(0);
8296 }
8297 ctxt->standalone = ctxt->input->standalone;
8298 if ((ctxt->encoding == NULL) &&
8299 (ctxt->input->encoding != NULL))
8300 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8301 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8302 (!ctxt->disableSAX))
8303 ctxt->sax->startDocument(ctxt->userData);
8304 ctxt->instate = XML_PARSER_MISC;
8305#ifdef DEBUG_PUSH
8306 xmlGenericError(xmlGenericErrorContext,
8307 "PP: entering MISC\n");
8308#endif
8309 } else {
8310 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8311 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8312 (!ctxt->disableSAX))
8313 ctxt->sax->startDocument(ctxt->userData);
8314 ctxt->instate = XML_PARSER_MISC;
8315#ifdef DEBUG_PUSH
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: entering MISC\n");
8318#endif
8319 }
8320 } else {
8321 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8322 ctxt->sax->setDocumentLocator(ctxt->userData,
8323 &xmlDefaultSAXLocator);
8324 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8325 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8326 (!ctxt->disableSAX))
8327 ctxt->sax->startDocument(ctxt->userData);
8328 ctxt->instate = XML_PARSER_MISC;
8329#ifdef DEBUG_PUSH
8330 xmlGenericError(xmlGenericErrorContext,
8331 "PP: entering MISC\n");
8332#endif
8333 }
8334 break;
8335 case XML_PARSER_MISC:
8336 SKIP_BLANKS;
8337 if (ctxt->input->buf == NULL)
8338 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8339 else
8340 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8341 if (avail < 2)
8342 goto done;
8343 cur = ctxt->input->cur[0];
8344 next = ctxt->input->cur[1];
8345 if ((cur == '<') && (next == '?')) {
8346 if ((!terminate) &&
8347 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8348 goto done;
8349#ifdef DEBUG_PUSH
8350 xmlGenericError(xmlGenericErrorContext,
8351 "PP: Parsing PI\n");
8352#endif
8353 xmlParsePI(ctxt);
8354 } else if ((cur == '<') && (next == '!') &&
8355 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8356 if ((!terminate) &&
8357 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8358 goto done;
8359#ifdef DEBUG_PUSH
8360 xmlGenericError(xmlGenericErrorContext,
8361 "PP: Parsing Comment\n");
8362#endif
8363 xmlParseComment(ctxt);
8364 ctxt->instate = XML_PARSER_MISC;
8365 } else if ((cur == '<') && (next == '!') &&
8366 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8367 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8368 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8369 (ctxt->input->cur[8] == 'E')) {
8370 if ((!terminate) &&
8371 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8372 goto done;
8373#ifdef DEBUG_PUSH
8374 xmlGenericError(xmlGenericErrorContext,
8375 "PP: Parsing internal subset\n");
8376#endif
8377 ctxt->inSubset = 1;
8378 xmlParseDocTypeDecl(ctxt);
8379 if (RAW == '[') {
8380 ctxt->instate = XML_PARSER_DTD;
8381#ifdef DEBUG_PUSH
8382 xmlGenericError(xmlGenericErrorContext,
8383 "PP: entering DTD\n");
8384#endif
8385 } else {
8386 /*
8387 * Create and update the external subset.
8388 */
8389 ctxt->inSubset = 2;
8390 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8391 (ctxt->sax->externalSubset != NULL))
8392 ctxt->sax->externalSubset(ctxt->userData,
8393 ctxt->intSubName, ctxt->extSubSystem,
8394 ctxt->extSubURI);
8395 ctxt->inSubset = 0;
8396 ctxt->instate = XML_PARSER_PROLOG;
8397#ifdef DEBUG_PUSH
8398 xmlGenericError(xmlGenericErrorContext,
8399 "PP: entering PROLOG\n");
8400#endif
8401 }
8402 } else if ((cur == '<') && (next == '!') &&
8403 (avail < 9)) {
8404 goto done;
8405 } else {
8406 ctxt->instate = XML_PARSER_START_TAG;
8407#ifdef DEBUG_PUSH
8408 xmlGenericError(xmlGenericErrorContext,
8409 "PP: entering START_TAG\n");
8410#endif
8411 }
8412 break;
8413 case XML_PARSER_IGNORE:
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: internal error, state == IGNORE");
8416 ctxt->instate = XML_PARSER_DTD;
8417#ifdef DEBUG_PUSH
8418 xmlGenericError(xmlGenericErrorContext,
8419 "PP: entering DTD\n");
8420#endif
8421 break;
8422 case XML_PARSER_PROLOG:
8423 SKIP_BLANKS;
8424 if (ctxt->input->buf == NULL)
8425 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8426 else
8427 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8428 if (avail < 2)
8429 goto done;
8430 cur = ctxt->input->cur[0];
8431 next = ctxt->input->cur[1];
8432 if ((cur == '<') && (next == '?')) {
8433 if ((!terminate) &&
8434 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8435 goto done;
8436#ifdef DEBUG_PUSH
8437 xmlGenericError(xmlGenericErrorContext,
8438 "PP: Parsing PI\n");
8439#endif
8440 xmlParsePI(ctxt);
8441 } else if ((cur == '<') && (next == '!') &&
8442 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8443 if ((!terminate) &&
8444 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8445 goto done;
8446#ifdef DEBUG_PUSH
8447 xmlGenericError(xmlGenericErrorContext,
8448 "PP: Parsing Comment\n");
8449#endif
8450 xmlParseComment(ctxt);
8451 ctxt->instate = XML_PARSER_PROLOG;
8452 } else if ((cur == '<') && (next == '!') &&
8453 (avail < 4)) {
8454 goto done;
8455 } else {
8456 ctxt->instate = XML_PARSER_START_TAG;
8457#ifdef DEBUG_PUSH
8458 xmlGenericError(xmlGenericErrorContext,
8459 "PP: entering START_TAG\n");
8460#endif
8461 }
8462 break;
8463 case XML_PARSER_EPILOG:
8464 SKIP_BLANKS;
8465 if (ctxt->input->buf == NULL)
8466 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8467 else
8468 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8469 if (avail < 2)
8470 goto done;
8471 cur = ctxt->input->cur[0];
8472 next = ctxt->input->cur[1];
8473 if ((cur == '<') && (next == '?')) {
8474 if ((!terminate) &&
8475 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8476 goto done;
8477#ifdef DEBUG_PUSH
8478 xmlGenericError(xmlGenericErrorContext,
8479 "PP: Parsing PI\n");
8480#endif
8481 xmlParsePI(ctxt);
8482 ctxt->instate = XML_PARSER_EPILOG;
8483 } else if ((cur == '<') && (next == '!') &&
8484 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8485 if ((!terminate) &&
8486 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8487 goto done;
8488#ifdef DEBUG_PUSH
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: Parsing Comment\n");
8491#endif
8492 xmlParseComment(ctxt);
8493 ctxt->instate = XML_PARSER_EPILOG;
8494 } else if ((cur == '<') && (next == '!') &&
8495 (avail < 4)) {
8496 goto done;
8497 } else {
8498 ctxt->errNo = XML_ERR_DOCUMENT_END;
8499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8500 ctxt->sax->error(ctxt->userData,
8501 "Extra content at the end of the document\n");
8502 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008503 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008504 ctxt->instate = XML_PARSER_EOF;
8505#ifdef DEBUG_PUSH
8506 xmlGenericError(xmlGenericErrorContext,
8507 "PP: entering EOF\n");
8508#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008510 ctxt->sax->endDocument(ctxt->userData);
8511 goto done;
8512 }
8513 break;
8514 case XML_PARSER_START_TAG: {
8515 xmlChar *name, *oldname;
8516
8517 if ((avail < 2) && (ctxt->inputNr == 1))
8518 goto done;
8519 cur = ctxt->input->cur[0];
8520 if (cur != '<') {
8521 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8523 ctxt->sax->error(ctxt->userData,
8524 "Start tag expect, '<' not found\n");
8525 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008527 ctxt->instate = XML_PARSER_EOF;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering EOF\n");
8531#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008532 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008533 ctxt->sax->endDocument(ctxt->userData);
8534 goto done;
8535 }
8536 if ((!terminate) &&
8537 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8538 goto done;
8539 if (ctxt->spaceNr == 0)
8540 spacePush(ctxt, -1);
8541 else
8542 spacePush(ctxt, *ctxt->space);
8543 name = xmlParseStartTag(ctxt);
8544 if (name == NULL) {
8545 spacePop(ctxt);
8546 ctxt->instate = XML_PARSER_EOF;
8547#ifdef DEBUG_PUSH
8548 xmlGenericError(xmlGenericErrorContext,
8549 "PP: entering EOF\n");
8550#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008551 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008552 ctxt->sax->endDocument(ctxt->userData);
8553 goto done;
8554 }
8555 namePush(ctxt, xmlStrdup(name));
8556
8557 /*
8558 * [ VC: Root Element Type ]
8559 * The Name in the document type declaration must match
8560 * the element type of the root element.
8561 */
8562 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8563 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8564 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8565
8566 /*
8567 * Check for an Empty Element.
8568 */
8569 if ((RAW == '/') && (NXT(1) == '>')) {
8570 SKIP(2);
8571 if ((ctxt->sax != NULL) &&
8572 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8573 ctxt->sax->endElement(ctxt->userData, name);
8574 xmlFree(name);
8575 oldname = namePop(ctxt);
8576 spacePop(ctxt);
8577 if (oldname != NULL) {
8578#ifdef DEBUG_STACK
8579 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8580#endif
8581 xmlFree(oldname);
8582 }
8583 if (ctxt->name == NULL) {
8584 ctxt->instate = XML_PARSER_EPILOG;
8585#ifdef DEBUG_PUSH
8586 xmlGenericError(xmlGenericErrorContext,
8587 "PP: entering EPILOG\n");
8588#endif
8589 } else {
8590 ctxt->instate = XML_PARSER_CONTENT;
8591#ifdef DEBUG_PUSH
8592 xmlGenericError(xmlGenericErrorContext,
8593 "PP: entering CONTENT\n");
8594#endif
8595 }
8596 break;
8597 }
8598 if (RAW == '>') {
8599 NEXT;
8600 } else {
8601 ctxt->errNo = XML_ERR_GT_REQUIRED;
8602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8603 ctxt->sax->error(ctxt->userData,
8604 "Couldn't find end of Start Tag %s\n",
8605 name);
8606 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008607 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008608
8609 /*
8610 * end of parsing of this node.
8611 */
8612 nodePop(ctxt);
8613 oldname = namePop(ctxt);
8614 spacePop(ctxt);
8615 if (oldname != NULL) {
8616#ifdef DEBUG_STACK
8617 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8618#endif
8619 xmlFree(oldname);
8620 }
8621 }
8622 xmlFree(name);
8623 ctxt->instate = XML_PARSER_CONTENT;
8624#ifdef DEBUG_PUSH
8625 xmlGenericError(xmlGenericErrorContext,
8626 "PP: entering CONTENT\n");
8627#endif
8628 break;
8629 }
8630 case XML_PARSER_CONTENT: {
8631 const xmlChar *test;
8632 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008633 if ((avail < 2) && (ctxt->inputNr == 1))
8634 goto done;
8635 cur = ctxt->input->cur[0];
8636 next = ctxt->input->cur[1];
8637
8638 test = CUR_PTR;
8639 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008640 if ((cur == '<') && (next == '?')) {
8641 if ((!terminate) &&
8642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8643 goto done;
8644#ifdef DEBUG_PUSH
8645 xmlGenericError(xmlGenericErrorContext,
8646 "PP: Parsing PI\n");
8647#endif
8648 xmlParsePI(ctxt);
8649 } else if ((cur == '<') && (next == '!') &&
8650 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8651 if ((!terminate) &&
8652 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8653 goto done;
8654#ifdef DEBUG_PUSH
8655 xmlGenericError(xmlGenericErrorContext,
8656 "PP: Parsing Comment\n");
8657#endif
8658 xmlParseComment(ctxt);
8659 ctxt->instate = XML_PARSER_CONTENT;
8660 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8661 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8662 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8663 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8664 (ctxt->input->cur[8] == '[')) {
8665 SKIP(9);
8666 ctxt->instate = XML_PARSER_CDATA_SECTION;
8667#ifdef DEBUG_PUSH
8668 xmlGenericError(xmlGenericErrorContext,
8669 "PP: entering CDATA_SECTION\n");
8670#endif
8671 break;
8672 } else if ((cur == '<') && (next == '!') &&
8673 (avail < 9)) {
8674 goto done;
8675 } else if ((cur == '<') && (next == '/')) {
8676 ctxt->instate = XML_PARSER_END_TAG;
8677#ifdef DEBUG_PUSH
8678 xmlGenericError(xmlGenericErrorContext,
8679 "PP: entering END_TAG\n");
8680#endif
8681 break;
8682 } else if (cur == '<') {
8683 ctxt->instate = XML_PARSER_START_TAG;
8684#ifdef DEBUG_PUSH
8685 xmlGenericError(xmlGenericErrorContext,
8686 "PP: entering START_TAG\n");
8687#endif
8688 break;
8689 } else if (cur == '&') {
8690 if ((!terminate) &&
8691 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8692 goto done;
8693#ifdef DEBUG_PUSH
8694 xmlGenericError(xmlGenericErrorContext,
8695 "PP: Parsing Reference\n");
8696#endif
8697 xmlParseReference(ctxt);
8698 } else {
8699 /* TODO Avoid the extra copy, handle directly !!! */
8700 /*
8701 * Goal of the following test is:
8702 * - minimize calls to the SAX 'character' callback
8703 * when they are mergeable
8704 * - handle an problem for isBlank when we only parse
8705 * a sequence of blank chars and the next one is
8706 * not available to check against '<' presence.
8707 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008708 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008709 * of the parser.
8710 */
8711 if ((ctxt->inputNr == 1) &&
8712 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8713 if ((!terminate) &&
8714 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8715 goto done;
8716 }
8717 ctxt->checkIndex = 0;
8718#ifdef DEBUG_PUSH
8719 xmlGenericError(xmlGenericErrorContext,
8720 "PP: Parsing char data\n");
8721#endif
8722 xmlParseCharData(ctxt, 0);
8723 }
8724 /*
8725 * Pop-up of finished entities.
8726 */
8727 while ((RAW == 0) && (ctxt->inputNr > 1))
8728 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008729 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008730 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8732 ctxt->sax->error(ctxt->userData,
8733 "detected an error in element content\n");
8734 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008736 ctxt->instate = XML_PARSER_EOF;
8737 break;
8738 }
8739 break;
8740 }
8741 case XML_PARSER_CDATA_SECTION: {
8742 /*
8743 * The Push mode need to have the SAX callback for
8744 * cdataBlock merge back contiguous callbacks.
8745 */
8746 int base;
8747
8748 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8749 if (base < 0) {
8750 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8751 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8752 if (ctxt->sax->cdataBlock != NULL)
8753 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8754 XML_PARSER_BIG_BUFFER_SIZE);
8755 }
8756 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8757 ctxt->checkIndex = 0;
8758 }
8759 goto done;
8760 } else {
8761 if ((ctxt->sax != NULL) && (base > 0) &&
8762 (!ctxt->disableSAX)) {
8763 if (ctxt->sax->cdataBlock != NULL)
8764 ctxt->sax->cdataBlock(ctxt->userData,
8765 ctxt->input->cur, base);
8766 }
8767 SKIP(base + 3);
8768 ctxt->checkIndex = 0;
8769 ctxt->instate = XML_PARSER_CONTENT;
8770#ifdef DEBUG_PUSH
8771 xmlGenericError(xmlGenericErrorContext,
8772 "PP: entering CONTENT\n");
8773#endif
8774 }
8775 break;
8776 }
8777 case XML_PARSER_END_TAG:
8778 if (avail < 2)
8779 goto done;
8780 if ((!terminate) &&
8781 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8782 goto done;
8783 xmlParseEndTag(ctxt);
8784 if (ctxt->name == NULL) {
8785 ctxt->instate = XML_PARSER_EPILOG;
8786#ifdef DEBUG_PUSH
8787 xmlGenericError(xmlGenericErrorContext,
8788 "PP: entering EPILOG\n");
8789#endif
8790 } else {
8791 ctxt->instate = XML_PARSER_CONTENT;
8792#ifdef DEBUG_PUSH
8793 xmlGenericError(xmlGenericErrorContext,
8794 "PP: entering CONTENT\n");
8795#endif
8796 }
8797 break;
8798 case XML_PARSER_DTD: {
8799 /*
8800 * Sorry but progressive parsing of the internal subset
8801 * is not expected to be supported. We first check that
8802 * the full content of the internal subset is available and
8803 * the parsing is launched only at that point.
8804 * Internal subset ends up with "']' S? '>'" in an unescaped
8805 * section and not in a ']]>' sequence which are conditional
8806 * sections (whoever argued to keep that crap in XML deserve
8807 * a place in hell !).
8808 */
8809 int base, i;
8810 xmlChar *buf;
8811 xmlChar quote = 0;
8812
8813 base = ctxt->input->cur - ctxt->input->base;
8814 if (base < 0) return(0);
8815 if (ctxt->checkIndex > base)
8816 base = ctxt->checkIndex;
8817 buf = ctxt->input->buf->buffer->content;
8818 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8819 base++) {
8820 if (quote != 0) {
8821 if (buf[base] == quote)
8822 quote = 0;
8823 continue;
8824 }
8825 if (buf[base] == '"') {
8826 quote = '"';
8827 continue;
8828 }
8829 if (buf[base] == '\'') {
8830 quote = '\'';
8831 continue;
8832 }
8833 if (buf[base] == ']') {
8834 if ((unsigned int) base +1 >=
8835 ctxt->input->buf->buffer->use)
8836 break;
8837 if (buf[base + 1] == ']') {
8838 /* conditional crap, skip both ']' ! */
8839 base++;
8840 continue;
8841 }
8842 for (i = 0;
8843 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8844 i++) {
8845 if (buf[base + i] == '>')
8846 goto found_end_int_subset;
8847 }
8848 break;
8849 }
8850 }
8851 /*
8852 * We didn't found the end of the Internal subset
8853 */
8854 if (quote == 0)
8855 ctxt->checkIndex = base;
8856#ifdef DEBUG_PUSH
8857 if (next == 0)
8858 xmlGenericError(xmlGenericErrorContext,
8859 "PP: lookup of int subset end filed\n");
8860#endif
8861 goto done;
8862
8863found_end_int_subset:
8864 xmlParseInternalSubset(ctxt);
8865 ctxt->inSubset = 2;
8866 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8867 (ctxt->sax->externalSubset != NULL))
8868 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8869 ctxt->extSubSystem, ctxt->extSubURI);
8870 ctxt->inSubset = 0;
8871 ctxt->instate = XML_PARSER_PROLOG;
8872 ctxt->checkIndex = 0;
8873#ifdef DEBUG_PUSH
8874 xmlGenericError(xmlGenericErrorContext,
8875 "PP: entering PROLOG\n");
8876#endif
8877 break;
8878 }
8879 case XML_PARSER_COMMENT:
8880 xmlGenericError(xmlGenericErrorContext,
8881 "PP: internal error, state == COMMENT\n");
8882 ctxt->instate = XML_PARSER_CONTENT;
8883#ifdef DEBUG_PUSH
8884 xmlGenericError(xmlGenericErrorContext,
8885 "PP: entering CONTENT\n");
8886#endif
8887 break;
8888 case XML_PARSER_PI:
8889 xmlGenericError(xmlGenericErrorContext,
8890 "PP: internal error, state == PI\n");
8891 ctxt->instate = XML_PARSER_CONTENT;
8892#ifdef DEBUG_PUSH
8893 xmlGenericError(xmlGenericErrorContext,
8894 "PP: entering CONTENT\n");
8895#endif
8896 break;
8897 case XML_PARSER_ENTITY_DECL:
8898 xmlGenericError(xmlGenericErrorContext,
8899 "PP: internal error, state == ENTITY_DECL\n");
8900 ctxt->instate = XML_PARSER_DTD;
8901#ifdef DEBUG_PUSH
8902 xmlGenericError(xmlGenericErrorContext,
8903 "PP: entering DTD\n");
8904#endif
8905 break;
8906 case XML_PARSER_ENTITY_VALUE:
8907 xmlGenericError(xmlGenericErrorContext,
8908 "PP: internal error, state == ENTITY_VALUE\n");
8909 ctxt->instate = XML_PARSER_CONTENT;
8910#ifdef DEBUG_PUSH
8911 xmlGenericError(xmlGenericErrorContext,
8912 "PP: entering DTD\n");
8913#endif
8914 break;
8915 case XML_PARSER_ATTRIBUTE_VALUE:
8916 xmlGenericError(xmlGenericErrorContext,
8917 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8918 ctxt->instate = XML_PARSER_START_TAG;
8919#ifdef DEBUG_PUSH
8920 xmlGenericError(xmlGenericErrorContext,
8921 "PP: entering START_TAG\n");
8922#endif
8923 break;
8924 case XML_PARSER_SYSTEM_LITERAL:
8925 xmlGenericError(xmlGenericErrorContext,
8926 "PP: internal error, state == SYSTEM_LITERAL\n");
8927 ctxt->instate = XML_PARSER_START_TAG;
8928#ifdef DEBUG_PUSH
8929 xmlGenericError(xmlGenericErrorContext,
8930 "PP: entering START_TAG\n");
8931#endif
8932 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008933 case XML_PARSER_PUBLIC_LITERAL:
8934 xmlGenericError(xmlGenericErrorContext,
8935 "PP: internal error, state == PUBLIC_LITERAL\n");
8936 ctxt->instate = XML_PARSER_START_TAG;
8937#ifdef DEBUG_PUSH
8938 xmlGenericError(xmlGenericErrorContext,
8939 "PP: entering START_TAG\n");
8940#endif
8941 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008942 }
8943 }
8944done:
8945#ifdef DEBUG_PUSH
8946 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8947#endif
8948 return(ret);
8949}
8950
8951/**
Owen Taylor3473f882001-02-23 17:55:21 +00008952 * xmlParseChunk:
8953 * @ctxt: an XML parser context
8954 * @chunk: an char array
8955 * @size: the size in byte of the chunk
8956 * @terminate: last chunk indicator
8957 *
8958 * Parse a Chunk of memory
8959 *
8960 * Returns zero if no error, the xmlParserErrors otherwise.
8961 */
8962int
8963xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8964 int terminate) {
8965 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8966 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8967 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8968 int cur = ctxt->input->cur - ctxt->input->base;
8969
8970 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8971 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8972 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008973 ctxt->input->end =
8974 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008975#ifdef DEBUG_PUSH
8976 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8977#endif
8978
8979 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8980 xmlParseTryOrFinish(ctxt, terminate);
8981 } else if (ctxt->instate != XML_PARSER_EOF) {
8982 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8983 xmlParserInputBufferPtr in = ctxt->input->buf;
8984 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8985 (in->raw != NULL)) {
8986 int nbchars;
8987
8988 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8989 if (nbchars < 0) {
8990 xmlGenericError(xmlGenericErrorContext,
8991 "xmlParseChunk: encoder error\n");
8992 return(XML_ERR_INVALID_ENCODING);
8993 }
8994 }
8995 }
8996 }
8997 xmlParseTryOrFinish(ctxt, terminate);
8998 if (terminate) {
8999 /*
9000 * Check for termination
9001 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009002 int avail = 0;
9003 if (ctxt->input->buf == NULL)
9004 avail = ctxt->input->length -
9005 (ctxt->input->cur - ctxt->input->base);
9006 else
9007 avail = ctxt->input->buf->buffer->use -
9008 (ctxt->input->cur - ctxt->input->base);
9009
Owen Taylor3473f882001-02-23 17:55:21 +00009010 if ((ctxt->instate != XML_PARSER_EOF) &&
9011 (ctxt->instate != XML_PARSER_EPILOG)) {
9012 ctxt->errNo = XML_ERR_DOCUMENT_END;
9013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9014 ctxt->sax->error(ctxt->userData,
9015 "Extra content at the end of the document\n");
9016 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009017 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009018 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009019 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9020 ctxt->errNo = XML_ERR_DOCUMENT_END;
9021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9022 ctxt->sax->error(ctxt->userData,
9023 "Extra content at the end of the document\n");
9024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009026
9027 }
Owen Taylor3473f882001-02-23 17:55:21 +00009028 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009029 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009030 ctxt->sax->endDocument(ctxt->userData);
9031 }
9032 ctxt->instate = XML_PARSER_EOF;
9033 }
9034 return((xmlParserErrors) ctxt->errNo);
9035}
9036
9037/************************************************************************
9038 * *
9039 * I/O front end functions to the parser *
9040 * *
9041 ************************************************************************/
9042
9043/**
9044 * xmlStopParser:
9045 * @ctxt: an XML parser context
9046 *
9047 * Blocks further parser processing
9048 */
9049void
9050xmlStopParser(xmlParserCtxtPtr ctxt) {
9051 ctxt->instate = XML_PARSER_EOF;
9052 if (ctxt->input != NULL)
9053 ctxt->input->cur = BAD_CAST"";
9054}
9055
9056/**
9057 * xmlCreatePushParserCtxt:
9058 * @sax: a SAX handler
9059 * @user_data: The user data returned on SAX callbacks
9060 * @chunk: a pointer to an array of chars
9061 * @size: number of chars in the array
9062 * @filename: an optional file name or URI
9063 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009064 * Create a parser context for using the XML parser in push mode.
9065 * If @buffer and @size are non-NULL, the data is used to detect
9066 * the encoding. The remaining characters will be parsed so they
9067 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009068 * To allow content encoding detection, @size should be >= 4
9069 * The value of @filename is used for fetching external entities
9070 * and error/warning reports.
9071 *
9072 * Returns the new parser context or NULL
9073 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009074
Owen Taylor3473f882001-02-23 17:55:21 +00009075xmlParserCtxtPtr
9076xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9077 const char *chunk, int size, const char *filename) {
9078 xmlParserCtxtPtr ctxt;
9079 xmlParserInputPtr inputStream;
9080 xmlParserInputBufferPtr buf;
9081 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9082
9083 /*
9084 * plug some encoding conversion routines
9085 */
9086 if ((chunk != NULL) && (size >= 4))
9087 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9088
9089 buf = xmlAllocParserInputBuffer(enc);
9090 if (buf == NULL) return(NULL);
9091
9092 ctxt = xmlNewParserCtxt();
9093 if (ctxt == NULL) {
9094 xmlFree(buf);
9095 return(NULL);
9096 }
9097 if (sax != NULL) {
9098 if (ctxt->sax != &xmlDefaultSAXHandler)
9099 xmlFree(ctxt->sax);
9100 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9101 if (ctxt->sax == NULL) {
9102 xmlFree(buf);
9103 xmlFree(ctxt);
9104 return(NULL);
9105 }
9106 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9107 if (user_data != NULL)
9108 ctxt->userData = user_data;
9109 }
9110 if (filename == NULL) {
9111 ctxt->directory = NULL;
9112 } else {
9113 ctxt->directory = xmlParserGetDirectory(filename);
9114 }
9115
9116 inputStream = xmlNewInputStream(ctxt);
9117 if (inputStream == NULL) {
9118 xmlFreeParserCtxt(ctxt);
9119 return(NULL);
9120 }
9121
9122 if (filename == NULL)
9123 inputStream->filename = NULL;
9124 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009125 inputStream->filename = (char *)
9126 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009127 inputStream->buf = buf;
9128 inputStream->base = inputStream->buf->buffer->content;
9129 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009130 inputStream->end =
9131 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009132
9133 inputPush(ctxt, inputStream);
9134
9135 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9136 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009137 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9138 int cur = ctxt->input->cur - ctxt->input->base;
9139
Owen Taylor3473f882001-02-23 17:55:21 +00009140 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009141
9142 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9143 ctxt->input->cur = ctxt->input->base + cur;
9144 ctxt->input->end =
9145 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009146#ifdef DEBUG_PUSH
9147 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9148#endif
9149 }
9150
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009151 if (enc != XML_CHAR_ENCODING_NONE) {
9152 xmlSwitchEncoding(ctxt, enc);
9153 }
9154
Owen Taylor3473f882001-02-23 17:55:21 +00009155 return(ctxt);
9156}
9157
9158/**
9159 * xmlCreateIOParserCtxt:
9160 * @sax: a SAX handler
9161 * @user_data: The user data returned on SAX callbacks
9162 * @ioread: an I/O read function
9163 * @ioclose: an I/O close function
9164 * @ioctx: an I/O handler
9165 * @enc: the charset encoding if known
9166 *
9167 * Create a parser context for using the XML parser with an existing
9168 * I/O stream
9169 *
9170 * Returns the new parser context or NULL
9171 */
9172xmlParserCtxtPtr
9173xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9174 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9175 void *ioctx, xmlCharEncoding enc) {
9176 xmlParserCtxtPtr ctxt;
9177 xmlParserInputPtr inputStream;
9178 xmlParserInputBufferPtr buf;
9179
9180 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9181 if (buf == NULL) return(NULL);
9182
9183 ctxt = xmlNewParserCtxt();
9184 if (ctxt == NULL) {
9185 xmlFree(buf);
9186 return(NULL);
9187 }
9188 if (sax != NULL) {
9189 if (ctxt->sax != &xmlDefaultSAXHandler)
9190 xmlFree(ctxt->sax);
9191 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9192 if (ctxt->sax == NULL) {
9193 xmlFree(buf);
9194 xmlFree(ctxt);
9195 return(NULL);
9196 }
9197 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9198 if (user_data != NULL)
9199 ctxt->userData = user_data;
9200 }
9201
9202 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9203 if (inputStream == NULL) {
9204 xmlFreeParserCtxt(ctxt);
9205 return(NULL);
9206 }
9207 inputPush(ctxt, inputStream);
9208
9209 return(ctxt);
9210}
9211
9212/************************************************************************
9213 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009214 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009215 * *
9216 ************************************************************************/
9217
9218/**
9219 * xmlIOParseDTD:
9220 * @sax: the SAX handler block or NULL
9221 * @input: an Input Buffer
9222 * @enc: the charset encoding if known
9223 *
9224 * Load and parse a DTD
9225 *
9226 * Returns the resulting xmlDtdPtr or NULL in case of error.
9227 * @input will be freed at parsing end.
9228 */
9229
9230xmlDtdPtr
9231xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9232 xmlCharEncoding enc) {
9233 xmlDtdPtr ret = NULL;
9234 xmlParserCtxtPtr ctxt;
9235 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009236 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009237
9238 if (input == NULL)
9239 return(NULL);
9240
9241 ctxt = xmlNewParserCtxt();
9242 if (ctxt == NULL) {
9243 return(NULL);
9244 }
9245
9246 /*
9247 * Set-up the SAX context
9248 */
9249 if (sax != NULL) {
9250 if (ctxt->sax != NULL)
9251 xmlFree(ctxt->sax);
9252 ctxt->sax = sax;
9253 ctxt->userData = NULL;
9254 }
9255
9256 /*
9257 * generate a parser input from the I/O handler
9258 */
9259
9260 pinput = xmlNewIOInputStream(ctxt, input, enc);
9261 if (pinput == NULL) {
9262 if (sax != NULL) ctxt->sax = NULL;
9263 xmlFreeParserCtxt(ctxt);
9264 return(NULL);
9265 }
9266
9267 /*
9268 * plug some encoding conversion routines here.
9269 */
9270 xmlPushInput(ctxt, pinput);
9271
9272 pinput->filename = NULL;
9273 pinput->line = 1;
9274 pinput->col = 1;
9275 pinput->base = ctxt->input->cur;
9276 pinput->cur = ctxt->input->cur;
9277 pinput->free = NULL;
9278
9279 /*
9280 * let's parse that entity knowing it's an external subset.
9281 */
9282 ctxt->inSubset = 2;
9283 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9284 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9285 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009286
9287 if (enc == XML_CHAR_ENCODING_NONE) {
9288 /*
9289 * Get the 4 first bytes and decode the charset
9290 * if enc != XML_CHAR_ENCODING_NONE
9291 * plug some encoding conversion routines.
9292 */
9293 start[0] = RAW;
9294 start[1] = NXT(1);
9295 start[2] = NXT(2);
9296 start[3] = NXT(3);
9297 enc = xmlDetectCharEncoding(start, 4);
9298 if (enc != XML_CHAR_ENCODING_NONE) {
9299 xmlSwitchEncoding(ctxt, enc);
9300 }
9301 }
9302
Owen Taylor3473f882001-02-23 17:55:21 +00009303 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9304
9305 if (ctxt->myDoc != NULL) {
9306 if (ctxt->wellFormed) {
9307 ret = ctxt->myDoc->extSubset;
9308 ctxt->myDoc->extSubset = NULL;
9309 } else {
9310 ret = NULL;
9311 }
9312 xmlFreeDoc(ctxt->myDoc);
9313 ctxt->myDoc = NULL;
9314 }
9315 if (sax != NULL) ctxt->sax = NULL;
9316 xmlFreeParserCtxt(ctxt);
9317
9318 return(ret);
9319}
9320
9321/**
9322 * xmlSAXParseDTD:
9323 * @sax: the SAX handler block
9324 * @ExternalID: a NAME* containing the External ID of the DTD
9325 * @SystemID: a NAME* containing the URL to the DTD
9326 *
9327 * Load and parse an external subset.
9328 *
9329 * Returns the resulting xmlDtdPtr or NULL in case of error.
9330 */
9331
9332xmlDtdPtr
9333xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9334 const xmlChar *SystemID) {
9335 xmlDtdPtr ret = NULL;
9336 xmlParserCtxtPtr ctxt;
9337 xmlParserInputPtr input = NULL;
9338 xmlCharEncoding enc;
9339
9340 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9341
9342 ctxt = xmlNewParserCtxt();
9343 if (ctxt == NULL) {
9344 return(NULL);
9345 }
9346
9347 /*
9348 * Set-up the SAX context
9349 */
9350 if (sax != NULL) {
9351 if (ctxt->sax != NULL)
9352 xmlFree(ctxt->sax);
9353 ctxt->sax = sax;
9354 ctxt->userData = NULL;
9355 }
9356
9357 /*
9358 * Ask the Entity resolver to load the damn thing
9359 */
9360
9361 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9362 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9363 if (input == NULL) {
9364 if (sax != NULL) ctxt->sax = NULL;
9365 xmlFreeParserCtxt(ctxt);
9366 return(NULL);
9367 }
9368
9369 /*
9370 * plug some encoding conversion routines here.
9371 */
9372 xmlPushInput(ctxt, input);
9373 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9374 xmlSwitchEncoding(ctxt, enc);
9375
9376 if (input->filename == NULL)
9377 input->filename = (char *) xmlStrdup(SystemID);
9378 input->line = 1;
9379 input->col = 1;
9380 input->base = ctxt->input->cur;
9381 input->cur = ctxt->input->cur;
9382 input->free = NULL;
9383
9384 /*
9385 * let's parse that entity knowing it's an external subset.
9386 */
9387 ctxt->inSubset = 2;
9388 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9389 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9390 ExternalID, SystemID);
9391 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9392
9393 if (ctxt->myDoc != NULL) {
9394 if (ctxt->wellFormed) {
9395 ret = ctxt->myDoc->extSubset;
9396 ctxt->myDoc->extSubset = NULL;
9397 } else {
9398 ret = NULL;
9399 }
9400 xmlFreeDoc(ctxt->myDoc);
9401 ctxt->myDoc = NULL;
9402 }
9403 if (sax != NULL) ctxt->sax = NULL;
9404 xmlFreeParserCtxt(ctxt);
9405
9406 return(ret);
9407}
9408
9409/**
9410 * xmlParseDTD:
9411 * @ExternalID: a NAME* containing the External ID of the DTD
9412 * @SystemID: a NAME* containing the URL to the DTD
9413 *
9414 * Load and parse an external subset.
9415 *
9416 * Returns the resulting xmlDtdPtr or NULL in case of error.
9417 */
9418
9419xmlDtdPtr
9420xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9421 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9422}
9423
/************************************************************************
 *                                                                      *
 *                Front ends when parsing an Entity                     *
 *                                                                      *
 ************************************************************************/
9429
9430/**
Owen Taylor3473f882001-02-23 17:55:21 +00009431 * xmlParseCtxtExternalEntity:
9432 * @ctx: the existing parsing context
9433 * @URL: the URL for the entity to load
9434 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009435 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009436 *
9437 * Parse an external general entity within an existing parsing context
9438 * An external general parsed entity is well-formed if it matches the
9439 * production labeled extParsedEnt.
9440 *
9441 * [78] extParsedEnt ::= TextDecl? content
9442 *
9443 * Returns 0 if the entity is well formed, -1 in case of args problem and
9444 * the parser error code otherwise
9445 */
9446
9447int
9448xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009449 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009450 xmlParserCtxtPtr ctxt;
9451 xmlDocPtr newDoc;
9452 xmlSAXHandlerPtr oldsax = NULL;
9453 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009454 xmlChar start[4];
9455 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009456
9457 if (ctx->depth > 40) {
9458 return(XML_ERR_ENTITY_LOOP);
9459 }
9460
Daniel Veillardcda96922001-08-21 10:56:31 +00009461 if (lst != NULL)
9462 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009463 if ((URL == NULL) && (ID == NULL))
9464 return(-1);
9465 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9466 return(-1);
9467
9468
9469 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9470 if (ctxt == NULL) return(-1);
9471 ctxt->userData = ctxt;
9472 oldsax = ctxt->sax;
9473 ctxt->sax = ctx->sax;
9474 newDoc = xmlNewDoc(BAD_CAST "1.0");
9475 if (newDoc == NULL) {
9476 xmlFreeParserCtxt(ctxt);
9477 return(-1);
9478 }
9479 if (ctx->myDoc != NULL) {
9480 newDoc->intSubset = ctx->myDoc->intSubset;
9481 newDoc->extSubset = ctx->myDoc->extSubset;
9482 }
9483 if (ctx->myDoc->URL != NULL) {
9484 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9485 }
9486 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9487 if (newDoc->children == NULL) {
9488 ctxt->sax = oldsax;
9489 xmlFreeParserCtxt(ctxt);
9490 newDoc->intSubset = NULL;
9491 newDoc->extSubset = NULL;
9492 xmlFreeDoc(newDoc);
9493 return(-1);
9494 }
9495 nodePush(ctxt, newDoc->children);
9496 if (ctx->myDoc == NULL) {
9497 ctxt->myDoc = newDoc;
9498 } else {
9499 ctxt->myDoc = ctx->myDoc;
9500 newDoc->children->doc = ctx->myDoc;
9501 }
9502
Daniel Veillard87a764e2001-06-20 17:41:10 +00009503 /*
9504 * Get the 4 first bytes and decode the charset
9505 * if enc != XML_CHAR_ENCODING_NONE
9506 * plug some encoding conversion routines.
9507 */
9508 GROW
9509 start[0] = RAW;
9510 start[1] = NXT(1);
9511 start[2] = NXT(2);
9512 start[3] = NXT(3);
9513 enc = xmlDetectCharEncoding(start, 4);
9514 if (enc != XML_CHAR_ENCODING_NONE) {
9515 xmlSwitchEncoding(ctxt, enc);
9516 }
9517
Owen Taylor3473f882001-02-23 17:55:21 +00009518 /*
9519 * Parse a possible text declaration first
9520 */
Owen Taylor3473f882001-02-23 17:55:21 +00009521 if ((RAW == '<') && (NXT(1) == '?') &&
9522 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9523 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9524 xmlParseTextDecl(ctxt);
9525 }
9526
9527 /*
9528 * Doing validity checking on chunk doesn't make sense
9529 */
9530 ctxt->instate = XML_PARSER_CONTENT;
9531 ctxt->validate = ctx->validate;
9532 ctxt->loadsubset = ctx->loadsubset;
9533 ctxt->depth = ctx->depth + 1;
9534 ctxt->replaceEntities = ctx->replaceEntities;
9535 if (ctxt->validate) {
9536 ctxt->vctxt.error = ctx->vctxt.error;
9537 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009538 } else {
9539 ctxt->vctxt.error = NULL;
9540 ctxt->vctxt.warning = NULL;
9541 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009542 ctxt->vctxt.nodeTab = NULL;
9543 ctxt->vctxt.nodeNr = 0;
9544 ctxt->vctxt.nodeMax = 0;
9545 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009546
9547 xmlParseContent(ctxt);
9548
9549 if ((RAW == '<') && (NXT(1) == '/')) {
9550 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9552 ctxt->sax->error(ctxt->userData,
9553 "chunk is not well balanced\n");
9554 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009555 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009556 } else if (RAW != 0) {
9557 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9559 ctxt->sax->error(ctxt->userData,
9560 "extra content at the end of well balanced chunk\n");
9561 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009562 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009563 }
9564 if (ctxt->node != newDoc->children) {
9565 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9567 ctxt->sax->error(ctxt->userData,
9568 "chunk is not well balanced\n");
9569 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009570 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009571 }
9572
9573 if (!ctxt->wellFormed) {
9574 if (ctxt->errNo == 0)
9575 ret = 1;
9576 else
9577 ret = ctxt->errNo;
9578 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009579 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009580 xmlNodePtr cur;
9581
9582 /*
9583 * Return the newly created nodeset after unlinking it from
9584 * they pseudo parent.
9585 */
9586 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009587 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009588 while (cur != NULL) {
9589 cur->parent = NULL;
9590 cur = cur->next;
9591 }
9592 newDoc->children->children = NULL;
9593 }
9594 ret = 0;
9595 }
9596 ctxt->sax = oldsax;
9597 xmlFreeParserCtxt(ctxt);
9598 newDoc->intSubset = NULL;
9599 newDoc->extSubset = NULL;
9600 xmlFreeDoc(newDoc);
9601
9602 return(ret);
9603}
9604
9605/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009606 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009607 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009608 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009609 * @sax: the SAX handler bloc (possibly NULL)
9610 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9611 * @depth: Used for loop detection, use 0
9612 * @URL: the URL for the entity to load
9613 * @ID: the System ID for the entity to load
9614 * @list: the return value for the set of parsed nodes
9615 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009616 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009617 *
9618 * Returns 0 if the entity is well formed, -1 in case of args problem and
9619 * the parser error code otherwise
9620 */
9621
Daniel Veillard257d9102001-05-08 10:41:44 +00009622static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009623xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9624 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009625 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009626 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009627 xmlParserCtxtPtr ctxt;
9628 xmlDocPtr newDoc;
9629 xmlSAXHandlerPtr oldsax = NULL;
9630 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009631 xmlChar start[4];
9632 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009633
9634 if (depth > 40) {
9635 return(XML_ERR_ENTITY_LOOP);
9636 }
9637
9638
9639
9640 if (list != NULL)
9641 *list = NULL;
9642 if ((URL == NULL) && (ID == NULL))
9643 return(-1);
9644 if (doc == NULL) /* @@ relax but check for dereferences */
9645 return(-1);
9646
9647
9648 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9649 if (ctxt == NULL) return(-1);
9650 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009651 if (oldctxt != NULL) {
9652 ctxt->_private = oldctxt->_private;
9653 ctxt->loadsubset = oldctxt->loadsubset;
9654 ctxt->validate = oldctxt->validate;
9655 ctxt->external = oldctxt->external;
9656 } else {
9657 /*
9658 * Doing validity checking on chunk without context
9659 * doesn't make sense
9660 */
9661 ctxt->_private = NULL;
9662 ctxt->validate = 0;
9663 ctxt->external = 2;
9664 ctxt->loadsubset = 0;
9665 }
Owen Taylor3473f882001-02-23 17:55:21 +00009666 if (sax != NULL) {
9667 oldsax = ctxt->sax;
9668 ctxt->sax = sax;
9669 if (user_data != NULL)
9670 ctxt->userData = user_data;
9671 }
9672 newDoc = xmlNewDoc(BAD_CAST "1.0");
9673 if (newDoc == NULL) {
9674 xmlFreeParserCtxt(ctxt);
9675 return(-1);
9676 }
9677 if (doc != NULL) {
9678 newDoc->intSubset = doc->intSubset;
9679 newDoc->extSubset = doc->extSubset;
9680 }
9681 if (doc->URL != NULL) {
9682 newDoc->URL = xmlStrdup(doc->URL);
9683 }
9684 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9685 if (newDoc->children == NULL) {
9686 if (sax != NULL)
9687 ctxt->sax = oldsax;
9688 xmlFreeParserCtxt(ctxt);
9689 newDoc->intSubset = NULL;
9690 newDoc->extSubset = NULL;
9691 xmlFreeDoc(newDoc);
9692 return(-1);
9693 }
9694 nodePush(ctxt, newDoc->children);
9695 if (doc == NULL) {
9696 ctxt->myDoc = newDoc;
9697 } else {
9698 ctxt->myDoc = doc;
9699 newDoc->children->doc = doc;
9700 }
9701
Daniel Veillard87a764e2001-06-20 17:41:10 +00009702 /*
9703 * Get the 4 first bytes and decode the charset
9704 * if enc != XML_CHAR_ENCODING_NONE
9705 * plug some encoding conversion routines.
9706 */
9707 GROW;
9708 start[0] = RAW;
9709 start[1] = NXT(1);
9710 start[2] = NXT(2);
9711 start[3] = NXT(3);
9712 enc = xmlDetectCharEncoding(start, 4);
9713 if (enc != XML_CHAR_ENCODING_NONE) {
9714 xmlSwitchEncoding(ctxt, enc);
9715 }
9716
Owen Taylor3473f882001-02-23 17:55:21 +00009717 /*
9718 * Parse a possible text declaration first
9719 */
Owen Taylor3473f882001-02-23 17:55:21 +00009720 if ((RAW == '<') && (NXT(1) == '?') &&
9721 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9722 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9723 xmlParseTextDecl(ctxt);
9724 }
9725
Owen Taylor3473f882001-02-23 17:55:21 +00009726 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009727 ctxt->depth = depth;
9728
9729 xmlParseContent(ctxt);
9730
Daniel Veillard561b7f82002-03-20 21:55:57 +00009731 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009732 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9734 ctxt->sax->error(ctxt->userData,
9735 "chunk is not well balanced\n");
9736 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009737 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009738 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009739 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9741 ctxt->sax->error(ctxt->userData,
9742 "extra content at the end of well balanced chunk\n");
9743 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009744 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009745 }
9746 if (ctxt->node != newDoc->children) {
9747 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9749 ctxt->sax->error(ctxt->userData,
9750 "chunk is not well balanced\n");
9751 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009752 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009753 }
9754
9755 if (!ctxt->wellFormed) {
9756 if (ctxt->errNo == 0)
9757 ret = 1;
9758 else
9759 ret = ctxt->errNo;
9760 } else {
9761 if (list != NULL) {
9762 xmlNodePtr cur;
9763
9764 /*
9765 * Return the newly created nodeset after unlinking it from
9766 * they pseudo parent.
9767 */
9768 cur = newDoc->children->children;
9769 *list = cur;
9770 while (cur != NULL) {
9771 cur->parent = NULL;
9772 cur = cur->next;
9773 }
9774 newDoc->children->children = NULL;
9775 }
9776 ret = 0;
9777 }
9778 if (sax != NULL)
9779 ctxt->sax = oldsax;
9780 xmlFreeParserCtxt(ctxt);
9781 newDoc->intSubset = NULL;
9782 newDoc->extSubset = NULL;
9783 xmlFreeDoc(newDoc);
9784
9785 return(ret);
9786}
9787
9788/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009789 * xmlParseExternalEntity:
9790 * @doc: the document the chunk pertains to
9791 * @sax: the SAX handler bloc (possibly NULL)
9792 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9793 * @depth: Used for loop detection, use 0
9794 * @URL: the URL for the entity to load
9795 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009796 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009797 *
9798 * Parse an external general entity
9799 * An external general parsed entity is well-formed if it matches the
9800 * production labeled extParsedEnt.
9801 *
9802 * [78] extParsedEnt ::= TextDecl? content
9803 *
9804 * Returns 0 if the entity is well formed, -1 in case of args problem and
9805 * the parser error code otherwise
9806 */
9807
9808int
9809xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009810 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009811 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009812 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009813}
9814
9815/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009816 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009817 * @doc: the document the chunk pertains to
9818 * @sax: the SAX handler bloc (possibly NULL)
9819 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9820 * @depth: Used for loop detection, use 0
9821 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009822 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009823 *
9824 * Parse a well-balanced chunk of an XML document
9825 * called by the parser
9826 * The allowed sequence for the Well Balanced Chunk is the one defined by
9827 * the content production in the XML grammar:
9828 *
9829 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9830 *
9831 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9832 * the parser error code otherwise
9833 */
9834
9835int
9836xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009837 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009838 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9839 depth, string, lst, 0 );
9840}
9841
9842/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009843 * xmlParseBalancedChunkMemoryInternal:
9844 * @oldctxt: the existing parsing context
9845 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9846 * @user_data: the user data field for the parser context
9847 * @lst: the return value for the set of parsed nodes
9848 *
9849 *
9850 * Parse a well-balanced chunk of an XML document
9851 * called by the parser
9852 * The allowed sequence for the Well Balanced Chunk is the one defined by
9853 * the content production in the XML grammar:
9854 *
9855 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9856 *
9857 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9858 * the parser error code otherwise
9859 *
9860 * In case recover is set to 1, the nodelist will not be empty even if
9861 * the parsed chunk is not well balanced.
9862 */
9863static int
9864xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9865 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9866 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009867 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009868 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009869 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009870 int size;
9871 int ret = 0;
9872
9873 if (oldctxt->depth > 40) {
9874 return(XML_ERR_ENTITY_LOOP);
9875 }
9876
9877
9878 if (lst != NULL)
9879 *lst = NULL;
9880 if (string == NULL)
9881 return(-1);
9882
9883 size = xmlStrlen(string);
9884
9885 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9886 if (ctxt == NULL) return(-1);
9887 if (user_data != NULL)
9888 ctxt->userData = user_data;
9889 else
9890 ctxt->userData = ctxt;
9891
9892 oldsax = ctxt->sax;
9893 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009894 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009895 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009896 newDoc = xmlNewDoc(BAD_CAST "1.0");
9897 if (newDoc == NULL) {
9898 ctxt->sax = oldsax;
9899 xmlFreeParserCtxt(ctxt);
9900 return(-1);
9901 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009902 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009903 } else {
9904 ctxt->myDoc = oldctxt->myDoc;
9905 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009906 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009907 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009908 BAD_CAST "pseudoroot", NULL);
9909 if (ctxt->myDoc->children == NULL) {
9910 ctxt->sax = oldsax;
9911 xmlFreeParserCtxt(ctxt);
9912 if (newDoc != NULL)
9913 xmlFreeDoc(newDoc);
9914 return(-1);
9915 }
9916 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009917 ctxt->instate = XML_PARSER_CONTENT;
9918 ctxt->depth = oldctxt->depth + 1;
9919
9920 /*
9921 * Doing validity checking on chunk doesn't make sense
9922 */
9923 ctxt->validate = 0;
9924 ctxt->loadsubset = oldctxt->loadsubset;
9925
Daniel Veillard68e9e742002-11-16 15:35:11 +00009926 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009927 if ((RAW == '<') && (NXT(1) == '/')) {
9928 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9930 ctxt->sax->error(ctxt->userData,
9931 "chunk is not well balanced\n");
9932 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009933 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009934 } else if (RAW != 0) {
9935 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9937 ctxt->sax->error(ctxt->userData,
9938 "extra content at the end of well balanced chunk\n");
9939 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009940 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009941 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009942 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009943 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9945 ctxt->sax->error(ctxt->userData,
9946 "chunk is not well balanced\n");
9947 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009948 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009949 }
9950
9951 if (!ctxt->wellFormed) {
9952 if (ctxt->errNo == 0)
9953 ret = 1;
9954 else
9955 ret = ctxt->errNo;
9956 } else {
9957 ret = 0;
9958 }
9959
9960 if ((lst != NULL) && (ret == 0)) {
9961 xmlNodePtr cur;
9962
9963 /*
9964 * Return the newly created nodeset after unlinking it from
9965 * they pseudo parent.
9966 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009967 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009968 *lst = cur;
9969 while (cur != NULL) {
9970 cur->parent = NULL;
9971 cur = cur->next;
9972 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009973 ctxt->myDoc->children->children = NULL;
9974 }
9975 if (ctxt->myDoc != NULL) {
9976 xmlFreeNode(ctxt->myDoc->children);
9977 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009978 }
9979
9980 ctxt->sax = oldsax;
9981 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009982 if (newDoc != NULL)
9983 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009984
9985 return(ret);
9986}
9987
9988/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009989 * xmlParseBalancedChunkMemoryRecover:
9990 * @doc: the document the chunk pertains to
9991 * @sax: the SAX handler bloc (possibly NULL)
9992 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9993 * @depth: Used for loop detection, use 0
9994 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9995 * @lst: the return value for the set of parsed nodes
9996 * @recover: return nodes even if the data is broken (use 0)
9997 *
9998 *
9999 * Parse a well-balanced chunk of an XML document
10000 * called by the parser
10001 * The allowed sequence for the Well Balanced Chunk is the one defined by
10002 * the content production in the XML grammar:
10003 *
10004 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10005 *
10006 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10007 * the parser error code otherwise
10008 *
10009 * In case recover is set to 1, the nodelist will not be empty even if
10010 * the parsed chunk is not well balanced.
10011 */
10012int
10013xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10014 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10015 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010016 xmlParserCtxtPtr ctxt;
10017 xmlDocPtr newDoc;
10018 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010019 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010020 int size;
10021 int ret = 0;
10022
10023 if (depth > 40) {
10024 return(XML_ERR_ENTITY_LOOP);
10025 }
10026
10027
Daniel Veillardcda96922001-08-21 10:56:31 +000010028 if (lst != NULL)
10029 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010030 if (string == NULL)
10031 return(-1);
10032
10033 size = xmlStrlen(string);
10034
10035 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10036 if (ctxt == NULL) return(-1);
10037 ctxt->userData = ctxt;
10038 if (sax != NULL) {
10039 oldsax = ctxt->sax;
10040 ctxt->sax = sax;
10041 if (user_data != NULL)
10042 ctxt->userData = user_data;
10043 }
10044 newDoc = xmlNewDoc(BAD_CAST "1.0");
10045 if (newDoc == NULL) {
10046 xmlFreeParserCtxt(ctxt);
10047 return(-1);
10048 }
10049 if (doc != NULL) {
10050 newDoc->intSubset = doc->intSubset;
10051 newDoc->extSubset = doc->extSubset;
10052 }
10053 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10054 if (newDoc->children == NULL) {
10055 if (sax != NULL)
10056 ctxt->sax = oldsax;
10057 xmlFreeParserCtxt(ctxt);
10058 newDoc->intSubset = NULL;
10059 newDoc->extSubset = NULL;
10060 xmlFreeDoc(newDoc);
10061 return(-1);
10062 }
10063 nodePush(ctxt, newDoc->children);
10064 if (doc == NULL) {
10065 ctxt->myDoc = newDoc;
10066 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010067 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010068 newDoc->children->doc = doc;
10069 }
10070 ctxt->instate = XML_PARSER_CONTENT;
10071 ctxt->depth = depth;
10072
10073 /*
10074 * Doing validity checking on chunk doesn't make sense
10075 */
10076 ctxt->validate = 0;
10077 ctxt->loadsubset = 0;
10078
Daniel Veillardb39bc392002-10-26 19:29:51 +000010079 if ( doc != NULL ){
10080 content = doc->children;
10081 doc->children = NULL;
10082 xmlParseContent(ctxt);
10083 doc->children = content;
10084 }
10085 else {
10086 xmlParseContent(ctxt);
10087 }
Owen Taylor3473f882001-02-23 17:55:21 +000010088 if ((RAW == '<') && (NXT(1) == '/')) {
10089 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10091 ctxt->sax->error(ctxt->userData,
10092 "chunk is not well balanced\n");
10093 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010094 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010095 } else if (RAW != 0) {
10096 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10098 ctxt->sax->error(ctxt->userData,
10099 "extra content at the end of well balanced chunk\n");
10100 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010101 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010102 }
10103 if (ctxt->node != newDoc->children) {
10104 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10106 ctxt->sax->error(ctxt->userData,
10107 "chunk is not well balanced\n");
10108 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010109 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010110 }
10111
10112 if (!ctxt->wellFormed) {
10113 if (ctxt->errNo == 0)
10114 ret = 1;
10115 else
10116 ret = ctxt->errNo;
10117 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010118 ret = 0;
10119 }
10120
10121 if (lst != NULL && (ret == 0 || recover == 1)) {
10122 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010123
10124 /*
10125 * Return the newly created nodeset after unlinking it from
10126 * they pseudo parent.
10127 */
10128 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010129 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010130 while (cur != NULL) {
10131 cur->parent = NULL;
10132 cur = cur->next;
10133 }
10134 newDoc->children->children = NULL;
10135 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010136
Owen Taylor3473f882001-02-23 17:55:21 +000010137 if (sax != NULL)
10138 ctxt->sax = oldsax;
10139 xmlFreeParserCtxt(ctxt);
10140 newDoc->intSubset = NULL;
10141 newDoc->extSubset = NULL;
10142 xmlFreeDoc(newDoc);
10143
10144 return(ret);
10145}
10146
10147/**
10148 * xmlSAXParseEntity:
10149 * @sax: the SAX handler block
10150 * @filename: the filename
10151 *
10152 * parse an XML external entity out of context and build a tree.
10153 * It use the given SAX function block to handle the parsing callback.
10154 * If sax is NULL, fallback to the default DOM tree building routines.
10155 *
10156 * [78] extParsedEnt ::= TextDecl? content
10157 *
10158 * This correspond to a "Well Balanced" chunk
10159 *
10160 * Returns the resulting document tree
10161 */
10162
10163xmlDocPtr
10164xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10165 xmlDocPtr ret;
10166 xmlParserCtxtPtr ctxt;
10167 char *directory = NULL;
10168
10169 ctxt = xmlCreateFileParserCtxt(filename);
10170 if (ctxt == NULL) {
10171 return(NULL);
10172 }
10173 if (sax != NULL) {
10174 if (ctxt->sax != NULL)
10175 xmlFree(ctxt->sax);
10176 ctxt->sax = sax;
10177 ctxt->userData = NULL;
10178 }
10179
10180 if ((ctxt->directory == NULL) && (directory == NULL))
10181 directory = xmlParserGetDirectory(filename);
10182
10183 xmlParseExtParsedEnt(ctxt);
10184
10185 if (ctxt->wellFormed)
10186 ret = ctxt->myDoc;
10187 else {
10188 ret = NULL;
10189 xmlFreeDoc(ctxt->myDoc);
10190 ctxt->myDoc = NULL;
10191 }
10192 if (sax != NULL)
10193 ctxt->sax = NULL;
10194 xmlFreeParserCtxt(ctxt);
10195
10196 return(ret);
10197}
10198
10199/**
10200 * xmlParseEntity:
10201 * @filename: the filename
10202 *
10203 * parse an XML external entity out of context and build a tree.
10204 *
10205 * [78] extParsedEnt ::= TextDecl? content
10206 *
10207 * This correspond to a "Well Balanced" chunk
10208 *
10209 * Returns the resulting document tree
10210 */
10211
10212xmlDocPtr
10213xmlParseEntity(const char *filename) {
10214 return(xmlSAXParseEntity(NULL, filename));
10215}
10216
10217/**
10218 * xmlCreateEntityParserCtxt:
10219 * @URL: the entity URL
10220 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010221 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010222 *
10223 * Create a parser context for an external entity
10224 * Automatic support for ZLIB/Compress compressed document is provided
10225 * by default if found at compile-time.
10226 *
10227 * Returns the new parser context or NULL
10228 */
10229xmlParserCtxtPtr
10230xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10231 const xmlChar *base) {
10232 xmlParserCtxtPtr ctxt;
10233 xmlParserInputPtr inputStream;
10234 char *directory = NULL;
10235 xmlChar *uri;
10236
10237 ctxt = xmlNewParserCtxt();
10238 if (ctxt == NULL) {
10239 return(NULL);
10240 }
10241
10242 uri = xmlBuildURI(URL, base);
10243
10244 if (uri == NULL) {
10245 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10246 if (inputStream == NULL) {
10247 xmlFreeParserCtxt(ctxt);
10248 return(NULL);
10249 }
10250
10251 inputPush(ctxt, inputStream);
10252
10253 if ((ctxt->directory == NULL) && (directory == NULL))
10254 directory = xmlParserGetDirectory((char *)URL);
10255 if ((ctxt->directory == NULL) && (directory != NULL))
10256 ctxt->directory = directory;
10257 } else {
10258 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10259 if (inputStream == NULL) {
10260 xmlFree(uri);
10261 xmlFreeParserCtxt(ctxt);
10262 return(NULL);
10263 }
10264
10265 inputPush(ctxt, inputStream);
10266
10267 if ((ctxt->directory == NULL) && (directory == NULL))
10268 directory = xmlParserGetDirectory((char *)uri);
10269 if ((ctxt->directory == NULL) && (directory != NULL))
10270 ctxt->directory = directory;
10271 xmlFree(uri);
10272 }
10273
10274 return(ctxt);
10275}
10276
10277/************************************************************************
10278 * *
10279 * Front ends when parsing from a file *
10280 * *
10281 ************************************************************************/
10282
10283/**
10284 * xmlCreateFileParserCtxt:
10285 * @filename: the filename
10286 *
10287 * Create a parser context for a file content.
10288 * Automatic support for ZLIB/Compress compressed document is provided
10289 * by default if found at compile-time.
10290 *
10291 * Returns the new parser context or NULL
10292 */
10293xmlParserCtxtPtr
10294xmlCreateFileParserCtxt(const char *filename)
10295{
10296 xmlParserCtxtPtr ctxt;
10297 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010298 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010299 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010300
Owen Taylor3473f882001-02-23 17:55:21 +000010301 ctxt = xmlNewParserCtxt();
10302 if (ctxt == NULL) {
10303 if (xmlDefaultSAXHandler.error != NULL) {
10304 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10305 }
10306 return(NULL);
10307 }
10308
Daniel Veillardf4862f02002-09-10 11:13:43 +000010309 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10310 if (normalized == NULL) {
10311 xmlFreeParserCtxt(ctxt);
10312 return(NULL);
10313 }
10314 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010315 if (inputStream == NULL) {
10316 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010317 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010318 return(NULL);
10319 }
10320
Owen Taylor3473f882001-02-23 17:55:21 +000010321 inputPush(ctxt, inputStream);
10322 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010323 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010324 if ((ctxt->directory == NULL) && (directory != NULL))
10325 ctxt->directory = directory;
10326
Daniel Veillardf4862f02002-09-10 11:13:43 +000010327 xmlFree(normalized);
10328
Owen Taylor3473f882001-02-23 17:55:21 +000010329 return(ctxt);
10330}
10331
10332/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010333 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010334 * @sax: the SAX handler block
10335 * @filename: the filename
10336 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10337 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010338 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010339 *
10340 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10341 * compressed document is provided by default if found at compile-time.
10342 * It use the given SAX function block to handle the parsing callback.
10343 * If sax is NULL, fallback to the default DOM tree building routines.
10344 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010345 * User data (void *) is stored within the parser context in the
10346 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010347 *
Owen Taylor3473f882001-02-23 17:55:21 +000010348 * Returns the resulting document tree
10349 */
10350
10351xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010352xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10353 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010354 xmlDocPtr ret;
10355 xmlParserCtxtPtr ctxt;
10356 char *directory = NULL;
10357
Daniel Veillard635ef722001-10-29 11:48:19 +000010358 xmlInitParser();
10359
Owen Taylor3473f882001-02-23 17:55:21 +000010360 ctxt = xmlCreateFileParserCtxt(filename);
10361 if (ctxt == NULL) {
10362 return(NULL);
10363 }
10364 if (sax != NULL) {
10365 if (ctxt->sax != NULL)
10366 xmlFree(ctxt->sax);
10367 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010368 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010369 if (data!=NULL) {
10370 ctxt->_private=data;
10371 }
Owen Taylor3473f882001-02-23 17:55:21 +000010372
10373 if ((ctxt->directory == NULL) && (directory == NULL))
10374 directory = xmlParserGetDirectory(filename);
10375 if ((ctxt->directory == NULL) && (directory != NULL))
10376 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10377
Daniel Veillarddad3f682002-11-17 16:47:27 +000010378 ctxt->recovery = recovery;
10379
Owen Taylor3473f882001-02-23 17:55:21 +000010380 xmlParseDocument(ctxt);
10381
10382 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10383 else {
10384 ret = NULL;
10385 xmlFreeDoc(ctxt->myDoc);
10386 ctxt->myDoc = NULL;
10387 }
10388 if (sax != NULL)
10389 ctxt->sax = NULL;
10390 xmlFreeParserCtxt(ctxt);
10391
10392 return(ret);
10393}
10394
10395/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010396 * xmlSAXParseFile:
10397 * @sax: the SAX handler block
10398 * @filename: the filename
10399 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10400 * documents
10401 *
10402 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10403 * compressed document is provided by default if found at compile-time.
10404 * It use the given SAX function block to handle the parsing callback.
10405 * If sax is NULL, fallback to the default DOM tree building routines.
10406 *
10407 * Returns the resulting document tree
10408 */
10409
10410xmlDocPtr
10411xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10412 int recovery) {
10413 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10414}
10415
10416/**
Owen Taylor3473f882001-02-23 17:55:21 +000010417 * xmlRecoverDoc:
10418 * @cur: a pointer to an array of xmlChar
10419 *
10420 * parse an XML in-memory document and build a tree.
10421 * In the case the document is not Well Formed, a tree is built anyway
10422 *
10423 * Returns the resulting document tree
10424 */
10425
10426xmlDocPtr
10427xmlRecoverDoc(xmlChar *cur) {
10428 return(xmlSAXParseDoc(NULL, cur, 1));
10429}
10430
10431/**
10432 * xmlParseFile:
10433 * @filename: the filename
10434 *
10435 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10436 * compressed document is provided by default if found at compile-time.
10437 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010438 * Returns the resulting document tree if the file was wellformed,
10439 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010440 */
10441
10442xmlDocPtr
10443xmlParseFile(const char *filename) {
10444 return(xmlSAXParseFile(NULL, filename, 0));
10445}
10446
10447/**
10448 * xmlRecoverFile:
10449 * @filename: the filename
10450 *
10451 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10452 * compressed document is provided by default if found at compile-time.
10453 * In the case the document is not Well Formed, a tree is built anyway
10454 *
10455 * Returns the resulting document tree
10456 */
10457
10458xmlDocPtr
10459xmlRecoverFile(const char *filename) {
10460 return(xmlSAXParseFile(NULL, filename, 1));
10461}
10462
10463
10464/**
10465 * xmlSetupParserForBuffer:
10466 * @ctxt: an XML parser context
10467 * @buffer: a xmlChar * buffer
10468 * @filename: a file name
10469 *
10470 * Setup the parser context to parse a new buffer; Clears any prior
10471 * contents from the parser context. The buffer parameter must not be
10472 * NULL, but the filename parameter can be
10473 */
10474void
10475xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10476 const char* filename)
10477{
10478 xmlParserInputPtr input;
10479
10480 input = xmlNewInputStream(ctxt);
10481 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010482 xmlGenericError(xmlGenericErrorContext,
10483 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010484 xmlFree(ctxt);
10485 return;
10486 }
10487
10488 xmlClearParserCtxt(ctxt);
10489 if (filename != NULL)
10490 input->filename = xmlMemStrdup(filename);
10491 input->base = buffer;
10492 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010493 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010494 inputPush(ctxt, input);
10495}
10496
10497/**
10498 * xmlSAXUserParseFile:
10499 * @sax: a SAX handler
10500 * @user_data: The user data returned on SAX callbacks
10501 * @filename: a file name
10502 *
10503 * parse an XML file and call the given SAX handler routines.
10504 * Automatic support for ZLIB/Compress compressed document is provided
10505 *
10506 * Returns 0 in case of success or a error number otherwise
10507 */
10508int
10509xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10510 const char *filename) {
10511 int ret = 0;
10512 xmlParserCtxtPtr ctxt;
10513
10514 ctxt = xmlCreateFileParserCtxt(filename);
10515 if (ctxt == NULL) return -1;
10516 if (ctxt->sax != &xmlDefaultSAXHandler)
10517 xmlFree(ctxt->sax);
10518 ctxt->sax = sax;
10519 if (user_data != NULL)
10520 ctxt->userData = user_data;
10521
10522 xmlParseDocument(ctxt);
10523
10524 if (ctxt->wellFormed)
10525 ret = 0;
10526 else {
10527 if (ctxt->errNo != 0)
10528 ret = ctxt->errNo;
10529 else
10530 ret = -1;
10531 }
10532 if (sax != NULL)
10533 ctxt->sax = NULL;
10534 xmlFreeParserCtxt(ctxt);
10535
10536 return ret;
10537}
10538
10539/************************************************************************
10540 * *
10541 * Front ends when parsing from memory *
10542 * *
10543 ************************************************************************/
10544
10545/**
10546 * xmlCreateMemoryParserCtxt:
10547 * @buffer: a pointer to a char array
10548 * @size: the size of the array
10549 *
10550 * Create a parser context for an XML in-memory document.
10551 *
10552 * Returns the new parser context or NULL
10553 */
10554xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010555xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010556 xmlParserCtxtPtr ctxt;
10557 xmlParserInputPtr input;
10558 xmlParserInputBufferPtr buf;
10559
10560 if (buffer == NULL)
10561 return(NULL);
10562 if (size <= 0)
10563 return(NULL);
10564
10565 ctxt = xmlNewParserCtxt();
10566 if (ctxt == NULL)
10567 return(NULL);
10568
10569 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010570 if (buf == NULL) {
10571 xmlFreeParserCtxt(ctxt);
10572 return(NULL);
10573 }
Owen Taylor3473f882001-02-23 17:55:21 +000010574
10575 input = xmlNewInputStream(ctxt);
10576 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010577 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010578 xmlFreeParserCtxt(ctxt);
10579 return(NULL);
10580 }
10581
10582 input->filename = NULL;
10583 input->buf = buf;
10584 input->base = input->buf->buffer->content;
10585 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010586 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010587
10588 inputPush(ctxt, input);
10589 return(ctxt);
10590}
10591
10592/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010593 * xmlSAXParseMemoryWithData:
10594 * @sax: the SAX handler block
10595 * @buffer: an pointer to a char array
10596 * @size: the size of the array
10597 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10598 * documents
10599 * @data: the userdata
10600 *
10601 * parse an XML in-memory block and use the given SAX function block
10602 * to handle the parsing callback. If sax is NULL, fallback to the default
10603 * DOM tree building routines.
10604 *
10605 * User data (void *) is stored within the parser context in the
10606 * context's _private member, so it is available nearly everywhere in libxml
10607 *
10608 * Returns the resulting document tree
10609 */
10610
10611xmlDocPtr
10612xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10613 int size, int recovery, void *data) {
10614 xmlDocPtr ret;
10615 xmlParserCtxtPtr ctxt;
10616
10617 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10618 if (ctxt == NULL) return(NULL);
10619 if (sax != NULL) {
10620 if (ctxt->sax != NULL)
10621 xmlFree(ctxt->sax);
10622 ctxt->sax = sax;
10623 }
10624 if (data!=NULL) {
10625 ctxt->_private=data;
10626 }
10627
10628 xmlParseDocument(ctxt);
10629
10630 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10631 else {
10632 ret = NULL;
10633 xmlFreeDoc(ctxt->myDoc);
10634 ctxt->myDoc = NULL;
10635 }
10636 if (sax != NULL)
10637 ctxt->sax = NULL;
10638 xmlFreeParserCtxt(ctxt);
10639
10640 return(ret);
10641}
10642
10643/**
Owen Taylor3473f882001-02-23 17:55:21 +000010644 * xmlSAXParseMemory:
10645 * @sax: the SAX handler block
10646 * @buffer: an pointer to a char array
10647 * @size: the size of the array
10648 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10649 * documents
10650 *
10651 * parse an XML in-memory block and use the given SAX function block
10652 * to handle the parsing callback. If sax is NULL, fallback to the default
10653 * DOM tree building routines.
10654 *
10655 * Returns the resulting document tree
10656 */
10657xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010658xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10659 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010660 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010661}
10662
10663/**
10664 * xmlParseMemory:
10665 * @buffer: an pointer to a char array
10666 * @size: the size of the array
10667 *
10668 * parse an XML in-memory block and build a tree.
10669 *
10670 * Returns the resulting document tree
10671 */
10672
Daniel Veillard50822cb2001-07-26 20:05:51 +000010673xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010674 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10675}
10676
10677/**
10678 * xmlRecoverMemory:
10679 * @buffer: an pointer to a char array
10680 * @size: the size of the array
10681 *
10682 * parse an XML in-memory block and build a tree.
10683 * In the case the document is not Well Formed, a tree is built anyway
10684 *
10685 * Returns the resulting document tree
10686 */
10687
Daniel Veillard50822cb2001-07-26 20:05:51 +000010688xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010689 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10690}
10691
10692/**
10693 * xmlSAXUserParseMemory:
10694 * @sax: a SAX handler
10695 * @user_data: The user data returned on SAX callbacks
10696 * @buffer: an in-memory XML document input
10697 * @size: the length of the XML document in bytes
10698 *
10699 * A better SAX parsing routine.
10700 * parse an XML in-memory buffer and call the given SAX handler routines.
10701 *
10702 * Returns 0 in case of success or a error number otherwise
10703 */
10704int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010705 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010706 int ret = 0;
10707 xmlParserCtxtPtr ctxt;
10708 xmlSAXHandlerPtr oldsax = NULL;
10709
Daniel Veillard9e923512002-08-14 08:48:52 +000010710 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010711 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10712 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010713 oldsax = ctxt->sax;
10714 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010715 if (user_data != NULL)
10716 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010717
10718 xmlParseDocument(ctxt);
10719
10720 if (ctxt->wellFormed)
10721 ret = 0;
10722 else {
10723 if (ctxt->errNo != 0)
10724 ret = ctxt->errNo;
10725 else
10726 ret = -1;
10727 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010728 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010729 xmlFreeParserCtxt(ctxt);
10730
10731 return ret;
10732}
10733
10734/**
10735 * xmlCreateDocParserCtxt:
10736 * @cur: a pointer to an array of xmlChar
10737 *
10738 * Creates a parser context for an XML in-memory document.
10739 *
10740 * Returns the new parser context or NULL
10741 */
10742xmlParserCtxtPtr
10743xmlCreateDocParserCtxt(xmlChar *cur) {
10744 int len;
10745
10746 if (cur == NULL)
10747 return(NULL);
10748 len = xmlStrlen(cur);
10749 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10750}
10751
10752/**
10753 * xmlSAXParseDoc:
10754 * @sax: the SAX handler block
10755 * @cur: a pointer to an array of xmlChar
10756 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10757 * documents
10758 *
10759 * parse an XML in-memory document and build a tree.
10760 * It use the given SAX function block to handle the parsing callback.
10761 * If sax is NULL, fallback to the default DOM tree building routines.
10762 *
10763 * Returns the resulting document tree
10764 */
10765
10766xmlDocPtr
10767xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10768 xmlDocPtr ret;
10769 xmlParserCtxtPtr ctxt;
10770
10771 if (cur == NULL) return(NULL);
10772
10773
10774 ctxt = xmlCreateDocParserCtxt(cur);
10775 if (ctxt == NULL) return(NULL);
10776 if (sax != NULL) {
10777 ctxt->sax = sax;
10778 ctxt->userData = NULL;
10779 }
10780
10781 xmlParseDocument(ctxt);
10782 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10783 else {
10784 ret = NULL;
10785 xmlFreeDoc(ctxt->myDoc);
10786 ctxt->myDoc = NULL;
10787 }
10788 if (sax != NULL)
10789 ctxt->sax = NULL;
10790 xmlFreeParserCtxt(ctxt);
10791
10792 return(ret);
10793}
10794
10795/**
10796 * xmlParseDoc:
10797 * @cur: a pointer to an array of xmlChar
10798 *
10799 * parse an XML in-memory document and build a tree.
10800 *
10801 * Returns the resulting document tree
10802 */
10803
10804xmlDocPtr
10805xmlParseDoc(xmlChar *cur) {
10806 return(xmlSAXParseDoc(NULL, cur, 0));
10807}
10808
Daniel Veillard8107a222002-01-13 14:10:10 +000010809/************************************************************************
10810 * *
10811 * Specific function to keep track of entities references *
10812 * and used by the XSLT debugger *
10813 * *
10814 ************************************************************************/
10815
10816static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10817
10818/**
10819 * xmlAddEntityReference:
10820 * @ent : A valid entity
10821 * @firstNode : A valid first node for children of entity
10822 * @lastNode : A valid last node of children entity
10823 *
10824 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10825 */
10826static void
10827xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10828 xmlNodePtr lastNode)
10829{
10830 if (xmlEntityRefFunc != NULL) {
10831 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10832 }
10833}
10834
10835
10836/**
10837 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010838 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010839 *
10840 * Set the function to call call back when a xml reference has been made
10841 */
10842void
10843xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10844{
10845 xmlEntityRefFunc = func;
10846}
Owen Taylor3473f882001-02-23 17:55:21 +000010847
10848/************************************************************************
10849 * *
10850 * Miscellaneous *
10851 * *
10852 ************************************************************************/
10853
10854#ifdef LIBXML_XPATH_ENABLED
10855#include <libxml/xpath.h>
10856#endif
10857
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010858extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010859static int xmlParserInitialized = 0;
10860
10861/**
10862 * xmlInitParser:
10863 *
10864 * Initialization function for the XML parser.
10865 * This is not reentrant. Call once before processing in case of
10866 * use in multithreaded programs.
10867 */
10868
10869void
10870xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010871 if (xmlParserInitialized != 0)
10872 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010873
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010874 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10875 (xmlGenericError == NULL))
10876 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010877 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010878 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010879 xmlInitCharEncodingHandlers();
10880 xmlInitializePredefinedEntities();
10881 xmlDefaultSAXHandlerInit();
10882 xmlRegisterDefaultInputCallbacks();
10883 xmlRegisterDefaultOutputCallbacks();
10884#ifdef LIBXML_HTML_ENABLED
10885 htmlInitAutoClose();
10886 htmlDefaultSAXHandlerInit();
10887#endif
10888#ifdef LIBXML_XPATH_ENABLED
10889 xmlXPathInit();
10890#endif
10891 xmlParserInitialized = 1;
10892}
10893
10894/**
10895 * xmlCleanupParser:
10896 *
10897 * Cleanup function for the XML parser. It tries to reclaim all
10898 * parsing related global memory allocated for the parser processing.
10899 * It doesn't deallocate any document related memory. Calling this
10900 * function should not prevent reusing the parser.
10901 */
10902
10903void
10904xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010905 xmlCleanupCharEncodingHandlers();
10906 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010907#ifdef LIBXML_CATALOG_ENABLED
10908 xmlCatalogCleanup();
10909#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010910 xmlCleanupThreads();
10911 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010912}