blob: a38245e9c6f1d0bf7e9cd9eedcb5257d94c04d55 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
/*
 * Processing-instruction targets carrying the "xml" prefix that are
 * allowed by W3C specs.  The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Daniel Veillard328f48c2002-11-15 15:24:34 +0000108static int
109xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
110 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000111/************************************************************************
112 * *
113 * Parser stacks related functions and macros *
114 * *
115 ************************************************************************/
116
117xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
118 const xmlChar ** str);
119
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000120/**
121 * inputPush:
122 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000123 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000124 *
125 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000126 *
127 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000129extern int
130inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
131{
132 if (ctxt->inputNr >= ctxt->inputMax) {
133 ctxt->inputMax *= 2;
134 ctxt->inputTab =
135 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
136 ctxt->inputMax *
137 sizeof(ctxt->inputTab[0]));
138 if (ctxt->inputTab == NULL) {
139 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
140 return (0);
141 }
142 }
143 ctxt->inputTab[ctxt->inputNr] = value;
144 ctxt->input = value;
145 return (ctxt->inputNr++);
146}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000148 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149 * @ctxt: an XML parser context
150 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000151 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000152 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000153 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000154 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000155extern xmlParserInputPtr
156inputPop(xmlParserCtxtPtr ctxt)
157{
158 xmlParserInputPtr ret;
159
160 if (ctxt->inputNr <= 0)
161 return (0);
162 ctxt->inputNr--;
163 if (ctxt->inputNr > 0)
164 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
165 else
166 ctxt->input = NULL;
167 ret = ctxt->inputTab[ctxt->inputNr];
168 ctxt->inputTab[ctxt->inputNr] = 0;
169 return (ret);
170}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000171/**
172 * nodePush:
173 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000174 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000175 *
176 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 *
178 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000180extern int
181nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
182{
183 if (ctxt->nodeNr >= ctxt->nodeMax) {
184 ctxt->nodeMax *= 2;
185 ctxt->nodeTab =
186 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
187 ctxt->nodeMax *
188 sizeof(ctxt->nodeTab[0]));
189 if (ctxt->nodeTab == NULL) {
190 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
191 return (0);
192 }
193 }
194 ctxt->nodeTab[ctxt->nodeNr] = value;
195 ctxt->node = value;
196 return (ctxt->nodeNr++);
197}
198/**
199 * nodePop:
200 * @ctxt: an XML parser context
201 *
202 * Pops the top element node from the node stack
203 *
204 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000205 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000206extern xmlNodePtr
207nodePop(xmlParserCtxtPtr ctxt)
208{
209 xmlNodePtr ret;
210
211 if (ctxt->nodeNr <= 0)
212 return (0);
213 ctxt->nodeNr--;
214 if (ctxt->nodeNr > 0)
215 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
216 else
217 ctxt->node = NULL;
218 ret = ctxt->nodeTab[ctxt->nodeNr];
219 ctxt->nodeTab[ctxt->nodeNr] = 0;
220 return (ret);
221}
222/**
223 * namePush:
224 * @ctxt: an XML parser context
225 * @value: the element name
226 *
227 * Pushes a new element name on top of the name stack
228 *
229 * Returns 0 in case of error, the index in the stack otherwise
230 */
231extern int
232namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
233{
234 if (ctxt->nameNr >= ctxt->nameMax) {
235 ctxt->nameMax *= 2;
236 ctxt->nameTab =
237 (xmlChar * *)xmlRealloc(ctxt->nameTab,
238 ctxt->nameMax *
239 sizeof(ctxt->nameTab[0]));
240 if (ctxt->nameTab == NULL) {
241 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
242 return (0);
243 }
244 }
245 ctxt->nameTab[ctxt->nameNr] = value;
246 ctxt->name = value;
247 return (ctxt->nameNr++);
248}
249/**
250 * namePop:
251 * @ctxt: an XML parser context
252 *
253 * Pops the top element name from the name stack
254 *
255 * Returns the name just removed
256 */
257extern xmlChar *
258namePop(xmlParserCtxtPtr ctxt)
259{
260 xmlChar *ret;
261
262 if (ctxt->nameNr <= 0)
263 return (0);
264 ctxt->nameNr--;
265 if (ctxt->nameNr > 0)
266 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
267 else
268 ctxt->name = NULL;
269 ret = ctxt->nameTab[ctxt->nameNr];
270 ctxt->nameTab[ctxt->nameNr] = 0;
271 return (ret);
272}
Owen Taylor3473f882001-02-23 17:55:21 +0000273
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000274static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000275 if (ctxt->spaceNr >= ctxt->spaceMax) {
276 ctxt->spaceMax *= 2;
277 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
278 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
279 if (ctxt->spaceTab == NULL) {
280 xmlGenericError(xmlGenericErrorContext,
281 "realloc failed !\n");
282 return(0);
283 }
284 }
285 ctxt->spaceTab[ctxt->spaceNr] = val;
286 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
287 return(ctxt->spaceNr++);
288}
289
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int ret;
292 if (ctxt->spaceNr <= 0) return(0);
293 ctxt->spaceNr--;
294 if (ctxt->spaceNr > 0)
295 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
296 else
297 ctxt->space = NULL;
298 ret = ctxt->spaceTab[ctxt->spaceNr];
299 ctxt->spaceTab[ctxt->spaceNr] = -1;
300 return(ret);
301}
302
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChar in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
336
/* RAW and CUR both expand to the byte at the current input position. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located (val) positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current input position itself (an lvalue). */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by (val) bytes, then: hand a '%' at the new position to
 * xmlParserHandlePEReference(), and pop the current input when the new
 * position holds 0 and the input cannot be grown.
 * Note that (val) is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
349
Daniel Veillard46de64e2002-05-29 08:21:33 +0000350#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \
351 xmlSHRINK (ctxt);
352
353static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
354 xmlParserInputShrink(ctxt->input);
355 if ((*ctxt->input->cur == 0) &&
356 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
357 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000358 }
Owen Taylor3473f882001-02-23 17:55:21 +0000359
Daniel Veillard46de64e2002-05-29 08:21:33 +0000360#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
361 xmlGROW (ctxt);
362
363static void xmlGROW (xmlParserCtxtPtr ctxt) {
364 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
365 if ((*ctxt->input->cur == 0) &&
366 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
367 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000368 }
Owen Taylor3473f882001-02-23 17:55:21 +0000369
/* Thin wrappers around the parser helpers of the same purpose. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte and try to grow the input when the new
 * position holds 0.  Line/column counters are not touched and no input
 * is popped.  The expansion is a brace block, not a do/while.
 */
#define NEXT1 {                                                         \
    ctxt->input->cur++;                                                 \
    ctxt->nbChars++;                                                    \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  }

/*
 * Advance by l bytes, updating line/col from the byte being left, then
 * hand a '%' at the new position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i: a direct store when the
 * encoded length l is 1, xmlCopyCharMultiByte() otherwise.  The
 * expansion is a bare if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000395
396/**
397 * xmlSkipBlankChars:
398 * @ctxt: the XML parser context
399 *
400 * skip all blanks character found at that point in the input streams.
401 * It pops up finished entities in the process if allowable at that point.
402 *
403 * Returns the number of space chars skipped
404 */
405
406int
407xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000408 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409
410 /*
411 * It's Okay to use CUR/NEXT here since all the blanks are on
412 * the ASCII range.
413 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000414 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
415 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000416 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000417 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000418 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000419 cur = ctxt->input->cur;
420 while (IS_BLANK(*cur)) {
421 if (*cur == '\n') {
422 ctxt->input->line++; ctxt->input->col = 1;
423 }
424 cur++;
425 res++;
426 if (*cur == 0) {
427 ctxt->input->cur = cur;
428 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
429 cur = ctxt->input->cur;
430 }
431 }
432 ctxt->input->cur = cur;
433 } else {
434 int cur;
435 do {
436 cur = CUR;
437 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
438 NEXT;
439 cur = CUR;
440 res++;
441 }
442 while ((cur == 0) && (ctxt->inputNr > 1) &&
443 (ctxt->instate != XML_PARSER_COMMENT)) {
444 xmlPopInput(ctxt);
445 cur = CUR;
446 }
447 /*
448 * Need to handle support of entities branching here
449 */
450 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
451 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
452 }
Owen Taylor3473f882001-02-23 17:55:21 +0000453 return(res);
454}
455
456/************************************************************************
457 * *
458 * Commodity functions to handle entities *
459 * *
460 ************************************************************************/
461
462/**
463 * xmlPopInput:
464 * @ctxt: an XML parser context
465 *
466 * xmlPopInput: the current input pointed by ctxt->input came to an end
467 * pop it and return the next char.
468 *
469 * Returns the current xmlChar in the parser context
470 */
471xmlChar
472xmlPopInput(xmlParserCtxtPtr ctxt) {
473 if (ctxt->inputNr == 1) return(0); /* End of main Input */
474 if (xmlParserDebugEntities)
475 xmlGenericError(xmlGenericErrorContext,
476 "Popping input %d\n", ctxt->inputNr);
477 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000478 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000479 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
480 return(xmlPopInput(ctxt));
481 return(CUR);
482}
483
484/**
485 * xmlPushInput:
486 * @ctxt: an XML parser context
487 * @input: an XML parser input fragment (entity, XML fragment ...).
488 *
489 * xmlPushInput: switch to a new input stream which is stacked on top
490 * of the previous one(s).
491 */
492void
493xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
494 if (input == NULL) return;
495
496 if (xmlParserDebugEntities) {
497 if ((ctxt->input != NULL) && (ctxt->input->filename))
498 xmlGenericError(xmlGenericErrorContext,
499 "%s(%d): ", ctxt->input->filename,
500 ctxt->input->line);
501 xmlGenericError(xmlGenericErrorContext,
502 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
503 }
504 inputPush(ctxt, input);
505 GROW;
506}
507
508/**
509 * xmlParseCharRef:
510 * @ctxt: an XML parser context
511 *
512 * parse Reference declarations
513 *
514 * [66] CharRef ::= '&#' [0-9]+ ';' |
515 * '&#x' [0-9a-fA-F]+ ';'
516 *
517 * [ WFC: Legal Character ]
518 * Characters referred to using character references must match the
519 * production for Char.
520 *
521 * Returns the value parsed (as an int), 0 in case of error
522 */
523int
524xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000525 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000526 int count = 0;
527
Owen Taylor3473f882001-02-23 17:55:21 +0000528 /*
529 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
530 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000531 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000532 (NXT(2) == 'x')) {
533 SKIP(3);
534 GROW;
535 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000536 if (count++ > 20) {
537 count = 0;
538 GROW;
539 }
540 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000541 val = val * 16 + (CUR - '0');
542 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
543 val = val * 16 + (CUR - 'a') + 10;
544 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
545 val = val * 16 + (CUR - 'A') + 10;
546 else {
547 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid hexadecimal value\n");
551 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000552 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000553 val = 0;
554 break;
555 }
556 NEXT;
557 count++;
558 }
559 if (RAW == ';') {
560 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
561 ctxt->nbChars ++;
562 ctxt->input->cur++;
563 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000564 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000565 SKIP(2);
566 GROW;
567 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000568 if (count++ > 20) {
569 count = 0;
570 GROW;
571 }
572 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000573 val = val * 10 + (CUR - '0');
574 else {
575 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
577 ctxt->sax->error(ctxt->userData,
578 "xmlParseCharRef: invalid decimal value\n");
579 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000580 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000581 val = 0;
582 break;
583 }
584 NEXT;
585 count++;
586 }
587 if (RAW == ';') {
588 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
589 ctxt->nbChars ++;
590 ctxt->input->cur++;
591 }
592 } else {
593 ctxt->errNo = XML_ERR_INVALID_CHARREF;
594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
595 ctxt->sax->error(ctxt->userData,
596 "xmlParseCharRef: invalid value\n");
597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000599 }
600
601 /*
602 * [ WFC: Legal Character ]
603 * Characters referred to using character references must match the
604 * production for Char.
605 */
606 if (IS_CHAR(val)) {
607 return(val);
608 } else {
609 ctxt->errNo = XML_ERR_INVALID_CHAR;
610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000611 ctxt->sax->error(ctxt->userData,
612 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000613 val);
614 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000615 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000616 }
617 return(0);
618}
619
620/**
621 * xmlParseStringCharRef:
622 * @ctxt: an XML parser context
623 * @str: a pointer to an index in the string
624 *
625 * parse Reference declarations, variant parsing from a string rather
626 * than an an input flow.
627 *
628 * [66] CharRef ::= '&#' [0-9]+ ';' |
629 * '&#x' [0-9a-fA-F]+ ';'
630 *
631 * [ WFC: Legal Character ]
632 * Characters referred to using character references must match the
633 * production for Char.
634 *
635 * Returns the value parsed (as an int), 0 in case of error, str will be
636 * updated to the current value of the index
637 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000638static int
Owen Taylor3473f882001-02-23 17:55:21 +0000639xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
640 const xmlChar *ptr;
641 xmlChar cur;
642 int val = 0;
643
644 if ((str == NULL) || (*str == NULL)) return(0);
645 ptr = *str;
646 cur = *ptr;
647 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
648 ptr += 3;
649 cur = *ptr;
650 while (cur != ';') { /* Non input consuming loop */
651 if ((cur >= '0') && (cur <= '9'))
652 val = val * 16 + (cur - '0');
653 else if ((cur >= 'a') && (cur <= 'f'))
654 val = val * 16 + (cur - 'a') + 10;
655 else if ((cur >= 'A') && (cur <= 'F'))
656 val = val * 16 + (cur - 'A') + 10;
657 else {
658 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
660 ctxt->sax->error(ctxt->userData,
661 "xmlParseStringCharRef: invalid hexadecimal value\n");
662 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000663 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000664 val = 0;
665 break;
666 }
667 ptr++;
668 cur = *ptr;
669 }
670 if (cur == ';')
671 ptr++;
672 } else if ((cur == '&') && (ptr[1] == '#')){
673 ptr += 2;
674 cur = *ptr;
675 while (cur != ';') { /* Non input consuming loops */
676 if ((cur >= '0') && (cur <= '9'))
677 val = val * 10 + (cur - '0');
678 else {
679 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681 ctxt->sax->error(ctxt->userData,
682 "xmlParseStringCharRef: invalid decimal value\n");
683 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000684 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000685 val = 0;
686 break;
687 }
688 ptr++;
689 cur = *ptr;
690 }
691 if (cur == ';')
692 ptr++;
693 } else {
694 ctxt->errNo = XML_ERR_INVALID_CHARREF;
695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000697 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000698 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000699 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000700 return(0);
701 }
702 *str = ptr;
703
704 /*
705 * [ WFC: Legal Character ]
706 * Characters referred to using character references must match the
707 * production for Char.
708 */
709 if (IS_CHAR(val)) {
710 return(val);
711 } else {
712 ctxt->errNo = XML_ERR_INVALID_CHAR;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000715 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000716 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000717 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000718 }
719 return(0);
720}
721
722/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000723 * xmlNewBlanksWrapperInputStream:
724 * @ctxt: an XML parser context
725 * @entity: an Entity pointer
726 *
727 * Create a new input stream for wrapping
728 * blanks around a PEReference
729 *
730 * Returns the new input stream or NULL
731 */
732
733static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
734
Daniel Veillardf4862f02002-09-10 11:13:43 +0000735static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000736xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
737 xmlParserInputPtr input;
738 xmlChar *buffer;
739 size_t length;
740 if (entity == NULL) {
741 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
743 ctxt->sax->error(ctxt->userData,
744 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
745 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
746 return(NULL);
747 }
748 if (xmlParserDebugEntities)
749 xmlGenericError(xmlGenericErrorContext,
750 "new blanks wrapper for entity: %s\n", entity->name);
751 input = xmlNewInputStream(ctxt);
752 if (input == NULL) {
753 return(NULL);
754 }
755 length = xmlStrlen(entity->name) + 5;
756 buffer = xmlMalloc(length);
757 if (buffer == NULL) {
758 return(NULL);
759 }
760 buffer [0] = ' ';
761 buffer [1] = '%';
762 buffer [length-3] = ';';
763 buffer [length-2] = ' ';
764 buffer [length-1] = 0;
765 memcpy(buffer + 2, entity->name, length - 5);
766 input->free = deallocblankswrapper;
767 input->base = buffer;
768 input->cur = buffer;
769 input->length = length;
770 input->end = &buffer[length];
771 return(input);
772}
773
774/**
Owen Taylor3473f882001-02-23 17:55:21 +0000775 * xmlParserHandlePEReference:
776 * @ctxt: the parser context
777 *
778 * [69] PEReference ::= '%' Name ';'
779 *
780 * [ WFC: No Recursion ]
781 * A parsed entity must not contain a recursive
782 * reference to itself, either directly or indirectly.
783 *
784 * [ WFC: Entity Declared ]
785 * In a document without any DTD, a document with only an internal DTD
786 * subset which contains no parameter entity references, or a document
787 * with "standalone='yes'", ... ... The declaration of a parameter
788 * entity must precede any reference to it...
789 *
790 * [ VC: Entity Declared ]
791 * In a document with an external subset or external parameter entities
792 * with "standalone='no'", ... ... The declaration of a parameter entity
793 * must precede any reference to it...
794 *
795 * [ WFC: In DTD ]
796 * Parameter-entity references may only appear in the DTD.
797 * NOTE: misleading but this is handled.
798 *
799 * A PEReference may have been detected in the current input stream
800 * the handling is done accordingly to
801 * http://www.w3.org/TR/REC-xml#entproc
802 * i.e.
803 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000804 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000805 */
806void
807xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
808 xmlChar *name;
809 xmlEntityPtr entity = NULL;
810 xmlParserInputPtr input;
811
Owen Taylor3473f882001-02-23 17:55:21 +0000812 if (RAW != '%') return;
813 switch(ctxt->instate) {
814 case XML_PARSER_CDATA_SECTION:
815 return;
816 case XML_PARSER_COMMENT:
817 return;
818 case XML_PARSER_START_TAG:
819 return;
820 case XML_PARSER_END_TAG:
821 return;
822 case XML_PARSER_EOF:
823 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
825 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
826 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000827 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000828 return;
829 case XML_PARSER_PROLOG:
830 case XML_PARSER_START:
831 case XML_PARSER_MISC:
832 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
834 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
835 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000836 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000837 return;
838 case XML_PARSER_ENTITY_DECL:
839 case XML_PARSER_CONTENT:
840 case XML_PARSER_ATTRIBUTE_VALUE:
841 case XML_PARSER_PI:
842 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000843 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000844 /* we just ignore it there */
845 return;
846 case XML_PARSER_EPILOG:
847 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
850 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000851 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000852 return;
853 case XML_PARSER_ENTITY_VALUE:
854 /*
855 * NOTE: in the case of entity values, we don't do the
856 * substitution here since we need the literal
857 * entity value to be able to save the internal
858 * subset of the document.
859 * This will be handled by xmlStringDecodeEntities
860 */
861 return;
862 case XML_PARSER_DTD:
863 /*
864 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
865 * In the internal DTD subset, parameter-entity references
866 * can occur only where markup declarations can occur, not
867 * within markup declarations.
868 * In that case this is handled in xmlParseMarkupDecl
869 */
870 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
871 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000872 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
873 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000874 break;
875 case XML_PARSER_IGNORE:
876 return;
877 }
878
879 NEXT;
880 name = xmlParseName(ctxt);
881 if (xmlParserDebugEntities)
882 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000883 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000884 if (name == NULL) {
885 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000887 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000888 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000889 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000890 } else {
891 if (RAW == ';') {
892 NEXT;
893 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
894 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
895 if (entity == NULL) {
896
897 /*
898 * [ WFC: Entity Declared ]
899 * In a document without any DTD, a document with only an
900 * internal DTD subset which contains no parameter entity
901 * references, or a document with "standalone='yes'", ...
902 * ... The declaration of a parameter entity must precede
903 * any reference to it...
904 */
905 if ((ctxt->standalone == 1) ||
906 ((ctxt->hasExternalSubset == 0) &&
907 (ctxt->hasPErefs == 0))) {
908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
909 ctxt->sax->error(ctxt->userData,
910 "PEReference: %%%s; not found\n", name);
911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000913 } else {
914 /*
915 * [ VC: Entity Declared ]
916 * In a document with an external subset or external
917 * parameter entities with "standalone='no'", ...
918 * ... The declaration of a parameter entity must precede
919 * any reference to it...
920 */
921 if ((!ctxt->disableSAX) &&
922 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
923 ctxt->vctxt.error(ctxt->vctxt.userData,
924 "PEReference: %%%s; not found\n", name);
925 } else if ((!ctxt->disableSAX) &&
926 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
927 ctxt->sax->warning(ctxt->userData,
928 "PEReference: %%%s; not found\n", name);
929 ctxt->valid = 0;
930 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000931 } else if (ctxt->input->free != deallocblankswrapper) {
932 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
933 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 } else {
935 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
936 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000937 xmlChar start[4];
938 xmlCharEncoding enc;
939
Owen Taylor3473f882001-02-23 17:55:21 +0000940 /*
941 * handle the extra spaces added before and after
942 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 */
945 input = xmlNewEntityInputStream(ctxt, entity);
946 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000947
948 /*
949 * Get the 4 first bytes and decode the charset
950 * if enc != XML_CHAR_ENCODING_NONE
951 * plug some encoding conversion routines.
952 */
953 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000954 if (entity->length >= 4) {
955 start[0] = RAW;
956 start[1] = NXT(1);
957 start[2] = NXT(2);
958 start[3] = NXT(3);
959 enc = xmlDetectCharEncoding(start, 4);
960 if (enc != XML_CHAR_ENCODING_NONE) {
961 xmlSwitchEncoding(ctxt, enc);
962 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 }
964
Owen Taylor3473f882001-02-23 17:55:21 +0000965 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
966 (RAW == '<') && (NXT(1) == '?') &&
967 (NXT(2) == 'x') && (NXT(3) == 'm') &&
968 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
969 xmlParseTextDecl(ctxt);
970 }
Owen Taylor3473f882001-02-23 17:55:21 +0000971 } else {
972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000974 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000975 name);
976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000978 }
979 }
980 } else {
981 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000984 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000987 }
988 xmlFree(name);
989 }
990}
991
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to that many xmlChar.
 * The macro expands to a block containing a return(NULL), so it can only
 * be used inside functions returning a pointer.
 * When the reallocation fails the previous block is released before
 * returning, instead of being overwritten by the NULL result and leaked.
 * NOTE(review): this assumes @buffer is a heap block owned by the function
 * expanding the macro -- confirm for every user of the macro.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1004
1005/**
1006 * xmlStringDecodeEntities:
1007 * @ctxt: the parser context
1008 * @str: the input string
1009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1010 * @end: an end marker xmlChar, 0 if none
1011 * @end2: an end marker xmlChar, 0 if none
1012 * @end3: an end marker xmlChar, 0 if none
1013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001014 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001015 *
1016 * [67] Reference ::= EntityRef | CharRef
1017 *
1018 * [69] PEReference ::= '%' Name ';'
1019 *
1020 * Returns A newly allocated string with the substitution done. The caller
1021 * must deallocate it !
1022 */
1023xmlChar *
1024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1025 xmlChar end, xmlChar end2, xmlChar end3) {
1026 xmlChar *buffer = NULL;
1027 int buffer_size = 0;
1028
1029 xmlChar *current = NULL;
1030 xmlEntityPtr ent;
1031 int c,l;
1032 int nbchars = 0;
1033
1034 if (str == NULL)
1035 return(NULL);
1036
1037 if (ctxt->depth > 40) {
1038 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040 ctxt->sax->error(ctxt->userData,
1041 "Detected entity reference loop\n");
1042 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001043 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001044 return(NULL);
1045 }
1046
1047 /*
1048 * allocate a translation buffer.
1049 */
1050 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1051 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1052 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001053 xmlGenericError(xmlGenericErrorContext,
1054 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001055 return(NULL);
1056 }
1057
1058 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001059 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * we are operating on already parsed values.
1061 */
1062 c = CUR_SCHAR(str, l);
1063 while ((c != 0) && (c != end) && /* non input consuming loop */
1064 (c != end2) && (c != end3)) {
1065
1066 if (c == 0) break;
1067 if ((c == '&') && (str[1] == '#')) {
1068 int val = xmlParseStringCharRef(ctxt, &str);
1069 if (val != 0) {
1070 COPY_BUF(0,buffer,nbchars,val);
1071 }
1072 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1073 if (xmlParserDebugEntities)
1074 xmlGenericError(xmlGenericErrorContext,
1075 "String decoding Entity Reference: %.30s\n",
1076 str);
1077 ent = xmlParseStringEntityRef(ctxt, &str);
1078 if ((ent != NULL) &&
1079 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1080 if (ent->content != NULL) {
1081 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1082 } else {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "internal error entity has no content\n");
1086 }
1087 } else if ((ent != NULL) && (ent->content != NULL)) {
1088 xmlChar *rep;
1089
1090 ctxt->depth++;
1091 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1092 0, 0, 0);
1093 ctxt->depth--;
1094 if (rep != NULL) {
1095 current = rep;
1096 while (*current != 0) { /* non input consuming loop */
1097 buffer[nbchars++] = *current++;
1098 if (nbchars >
1099 buffer_size - XML_PARSER_BUFFER_SIZE) {
1100 growBuffer(buffer);
1101 }
1102 }
1103 xmlFree(rep);
1104 }
1105 } else if (ent != NULL) {
1106 int i = xmlStrlen(ent->name);
1107 const xmlChar *cur = ent->name;
1108
1109 buffer[nbchars++] = '&';
1110 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1111 growBuffer(buffer);
1112 }
1113 for (;i > 0;i--)
1114 buffer[nbchars++] = *cur++;
1115 buffer[nbchars++] = ';';
1116 }
1117 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1118 if (xmlParserDebugEntities)
1119 xmlGenericError(xmlGenericErrorContext,
1120 "String decoding PE Reference: %.30s\n", str);
1121 ent = xmlParseStringPEReference(ctxt, &str);
1122 if (ent != NULL) {
1123 xmlChar *rep;
1124
1125 ctxt->depth++;
1126 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1127 0, 0, 0);
1128 ctxt->depth--;
1129 if (rep != NULL) {
1130 current = rep;
1131 while (*current != 0) { /* non input consuming loop */
1132 buffer[nbchars++] = *current++;
1133 if (nbchars >
1134 buffer_size - XML_PARSER_BUFFER_SIZE) {
1135 growBuffer(buffer);
1136 }
1137 }
1138 xmlFree(rep);
1139 }
1140 }
1141 } else {
1142 COPY_BUF(l,buffer,nbchars,c);
1143 str += l;
1144 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1145 growBuffer(buffer);
1146 }
1147 }
1148 c = CUR_SCHAR(str, l);
1149 }
1150 buffer[nbchars++] = 0;
1151 return(buffer);
1152}
1153
1154
1155/************************************************************************
1156 * *
1157 * Commodity functions to handle xmlChars *
1158 * *
1159 ************************************************************************/
1160
1161/**
1162 * xmlStrndup:
1163 * @cur: the input xmlChar *
1164 * @len: the len of @cur
1165 *
1166 * a strndup for array of xmlChar's
1167 *
1168 * Returns a new xmlChar * or NULL
1169 */
1170xmlChar *
1171xmlStrndup(const xmlChar *cur, int len) {
1172 xmlChar *ret;
1173
1174 if ((cur == NULL) || (len < 0)) return(NULL);
1175 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1176 if (ret == NULL) {
1177 xmlGenericError(xmlGenericErrorContext,
1178 "malloc of %ld byte failed\n",
1179 (len + 1) * (long)sizeof(xmlChar));
1180 return(NULL);
1181 }
1182 memcpy(ret, cur, len * sizeof(xmlChar));
1183 ret[len] = 0;
1184 return(ret);
1185}
1186
1187/**
1188 * xmlStrdup:
1189 * @cur: the input xmlChar *
1190 *
1191 * a strdup for array of xmlChar's. Since they are supposed to be
1192 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1193 * a termination mark of '0'.
1194 *
1195 * Returns a new xmlChar * or NULL
1196 */
1197xmlChar *
1198xmlStrdup(const xmlChar *cur) {
1199 const xmlChar *p = cur;
1200
1201 if (cur == NULL) return(NULL);
1202 while (*p != 0) p++; /* non input consuming */
1203 return(xmlStrndup(cur, p - cur));
1204}
1205
1206/**
1207 * xmlCharStrndup:
1208 * @cur: the input char *
1209 * @len: the len of @cur
1210 *
1211 * a strndup for char's to xmlChar's
1212 *
1213 * Returns a new xmlChar * or NULL
1214 */
1215
1216xmlChar *
1217xmlCharStrndup(const char *cur, int len) {
1218 int i;
1219 xmlChar *ret;
1220
1221 if ((cur == NULL) || (len < 0)) return(NULL);
1222 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1223 if (ret == NULL) {
1224 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1225 (len + 1) * (long)sizeof(xmlChar));
1226 return(NULL);
1227 }
1228 for (i = 0;i < len;i++)
1229 ret[i] = (xmlChar) cur[i];
1230 ret[len] = 0;
1231 return(ret);
1232}
1233
1234/**
1235 * xmlCharStrdup:
1236 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001237 *
1238 * a strdup for char's to xmlChar's
1239 *
1240 * Returns a new xmlChar * or NULL
1241 */
1242
1243xmlChar *
1244xmlCharStrdup(const char *cur) {
1245 const char *p = cur;
1246
1247 if (cur == NULL) return(NULL);
1248 while (*p != '\0') p++; /* non input consuming */
1249 return(xmlCharStrndup(cur, p - cur));
1250}
1251
1252/**
1253 * xmlStrcmp:
1254 * @str1: the first xmlChar *
1255 * @str2: the second xmlChar *
1256 *
1257 * a strcmp for xmlChar's
1258 *
1259 * Returns the integer result of the comparison
1260 */
1261
1262int
1263xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1264 register int tmp;
1265
1266 if (str1 == str2) return(0);
1267 if (str1 == NULL) return(-1);
1268 if (str2 == NULL) return(1);
1269 do {
1270 tmp = *str1++ - *str2;
1271 if (tmp != 0) return(tmp);
1272 } while (*str2++ != 0);
1273 return 0;
1274}
1275
1276/**
1277 * xmlStrEqual:
1278 * @str1: the first xmlChar *
1279 * @str2: the second xmlChar *
1280 *
1281 * Check if both string are equal of have same content
1282 * Should be a bit more readable and faster than xmlStrEqual()
1283 *
1284 * Returns 1 if they are equal, 0 if they are different
1285 */
1286
1287int
1288xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1289 if (str1 == str2) return(1);
1290 if (str1 == NULL) return(0);
1291 if (str2 == NULL) return(0);
1292 do {
1293 if (*str1++ != *str2) return(0);
1294 } while (*str2++);
1295 return(1);
1296}
1297
1298/**
1299 * xmlStrncmp:
1300 * @str1: the first xmlChar *
1301 * @str2: the second xmlChar *
1302 * @len: the max comparison length
1303 *
1304 * a strncmp for xmlChar's
1305 *
1306 * Returns the integer result of the comparison
1307 */
1308
1309int
1310xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1311 register int tmp;
1312
1313 if (len <= 0) return(0);
1314 if (str1 == str2) return(0);
1315 if (str1 == NULL) return(-1);
1316 if (str2 == NULL) return(1);
1317 do {
1318 tmp = *str1++ - *str2;
1319 if (tmp != 0 || --len == 0) return(tmp);
1320 } while (*str2++ != 0);
1321 return 0;
1322}
1323
Daniel Veillardb44025c2001-10-11 22:55:55 +00001324static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1326 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1327 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1328 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1329 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1330 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1331 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1332 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1333 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1334 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1335 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1336 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1337 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1338 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1339 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1340 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1341 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1342 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1343 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1344 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1345 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1346 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1347 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1348 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1349 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1350 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1351 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1352 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1353 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1354 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1355 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1356 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1357};
1358
1359/**
1360 * xmlStrcasecmp:
1361 * @str1: the first xmlChar *
1362 * @str2: the second xmlChar *
1363 *
1364 * a strcasecmp for xmlChar's
1365 *
1366 * Returns the integer result of the comparison
1367 */
1368
1369int
1370xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1371 register int tmp;
1372
1373 if (str1 == str2) return(0);
1374 if (str1 == NULL) return(-1);
1375 if (str2 == NULL) return(1);
1376 do {
1377 tmp = casemap[*str1++] - casemap[*str2];
1378 if (tmp != 0) return(tmp);
1379 } while (*str2++ != 0);
1380 return 0;
1381}
1382
1383/**
1384 * xmlStrncasecmp:
1385 * @str1: the first xmlChar *
1386 * @str2: the second xmlChar *
1387 * @len: the max comparison length
1388 *
1389 * a strncasecmp for xmlChar's
1390 *
1391 * Returns the integer result of the comparison
1392 */
1393
1394int
1395xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1396 register int tmp;
1397
1398 if (len <= 0) return(0);
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0 || --len == 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrchr:
1411 * @str: the xmlChar * array
1412 * @val: the xmlChar to search
1413 *
1414 * a strchr for xmlChar's
1415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001416 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001417 */
1418
1419const xmlChar *
1420xmlStrchr(const xmlChar *str, xmlChar val) {
1421 if (str == NULL) return(NULL);
1422 while (*str != 0) { /* non input consuming */
1423 if (*str == val) return((xmlChar *) str);
1424 str++;
1425 }
1426 return(NULL);
1427}
1428
1429/**
1430 * xmlStrstr:
1431 * @str: the xmlChar * array (haystack)
1432 * @val: the xmlChar to search (needle)
1433 *
1434 * a strstr for xmlChar's
1435 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001436 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001437 */
1438
1439const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001440xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001441 int n;
1442
1443 if (str == NULL) return(NULL);
1444 if (val == NULL) return(NULL);
1445 n = xmlStrlen(val);
1446
1447 if (n == 0) return(str);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == *val) {
1450 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1451 }
1452 str++;
1453 }
1454 return(NULL);
1455}
1456
1457/**
1458 * xmlStrcasestr:
1459 * @str: the xmlChar * array (haystack)
1460 * @val: the xmlChar to search (needle)
1461 *
1462 * a case-ignoring strstr for xmlChar's
1463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001464 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001465 */
1466
1467const xmlChar *
1468xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1469 int n;
1470
1471 if (str == NULL) return(NULL);
1472 if (val == NULL) return(NULL);
1473 n = xmlStrlen(val);
1474
1475 if (n == 0) return(str);
1476 while (*str != 0) { /* non input consuming */
1477 if (casemap[*str] == casemap[*val])
1478 if (!xmlStrncasecmp(str, val, n)) return(str);
1479 str++;
1480 }
1481 return(NULL);
1482}
1483
1484/**
1485 * xmlStrsub:
1486 * @str: the xmlChar * array (haystack)
1487 * @start: the index of the first char (zero based)
1488 * @len: the length of the substring
1489 *
1490 * Extract a substring of a given string
1491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001492 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001493 */
1494
1495xmlChar *
1496xmlStrsub(const xmlChar *str, int start, int len) {
1497 int i;
1498
1499 if (str == NULL) return(NULL);
1500 if (start < 0) return(NULL);
1501 if (len < 0) return(NULL);
1502
1503 for (i = 0;i < start;i++) {
1504 if (*str == 0) return(NULL);
1505 str++;
1506 }
1507 if (*str == 0) return(NULL);
1508 return(xmlStrndup(str, len));
1509}
1510
1511/**
1512 * xmlStrlen:
1513 * @str: the xmlChar * array
1514 *
1515 * length of a xmlChar's string
1516 *
1517 * Returns the number of xmlChar contained in the ARRAY.
1518 */
1519
1520int
1521xmlStrlen(const xmlChar *str) {
1522 int len = 0;
1523
1524 if (str == NULL) return(0);
1525 while (*str != 0) { /* non input consuming */
1526 str++;
1527 len++;
1528 }
1529 return(len);
1530}
1531
1532/**
1533 * xmlStrncat:
1534 * @cur: the original xmlChar * array
1535 * @add: the xmlChar * array added
1536 * @len: the length of @add
1537 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001538 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001539 * first bytes of @add.
1540 *
1541 * Returns a new xmlChar *, the original @cur is reallocated if needed
1542 * and should not be freed
1543 */
1544
1545xmlChar *
1546xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1547 int size;
1548 xmlChar *ret;
1549
1550 if ((add == NULL) || (len == 0))
1551 return(cur);
1552 if (cur == NULL)
1553 return(xmlStrndup(add, len));
1554
1555 size = xmlStrlen(cur);
1556 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1557 if (ret == NULL) {
1558 xmlGenericError(xmlGenericErrorContext,
1559 "xmlStrncat: realloc of %ld byte failed\n",
1560 (size + len + 1) * (long)sizeof(xmlChar));
1561 return(cur);
1562 }
1563 memcpy(&ret[size], add, len * sizeof(xmlChar));
1564 ret[size + len] = 0;
1565 return(ret);
1566}
1567
1568/**
1569 * xmlStrcat:
1570 * @cur: the original xmlChar * array
1571 * @add: the xmlChar * array added
1572 *
1573 * a strcat for array of xmlChar's. Since they are supposed to be
1574 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1575 * a termination mark of '0'.
1576 *
1577 * Returns a new xmlChar * containing the concatenated string.
1578 */
1579xmlChar *
1580xmlStrcat(xmlChar *cur, const xmlChar *add) {
1581 const xmlChar *p = add;
1582
1583 if (add == NULL) return(cur);
1584 if (cur == NULL)
1585 return(xmlStrdup(add));
1586
1587 while (*p != 0) p++; /* non input consuming */
1588 return(xmlStrncat(cur, add, p - add));
1589}
1590
1591/************************************************************************
1592 * *
1593 * Commodity functions, cleanup needed ? *
1594 * *
1595 ************************************************************************/
1596
1597/**
1598 * areBlanks:
1599 * @ctxt: an XML parser context
1600 * @str: a xmlChar *
1601 * @len: the size of @str
1602 *
1603 * Is this a sequence of blank chars that one can ignore ?
1604 *
1605 * Returns 1 if ignorable 0 otherwise.
1606 */
1607
1608static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1609 int i, ret;
1610 xmlNodePtr lastChild;
1611
Daniel Veillard05c13a22001-09-09 08:38:09 +00001612 /*
1613 * Don't spend time trying to differentiate them, the same callback is
1614 * used !
1615 */
1616 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001617 return(0);
1618
Owen Taylor3473f882001-02-23 17:55:21 +00001619 /*
1620 * Check for xml:space value.
1621 */
1622 if (*(ctxt->space) == 1)
1623 return(0);
1624
1625 /*
1626 * Check that the string is made of blanks
1627 */
1628 for (i = 0;i < len;i++)
1629 if (!(IS_BLANK(str[i]))) return(0);
1630
1631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001632 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001633 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001634 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 if (ctxt->myDoc != NULL) {
1636 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1637 if (ret == 0) return(1);
1638 if (ret == 1) return(0);
1639 }
1640
1641 /*
1642 * Otherwise, heuristic :-\
1643 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001644 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001645 if ((ctxt->node->children == NULL) &&
1646 (RAW == '<') && (NXT(1) == '/')) return(0);
1647
1648 lastChild = xmlGetLastChild(ctxt->node);
1649 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001650 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1651 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 } else if (xmlNodeIsText(lastChild))
1653 return(0);
1654 else if ((ctxt->node->children != NULL) &&
1655 (xmlNodeIsText(ctxt->node->children)))
1656 return(0);
1657 return(1);
1658}
1659
Owen Taylor3473f882001-02-23 17:55:21 +00001660/************************************************************************
1661 * *
1662 * Extra stuff for namespace support *
1663 * Relates to http://www.w3.org/TR/WD-xml-names *
1664 * *
1665 ************************************************************************/
1666
1667/**
1668 * xmlSplitQName:
1669 * @ctxt: an XML parser context
1670 * @name: an XML parser context
1671 * @prefix: a xmlChar **
1672 *
1673 * parse an UTF8 encoded XML qualified name string
1674 *
1675 * [NS 5] QName ::= (Prefix ':')? LocalPart
1676 *
1677 * [NS 6] Prefix ::= NCName
1678 *
1679 * [NS 7] LocalPart ::= NCName
1680 *
1681 * Returns the local part, and prefix is updated
1682 * to get the Prefix if any.
1683 */
1684
1685xmlChar *
1686xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1687 xmlChar buf[XML_MAX_NAMELEN + 5];
1688 xmlChar *buffer = NULL;
1689 int len = 0;
1690 int max = XML_MAX_NAMELEN;
1691 xmlChar *ret = NULL;
1692 const xmlChar *cur = name;
1693 int c;
1694
1695 *prefix = NULL;
1696
1697#ifndef XML_XML_NAMESPACE
1698 /* xml: prefix is not really a namespace */
1699 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1700 (cur[2] == 'l') && (cur[3] == ':'))
1701 return(xmlStrdup(name));
1702#endif
1703
1704 /* nasty but valid */
1705 if (cur[0] == ':')
1706 return(xmlStrdup(name));
1707
1708 c = *cur++;
1709 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1710 buf[len++] = c;
1711 c = *cur++;
1712 }
1713 if (len >= max) {
1714 /*
1715 * Okay someone managed to make a huge name, so he's ready to pay
1716 * for the processing speed.
1717 */
1718 max = len * 2;
1719
1720 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1721 if (buffer == NULL) {
1722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723 ctxt->sax->error(ctxt->userData,
1724 "xmlSplitQName: out of memory\n");
1725 return(NULL);
1726 }
1727 memcpy(buffer, buf, len);
1728 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1729 if (len + 10 > max) {
1730 max *= 2;
1731 buffer = (xmlChar *) xmlRealloc(buffer,
1732 max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlSplitQName: out of memory\n");
1737 return(NULL);
1738 }
1739 }
1740 buffer[len++] = c;
1741 c = *cur++;
1742 }
1743 buffer[len] = 0;
1744 }
1745
1746 if (buffer == NULL)
1747 ret = xmlStrndup(buf, len);
1748 else {
1749 ret = buffer;
1750 buffer = NULL;
1751 max = XML_MAX_NAMELEN;
1752 }
1753
1754
1755 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001756 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 if (c == 0) return(ret);
1758 *prefix = ret;
1759 len = 0;
1760
Daniel Veillardbb284f42002-10-16 18:02:47 +00001761 /*
1762 * Check that the first character is proper to start
1763 * a new name
1764 */
1765 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1766 ((c >= 0x41) && (c <= 0x5A)) ||
1767 (c == '_') || (c == ':'))) {
1768 int l;
1769 int first = CUR_SCHAR(cur, l);
1770
1771 if (!IS_LETTER(first) && (first != '_')) {
1772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1773 ctxt->sax->error(ctxt->userData,
1774 "Name %s is not XML Namespace compliant\n",
1775 name);
1776 }
1777 }
1778 cur++;
1779
Owen Taylor3473f882001-02-23 17:55:21 +00001780 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1781 buf[len++] = c;
1782 c = *cur++;
1783 }
1784 if (len >= max) {
1785 /*
1786 * Okay someone managed to make a huge name, so he's ready to pay
1787 * for the processing speed.
1788 */
1789 max = len * 2;
1790
1791 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1792 if (buffer == NULL) {
1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
1795 "xmlSplitQName: out of memory\n");
1796 return(NULL);
1797 }
1798 memcpy(buffer, buf, len);
1799 while (c != 0) { /* tested bigname2.xml */
1800 if (len + 10 > max) {
1801 max *= 2;
1802 buffer = (xmlChar *) xmlRealloc(buffer,
1803 max * sizeof(xmlChar));
1804 if (buffer == NULL) {
1805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806 ctxt->sax->error(ctxt->userData,
1807 "xmlSplitQName: out of memory\n");
1808 return(NULL);
1809 }
1810 }
1811 buffer[len++] = c;
1812 c = *cur++;
1813 }
1814 buffer[len] = 0;
1815 }
1816
1817 if (buffer == NULL)
1818 ret = xmlStrndup(buf, len);
1819 else {
1820 ret = buffer;
1821 }
1822 }
1823
1824 return(ret);
1825}
1826
1827/************************************************************************
1828 * *
1829 * The parser itself *
1830 * Relates to http://www.w3.org/TR/REC-xml *
1831 * *
1832 ************************************************************************/
1833
Daniel Veillard76d66f42001-05-16 21:05:17 +00001834static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001835/**
1836 * xmlParseName:
1837 * @ctxt: an XML parser context
1838 *
1839 * parse an XML name.
1840 *
1841 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1842 * CombiningChar | Extender
1843 *
1844 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1845 *
1846 * [6] Names ::= Name (S Name)*
1847 *
1848 * Returns the Name parsed or NULL
1849 */
1850
1851xmlChar *
1852xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001853 const xmlChar *in;
1854 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001855 int count = 0;
1856
1857 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001858
1859 /*
1860 * Accelerator for simple ASCII names
1861 */
1862 in = ctxt->input->cur;
1863 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1864 ((*in >= 0x41) && (*in <= 0x5A)) ||
1865 (*in == '_') || (*in == ':')) {
1866 in++;
1867 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1868 ((*in >= 0x41) && (*in <= 0x5A)) ||
1869 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001870 (*in == '_') || (*in == '-') ||
1871 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001872 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001873 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001874 count = in - ctxt->input->cur;
1875 ret = xmlStrndup(ctxt->input->cur, count);
1876 ctxt->input->cur = in;
1877 return(ret);
1878 }
1879 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001880 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001881}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001882
Daniel Veillard46de64e2002-05-29 08:21:33 +00001883/**
1884 * xmlParseNameAndCompare:
1885 * @ctxt: an XML parser context
1886 *
1887 * parse an XML name and compares for match
1888 * (specialized for endtag parsing)
1889 *
1890 *
1891 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1892 * and the name for mismatch
1893 */
1894
Daniel Veillardf4862f02002-09-10 11:13:43 +00001895static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001896xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1897 const xmlChar *cmp = other;
1898 const xmlChar *in;
1899 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001900
1901 GROW;
1902
1903 in = ctxt->input->cur;
1904 while (*in != 0 && *in == *cmp) {
1905 ++in;
1906 ++cmp;
1907 }
1908 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1909 /* success */
1910 ctxt->input->cur = in;
1911 return (xmlChar*) 1;
1912 }
1913 /* failure (or end of input buffer), check with full function */
1914 ret = xmlParseName (ctxt);
1915 if (ret != 0 && xmlStrEqual (ret, other)) {
1916 xmlFree (ret);
1917 return (xmlChar*) 1;
1918 }
1919 return ret;
1920}
1921
Daniel Veillard76d66f42001-05-16 21:05:17 +00001922static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001923xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1924 xmlChar buf[XML_MAX_NAMELEN + 5];
1925 int len = 0, l;
1926 int c;
1927 int count = 0;
1928
1929 /*
1930 * Handler for more complex cases
1931 */
1932 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001933 c = CUR_CHAR(l);
1934 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1935 (!IS_LETTER(c) && (c != '_') &&
1936 (c != ':'))) {
1937 return(NULL);
1938 }
1939
1940 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1941 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1942 (c == '.') || (c == '-') ||
1943 (c == '_') || (c == ':') ||
1944 (IS_COMBINING(c)) ||
1945 (IS_EXTENDER(c)))) {
1946 if (count++ > 100) {
1947 count = 0;
1948 GROW;
1949 }
1950 COPY_BUF(l,buf,len,c);
1951 NEXTL(l);
1952 c = CUR_CHAR(l);
1953 if (len >= XML_MAX_NAMELEN) {
1954 /*
1955 * Okay someone managed to make a huge name, so he's ready to pay
1956 * for the processing speed.
1957 */
1958 xmlChar *buffer;
1959 int max = len * 2;
1960
1961 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1962 if (buffer == NULL) {
1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001965 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return(NULL);
1967 }
1968 memcpy(buffer, buf, len);
1969 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1970 (c == '.') || (c == '-') ||
1971 (c == '_') || (c == ':') ||
1972 (IS_COMBINING(c)) ||
1973 (IS_EXTENDER(c))) {
1974 if (count++ > 100) {
1975 count = 0;
1976 GROW;
1977 }
1978 if (len + 10 > max) {
1979 max *= 2;
1980 buffer = (xmlChar *) xmlRealloc(buffer,
1981 max * sizeof(xmlChar));
1982 if (buffer == NULL) {
1983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001985 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001986 return(NULL);
1987 }
1988 }
1989 COPY_BUF(l,buffer,len,c);
1990 NEXTL(l);
1991 c = CUR_CHAR(l);
1992 }
1993 buffer[len] = 0;
1994 return(buffer);
1995 }
1996 }
1997 return(xmlStrndup(buf, len));
1998}
1999
2000/**
2001 * xmlParseStringName:
2002 * @ctxt: an XML parser context
2003 * @str: a pointer to the string pointer (IN/OUT)
2004 *
2005 * parse an XML name.
2006 *
2007 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2008 * CombiningChar | Extender
2009 *
2010 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2011 *
2012 * [6] Names ::= Name (S Name)*
2013 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002014 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002015 * is updated to the current location in the string.
2016 */
2017
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002018static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002019xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2020 xmlChar buf[XML_MAX_NAMELEN + 5];
2021 const xmlChar *cur = *str;
2022 int len = 0, l;
2023 int c;
2024
2025 c = CUR_SCHAR(cur, l);
2026 if (!IS_LETTER(c) && (c != '_') &&
2027 (c != ':')) {
2028 return(NULL);
2029 }
2030
2031 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2032 (c == '.') || (c == '-') ||
2033 (c == '_') || (c == ':') ||
2034 (IS_COMBINING(c)) ||
2035 (IS_EXTENDER(c))) {
2036 COPY_BUF(l,buf,len,c);
2037 cur += l;
2038 c = CUR_SCHAR(cur, l);
2039 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2040 /*
2041 * Okay someone managed to make a huge name, so he's ready to pay
2042 * for the processing speed.
2043 */
2044 xmlChar *buffer;
2045 int max = len * 2;
2046
2047 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2048 if (buffer == NULL) {
2049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2050 ctxt->sax->error(ctxt->userData,
2051 "xmlParseStringName: out of memory\n");
2052 return(NULL);
2053 }
2054 memcpy(buffer, buf, len);
2055 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2056 (c == '.') || (c == '-') ||
2057 (c == '_') || (c == ':') ||
2058 (IS_COMBINING(c)) ||
2059 (IS_EXTENDER(c))) {
2060 if (len + 10 > max) {
2061 max *= 2;
2062 buffer = (xmlChar *) xmlRealloc(buffer,
2063 max * sizeof(xmlChar));
2064 if (buffer == NULL) {
2065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2066 ctxt->sax->error(ctxt->userData,
2067 "xmlParseStringName: out of memory\n");
2068 return(NULL);
2069 }
2070 }
2071 COPY_BUF(l,buffer,len,c);
2072 cur += l;
2073 c = CUR_SCHAR(cur, l);
2074 }
2075 buffer[len] = 0;
2076 *str = cur;
2077 return(buffer);
2078 }
2079 }
2080 *str = cur;
2081 return(xmlStrndup(buf, len));
2082}
2083
2084/**
2085 * xmlParseNmtoken:
2086 * @ctxt: an XML parser context
2087 *
2088 * parse an XML Nmtoken.
2089 *
2090 * [7] Nmtoken ::= (NameChar)+
2091 *
2092 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2093 *
2094 * Returns the Nmtoken parsed or NULL
2095 */
2096
2097xmlChar *
2098xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2099 xmlChar buf[XML_MAX_NAMELEN + 5];
2100 int len = 0, l;
2101 int c;
2102 int count = 0;
2103
2104 GROW;
2105 c = CUR_CHAR(l);
2106
2107 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2108 (c == '.') || (c == '-') ||
2109 (c == '_') || (c == ':') ||
2110 (IS_COMBINING(c)) ||
2111 (IS_EXTENDER(c))) {
2112 if (count++ > 100) {
2113 count = 0;
2114 GROW;
2115 }
2116 COPY_BUF(l,buf,len,c);
2117 NEXTL(l);
2118 c = CUR_CHAR(l);
2119 if (len >= XML_MAX_NAMELEN) {
2120 /*
2121 * Okay someone managed to make a huge token, so he's ready to pay
2122 * for the processing speed.
2123 */
2124 xmlChar *buffer;
2125 int max = len * 2;
2126
2127 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
2128 if (buffer == NULL) {
2129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2130 ctxt->sax->error(ctxt->userData,
2131 "xmlParseNmtoken: out of memory\n");
2132 return(NULL);
2133 }
2134 memcpy(buffer, buf, len);
2135 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2136 (c == '.') || (c == '-') ||
2137 (c == '_') || (c == ':') ||
2138 (IS_COMBINING(c)) ||
2139 (IS_EXTENDER(c))) {
2140 if (count++ > 100) {
2141 count = 0;
2142 GROW;
2143 }
2144 if (len + 10 > max) {
2145 max *= 2;
2146 buffer = (xmlChar *) xmlRealloc(buffer,
2147 max * sizeof(xmlChar));
2148 if (buffer == NULL) {
2149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2150 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002151 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002152 return(NULL);
2153 }
2154 }
2155 COPY_BUF(l,buffer,len,c);
2156 NEXTL(l);
2157 c = CUR_CHAR(l);
2158 }
2159 buffer[len] = 0;
2160 return(buffer);
2161 }
2162 }
2163 if (len == 0)
2164 return(NULL);
2165 return(xmlStrndup(buf, len));
2166}
2167
2168/**
2169 * xmlParseEntityValue:
2170 * @ctxt: an XML parser context
2171 * @orig: if non-NULL store a copy of the original entity value
2172 *
2173 * parse a value for ENTITY declarations
2174 *
2175 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2176 * "'" ([^%&'] | PEReference | Reference)* "'"
2177 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002178 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002179 */
2180
2181xmlChar *
2182xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2183 xmlChar *buf = NULL;
2184 int len = 0;
2185 int size = XML_PARSER_BUFFER_SIZE;
2186 int c, l;
2187 xmlChar stop;
2188 xmlChar *ret = NULL;
2189 const xmlChar *cur = NULL;
2190 xmlParserInputPtr input;
2191
2192 if (RAW == '"') stop = '"';
2193 else if (RAW == '\'') stop = '\'';
2194 else {
2195 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2198 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002199 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002200 return(NULL);
2201 }
2202 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2203 if (buf == NULL) {
2204 xmlGenericError(xmlGenericErrorContext,
2205 "malloc of %d byte failed\n", size);
2206 return(NULL);
2207 }
2208
2209 /*
2210 * The content of the entity definition is copied in a buffer.
2211 */
2212
2213 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2214 input = ctxt->input;
2215 GROW;
2216 NEXT;
2217 c = CUR_CHAR(l);
2218 /*
2219 * NOTE: 4.4.5 Included in Literal
2220 * When a parameter entity reference appears in a literal entity
2221 * value, ... a single or double quote character in the replacement
2222 * text is always treated as a normal data character and will not
2223 * terminate the literal.
2224 * In practice it means we stop the loop only when back at parsing
2225 * the initial entity and the quote is found
2226 */
2227 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2228 (ctxt->input != input))) {
2229 if (len + 5 >= size) {
2230 size *= 2;
2231 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2232 if (buf == NULL) {
2233 xmlGenericError(xmlGenericErrorContext,
2234 "realloc of %d byte failed\n", size);
2235 return(NULL);
2236 }
2237 }
2238 COPY_BUF(l,buf,len,c);
2239 NEXTL(l);
2240 /*
2241 * Pop-up of finished entities.
2242 */
2243 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2244 xmlPopInput(ctxt);
2245
2246 GROW;
2247 c = CUR_CHAR(l);
2248 if (c == 0) {
2249 GROW;
2250 c = CUR_CHAR(l);
2251 }
2252 }
2253 buf[len] = 0;
2254
2255 /*
2256 * Raise problem w.r.t. '&' and '%' being used in non-entities
2257 * reference constructs. Note Charref will be handled in
2258 * xmlStringDecodeEntities()
2259 */
2260 cur = buf;
2261 while (*cur != 0) { /* non input consuming */
2262 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2263 xmlChar *name;
2264 xmlChar tmp = *cur;
2265
2266 cur++;
2267 name = xmlParseStringName(ctxt, &cur);
2268 if ((name == NULL) || (*cur != ';')) {
2269 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2271 ctxt->sax->error(ctxt->userData,
2272 "EntityValue: '%c' forbidden except for entities references\n",
2273 tmp);
2274 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002275 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002277 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2278 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002279 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2281 ctxt->sax->error(ctxt->userData,
2282 "EntityValue: PEReferences forbidden in internal subset\n",
2283 tmp);
2284 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002285 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002286 }
2287 if (name != NULL)
2288 xmlFree(name);
2289 }
2290 cur++;
2291 }
2292
2293 /*
2294 * Then PEReference entities are substituted.
2295 */
2296 if (c != stop) {
2297 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2299 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2300 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002301 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002302 xmlFree(buf);
2303 } else {
2304 NEXT;
2305 /*
2306 * NOTE: 4.4.7 Bypassed
2307 * When a general entity reference appears in the EntityValue in
2308 * an entity declaration, it is bypassed and left as is.
2309 * so XML_SUBSTITUTE_REF is not set here.
2310 */
2311 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2312 0, 0, 0);
2313 if (orig != NULL)
2314 *orig = buf;
2315 else
2316 xmlFree(buf);
2317 }
2318
2319 return(ret);
2320}
2321
2322/**
2323 * xmlParseAttValue:
2324 * @ctxt: an XML parser context
2325 *
2326 * parse a value for an attribute
2327 * Note: the parser won't do substitution of entities here, this
2328 * will be handled later in xmlStringGetNodeList
2329 *
2330 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2331 * "'" ([^<&'] | Reference)* "'"
2332 *
2333 * 3.3.3 Attribute-Value Normalization:
2334 * Before the value of an attribute is passed to the application or
2335 * checked for validity, the XML processor must normalize it as follows:
2336 * - a character reference is processed by appending the referenced
2337 * character to the attribute value
2338 * - an entity reference is processed by recursively processing the
2339 * replacement text of the entity
2340 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2341 * appending #x20 to the normalized value, except that only a single
2342 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2343 * parsed entity or the literal entity value of an internal parsed entity
2344 * - other characters are processed by appending them to the normalized value
2345 * If the declared value is not CDATA, then the XML processor must further
2346 * process the normalized attribute value by discarding any leading and
2347 * trailing space (#x20) characters, and by replacing sequences of space
2348 * (#x20) characters by a single space (#x20) character.
2349 * All attributes for which no declaration has been read should be treated
2350 * by a non-validating parser as if declared CDATA.
2351 *
2352 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2353 */
2354
2355xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002356xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2357
2358xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002359xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2360 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002361 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002362 xmlChar *ret = NULL;
2363 SHRINK;
2364 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002365 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002366 if (*in != '"' && *in != '\'') {
2367 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2369 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2370 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002371 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002372 return(NULL);
2373 }
2374 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2375 limit = *in;
2376 ++in;
2377
2378 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2379 *in != '&' && *in != '<'
2380 ) {
2381 ++in;
2382 }
2383 if (*in != limit) {
2384 return xmlParseAttValueComplex(ctxt);
2385 }
2386 ++in;
2387 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2388 CUR_PTR = in;
2389 return ret;
2390}
2391
Daniel Veillard01c13b52002-12-10 15:19:08 +00002392/**
2393 * xmlParseAttValueComplex:
2394 * @ctxt: an XML parser context
2395 *
2396 * parse a value for an attribute, this is the fallback function
2397 * of xmlParseAttValue() when the attribute parsing requires handling
2398 * of non-ASCII characters.
2399 *
2400 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2401 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002402xmlChar *
2403xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2404 xmlChar limit = 0;
2405 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002406 int len = 0;
2407 int buf_size = 0;
2408 int c, l;
2409 xmlChar *current = NULL;
2410 xmlEntityPtr ent;
2411
2412
2413 SHRINK;
2414 if (NXT(0) == '"') {
2415 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2416 limit = '"';
2417 NEXT;
2418 } else if (NXT(0) == '\'') {
2419 limit = '\'';
2420 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2421 NEXT;
2422 } else {
2423 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2425 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2426 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002427 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002428 return(NULL);
2429 }
2430
2431 /*
2432 * allocate a translation buffer.
2433 */
2434 buf_size = XML_PARSER_BUFFER_SIZE;
2435 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2436 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002437 xmlGenericError(xmlGenericErrorContext,
2438 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441
2442 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002443 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002444 */
2445 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002446 while ((NXT(0) != limit) && /* checked */
2447 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002448 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002449 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002450 if (NXT(1) == '#') {
2451 int val = xmlParseCharRef(ctxt);
2452 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002453 if (ctxt->replaceEntities) {
2454 if (len > buf_size - 10) {
2455 growBuffer(buf);
2456 }
2457 buf[len++] = '&';
2458 } else {
2459 /*
2460 * The reparsing will be done in xmlStringGetNodeList()
2461 * called by the attribute() function in SAX.c
2462 */
2463 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002464
Daniel Veillard319a7422001-09-11 09:27:09 +00002465 if (len > buf_size - 10) {
2466 growBuffer(buf);
2467 }
2468 current = &buffer[0];
2469 while (*current != 0) { /* non input consuming */
2470 buf[len++] = *current++;
2471 }
Owen Taylor3473f882001-02-23 17:55:21 +00002472 }
2473 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002474 if (len > buf_size - 10) {
2475 growBuffer(buf);
2476 }
Owen Taylor3473f882001-02-23 17:55:21 +00002477 len += xmlCopyChar(0, &buf[len], val);
2478 }
2479 } else {
2480 ent = xmlParseEntityRef(ctxt);
2481 if ((ent != NULL) &&
2482 (ctxt->replaceEntities != 0)) {
2483 xmlChar *rep;
2484
2485 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2486 rep = xmlStringDecodeEntities(ctxt, ent->content,
2487 XML_SUBSTITUTE_REF, 0, 0, 0);
2488 if (rep != NULL) {
2489 current = rep;
2490 while (*current != 0) { /* non input consuming */
2491 buf[len++] = *current++;
2492 if (len > buf_size - 10) {
2493 growBuffer(buf);
2494 }
2495 }
2496 xmlFree(rep);
2497 }
2498 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002499 if (len > buf_size - 10) {
2500 growBuffer(buf);
2501 }
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (ent->content != NULL)
2503 buf[len++] = ent->content[0];
2504 }
2505 } else if (ent != NULL) {
2506 int i = xmlStrlen(ent->name);
2507 const xmlChar *cur = ent->name;
2508
2509 /*
2510 * This may look absurd but is needed to detect
2511 * entities problems
2512 */
2513 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2514 (ent->content != NULL)) {
2515 xmlChar *rep;
2516 rep = xmlStringDecodeEntities(ctxt, ent->content,
2517 XML_SUBSTITUTE_REF, 0, 0, 0);
2518 if (rep != NULL)
2519 xmlFree(rep);
2520 }
2521
2522 /*
2523 * Just output the reference
2524 */
2525 buf[len++] = '&';
2526 if (len > buf_size - i - 10) {
2527 growBuffer(buf);
2528 }
2529 for (;i > 0;i--)
2530 buf[len++] = *cur++;
2531 buf[len++] = ';';
2532 }
2533 }
2534 } else {
2535 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2536 COPY_BUF(l,buf,len,0x20);
2537 if (len > buf_size - 10) {
2538 growBuffer(buf);
2539 }
2540 } else {
2541 COPY_BUF(l,buf,len,c);
2542 if (len > buf_size - 10) {
2543 growBuffer(buf);
2544 }
2545 }
2546 NEXTL(l);
2547 }
2548 GROW;
2549 c = CUR_CHAR(l);
2550 }
2551 buf[len++] = 0;
2552 if (RAW == '<') {
2553 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2555 ctxt->sax->error(ctxt->userData,
2556 "Unescaped '<' not allowed in attributes values\n");
2557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002559 } else if (RAW != limit) {
2560 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2562 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2563 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002564 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002565 } else
2566 NEXT;
2567 return(buf);
2568}
2569
2570/**
2571 * xmlParseSystemLiteral:
2572 * @ctxt: an XML parser context
2573 *
2574 * parse an XML Literal
2575 *
2576 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2577 *
2578 * Returns the SystemLiteral parsed or NULL
2579 */
2580
2581xmlChar *
2582xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2583 xmlChar *buf = NULL;
2584 int len = 0;
2585 int size = XML_PARSER_BUFFER_SIZE;
2586 int cur, l;
2587 xmlChar stop;
2588 int state = ctxt->instate;
2589 int count = 0;
2590
2591 SHRINK;
2592 if (RAW == '"') {
2593 NEXT;
2594 stop = '"';
2595 } else if (RAW == '\'') {
2596 NEXT;
2597 stop = '\'';
2598 } else {
2599 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2601 ctxt->sax->error(ctxt->userData,
2602 "SystemLiteral \" or ' expected\n");
2603 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002604 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 return(NULL);
2606 }
2607
2608 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2609 if (buf == NULL) {
2610 xmlGenericError(xmlGenericErrorContext,
2611 "malloc of %d byte failed\n", size);
2612 return(NULL);
2613 }
2614 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2615 cur = CUR_CHAR(l);
2616 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2617 if (len + 5 >= size) {
2618 size *= 2;
2619 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2620 if (buf == NULL) {
2621 xmlGenericError(xmlGenericErrorContext,
2622 "realloc of %d byte failed\n", size);
2623 ctxt->instate = (xmlParserInputState) state;
2624 return(NULL);
2625 }
2626 }
2627 count++;
2628 if (count > 50) {
2629 GROW;
2630 count = 0;
2631 }
2632 COPY_BUF(l,buf,len,cur);
2633 NEXTL(l);
2634 cur = CUR_CHAR(l);
2635 if (cur == 0) {
2636 GROW;
2637 SHRINK;
2638 cur = CUR_CHAR(l);
2639 }
2640 }
2641 buf[len] = 0;
2642 ctxt->instate = (xmlParserInputState) state;
2643 if (!IS_CHAR(cur)) {
2644 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2646 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2647 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002648 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002649 } else {
2650 NEXT;
2651 }
2652 return(buf);
2653}
2654
2655/**
2656 * xmlParsePubidLiteral:
2657 * @ctxt: an XML parser context
2658 *
2659 * parse an XML public literal
2660 *
2661 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2662 *
2663 * Returns the PubidLiteral parsed or NULL.
2664 */
2665
2666xmlChar *
2667xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2668 xmlChar *buf = NULL;
2669 int len = 0;
2670 int size = XML_PARSER_BUFFER_SIZE;
2671 xmlChar cur;
2672 xmlChar stop;
2673 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002674 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002675
2676 SHRINK;
2677 if (RAW == '"') {
2678 NEXT;
2679 stop = '"';
2680 } else if (RAW == '\'') {
2681 NEXT;
2682 stop = '\'';
2683 } else {
2684 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2686 ctxt->sax->error(ctxt->userData,
2687 "SystemLiteral \" or ' expected\n");
2688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 return(NULL);
2691 }
2692 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2693 if (buf == NULL) {
2694 xmlGenericError(xmlGenericErrorContext,
2695 "malloc of %d byte failed\n", size);
2696 return(NULL);
2697 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002698 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002699 cur = CUR;
2700 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2701 if (len + 1 >= size) {
2702 size *= 2;
2703 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2704 if (buf == NULL) {
2705 xmlGenericError(xmlGenericErrorContext,
2706 "realloc of %d byte failed\n", size);
2707 return(NULL);
2708 }
2709 }
2710 buf[len++] = cur;
2711 count++;
2712 if (count > 50) {
2713 GROW;
2714 count = 0;
2715 }
2716 NEXT;
2717 cur = CUR;
2718 if (cur == 0) {
2719 GROW;
2720 SHRINK;
2721 cur = CUR;
2722 }
2723 }
2724 buf[len] = 0;
2725 if (cur != stop) {
2726 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2728 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2729 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002730 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002731 } else {
2732 NEXT;
2733 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002734 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002735 return(buf);
2736}
2737
Daniel Veillard48b2f892001-02-25 16:11:03 +00002738void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002739/**
2740 * xmlParseCharData:
2741 * @ctxt: an XML parser context
2742 * @cdata: int indicating whether we are within a CDATA section
2743 *
2744 * parse a CharData section.
2745 * if we are within a CDATA section ']]>' marks an end of section.
2746 *
2747 * The right angle bracket (>) may be represented using the string "&gt;",
2748 * and must, for compatibility, be escaped using "&gt;" or a character
2749 * reference when it appears in the string "]]>" in content, when that
2750 * string is not marking the end of a CDATA section.
2751 *
2752 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2753 */
2754
2755void
2756xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002757 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002758 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002759 int line = ctxt->input->line;
2760 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002761
2762 SHRINK;
2763 GROW;
2764 /*
2765 * Accelerated common case where input don't need to be
2766 * modified before passing it to the handler.
2767 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002768 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002769 in = ctxt->input->cur;
2770 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002771get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002772 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2773 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002774 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002775 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002776 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002777 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002778 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002779 ctxt->input->line++;
2780 in++;
2781 }
2782 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002783 }
2784 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002785 if ((in[1] == ']') && (in[2] == '>')) {
2786 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2788 ctxt->sax->error(ctxt->userData,
2789 "Sequence ']]>' not allowed in content\n");
2790 ctxt->input->cur = in;
2791 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002792 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002793 return;
2794 }
2795 in++;
2796 goto get_more;
2797 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002798 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002799 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002800 if (IS_BLANK(*ctxt->input->cur)) {
2801 const xmlChar *tmp = ctxt->input->cur;
2802 ctxt->input->cur = in;
2803 if (areBlanks(ctxt, tmp, nbchar)) {
2804 if (ctxt->sax->ignorableWhitespace != NULL)
2805 ctxt->sax->ignorableWhitespace(ctxt->userData,
2806 tmp, nbchar);
2807 } else {
2808 if (ctxt->sax->characters != NULL)
2809 ctxt->sax->characters(ctxt->userData,
2810 tmp, nbchar);
2811 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002812 line = ctxt->input->line;
2813 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002814 } else {
2815 if (ctxt->sax->characters != NULL)
2816 ctxt->sax->characters(ctxt->userData,
2817 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002818 line = ctxt->input->line;
2819 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002820 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002821 }
2822 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002823 if (*in == 0xD) {
2824 in++;
2825 if (*in == 0xA) {
2826 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002827 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002828 ctxt->input->line++;
2829 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002830 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002831 in--;
2832 }
2833 if (*in == '<') {
2834 return;
2835 }
2836 if (*in == '&') {
2837 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002838 }
2839 SHRINK;
2840 GROW;
2841 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002842 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002843 nbchar = 0;
2844 }
Daniel Veillard50582112001-03-26 22:52:16 +00002845 ctxt->input->line = line;
2846 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002847 xmlParseCharDataComplex(ctxt, cdata);
2848}
2849
Daniel Veillard01c13b52002-12-10 15:19:08 +00002850/**
2851 * xmlParseCharDataComplex:
2852 * @ctxt: an XML parser context
2853 * @cdata: int indicating whether we are within a CDATA section
2854 *
2855 * parse a CharData section.this is the fallback function
2856 * of xmlParseCharData() when the parsing requires handling
2857 * of non-ASCII characters.
2858 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002859void
2860xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002861 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2862 int nbchar = 0;
2863 int cur, l;
2864 int count = 0;
2865
2866 SHRINK;
2867 GROW;
2868 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002869 while ((cur != '<') && /* checked */
2870 (cur != '&') &&
2871 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002872 if ((cur == ']') && (NXT(1) == ']') &&
2873 (NXT(2) == '>')) {
2874 if (cdata) break;
2875 else {
2876 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2878 ctxt->sax->error(ctxt->userData,
2879 "Sequence ']]>' not allowed in content\n");
2880 /* Should this be relaxed ??? I see a "must here */
2881 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002882 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002883 }
2884 }
2885 COPY_BUF(l,buf,nbchar,cur);
2886 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2887 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002888 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002889 */
2890 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2891 if (areBlanks(ctxt, buf, nbchar)) {
2892 if (ctxt->sax->ignorableWhitespace != NULL)
2893 ctxt->sax->ignorableWhitespace(ctxt->userData,
2894 buf, nbchar);
2895 } else {
2896 if (ctxt->sax->characters != NULL)
2897 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2898 }
2899 }
2900 nbchar = 0;
2901 }
2902 count++;
2903 if (count > 50) {
2904 GROW;
2905 count = 0;
2906 }
2907 NEXTL(l);
2908 cur = CUR_CHAR(l);
2909 }
2910 if (nbchar != 0) {
2911 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002912 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002913 */
2914 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2915 if (areBlanks(ctxt, buf, nbchar)) {
2916 if (ctxt->sax->ignorableWhitespace != NULL)
2917 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2918 } else {
2919 if (ctxt->sax->characters != NULL)
2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2921 }
2922 }
2923 }
2924}
2925
2926/**
2927 * xmlParseExternalID:
2928 * @ctxt: an XML parser context
2929 * @publicID: a xmlChar** receiving PubidLiteral
2930 * @strict: indicate whether we should restrict parsing to only
2931 * production [75], see NOTE below
2932 *
2933 * Parse an External ID or a Public ID
2934 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002935 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002936 * 'PUBLIC' S PubidLiteral S SystemLiteral
2937 *
2938 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2939 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2940 *
2941 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2942 *
2943 * Returns the function returns SystemLiteral and in the second
2944 * case publicID receives PubidLiteral, is strict is off
2945 * it is possible to return NULL and have publicID set.
2946 */
2947
2948xmlChar *
2949xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2950 xmlChar *URI = NULL;
2951
2952 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002953
2954 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002955 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2956 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2957 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2958 SKIP(6);
2959 if (!IS_BLANK(CUR)) {
2960 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2962 ctxt->sax->error(ctxt->userData,
2963 "Space required after 'SYSTEM'\n");
2964 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002965 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 SKIP_BLANKS;
2968 URI = xmlParseSystemLiteral(ctxt);
2969 if (URI == NULL) {
2970 ctxt->errNo = XML_ERR_URI_REQUIRED;
2971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2972 ctxt->sax->error(ctxt->userData,
2973 "xmlParseExternalID: SYSTEM, no URI\n");
2974 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002975 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002976 }
2977 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2978 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2979 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2980 SKIP(6);
2981 if (!IS_BLANK(CUR)) {
2982 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2984 ctxt->sax->error(ctxt->userData,
2985 "Space required after 'PUBLIC'\n");
2986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 SKIP_BLANKS;
2990 *publicID = xmlParsePubidLiteral(ctxt);
2991 if (*publicID == NULL) {
2992 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2994 ctxt->sax->error(ctxt->userData,
2995 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002998 }
2999 if (strict) {
3000 /*
3001 * We don't handle [83] so "S SystemLiteral" is required.
3002 */
3003 if (!IS_BLANK(CUR)) {
3004 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3006 ctxt->sax->error(ctxt->userData,
3007 "Space required after the Public Identifier\n");
3008 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003009 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003010 }
3011 } else {
3012 /*
3013 * We handle [83] so we return immediately, if
3014 * "S SystemLiteral" is not detected. From a purely parsing
3015 * point of view that's a nice mess.
3016 */
3017 const xmlChar *ptr;
3018 GROW;
3019
3020 ptr = CUR_PTR;
3021 if (!IS_BLANK(*ptr)) return(NULL);
3022
3023 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3024 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3025 }
3026 SKIP_BLANKS;
3027 URI = xmlParseSystemLiteral(ctxt);
3028 if (URI == NULL) {
3029 ctxt->errNo = XML_ERR_URI_REQUIRED;
3030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3031 ctxt->sax->error(ctxt->userData,
3032 "xmlParseExternalID: PUBLIC, no URI\n");
3033 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003034 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003035 }
3036 }
3037 return(URI);
3038}
3039
3040/**
3041 * xmlParseComment:
3042 * @ctxt: an XML parser context
3043 *
3044 * Skip an XML (SGML) comment <!-- .... -->
3045 * The spec says that "For compatibility, the string "--" (double-hyphen)
3046 * must not occur within comments. "
3047 *
3048 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3049 */
3050void
3051xmlParseComment(xmlParserCtxtPtr ctxt) {
3052 xmlChar *buf = NULL;
3053 int len;
3054 int size = XML_PARSER_BUFFER_SIZE;
3055 int q, ql;
3056 int r, rl;
3057 int cur, l;
3058 xmlParserInputState state;
3059 xmlParserInputPtr input = ctxt->input;
3060 int count = 0;
3061
3062 /*
3063 * Check that there is a comment right here.
3064 */
3065 if ((RAW != '<') || (NXT(1) != '!') ||
3066 (NXT(2) != '-') || (NXT(3) != '-')) return;
3067
3068 state = ctxt->instate;
3069 ctxt->instate = XML_PARSER_COMMENT;
3070 SHRINK;
3071 SKIP(4);
3072 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3073 if (buf == NULL) {
3074 xmlGenericError(xmlGenericErrorContext,
3075 "malloc of %d byte failed\n", size);
3076 ctxt->instate = state;
3077 return;
3078 }
3079 q = CUR_CHAR(ql);
3080 NEXTL(ql);
3081 r = CUR_CHAR(rl);
3082 NEXTL(rl);
3083 cur = CUR_CHAR(l);
3084 len = 0;
3085 while (IS_CHAR(cur) && /* checked */
3086 ((cur != '>') ||
3087 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003088 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003089 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3091 ctxt->sax->error(ctxt->userData,
3092 "Comment must not contain '--' (double-hyphen)`\n");
3093 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003094 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003095 }
3096 if (len + 5 >= size) {
3097 size *= 2;
3098 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3099 if (buf == NULL) {
3100 xmlGenericError(xmlGenericErrorContext,
3101 "realloc of %d byte failed\n", size);
3102 ctxt->instate = state;
3103 return;
3104 }
3105 }
3106 COPY_BUF(ql,buf,len,q);
3107 q = r;
3108 ql = rl;
3109 r = cur;
3110 rl = l;
3111
3112 count++;
3113 if (count > 50) {
3114 GROW;
3115 count = 0;
3116 }
3117 NEXTL(l);
3118 cur = CUR_CHAR(l);
3119 if (cur == 0) {
3120 SHRINK;
3121 GROW;
3122 cur = CUR_CHAR(l);
3123 }
3124 }
3125 buf[len] = 0;
3126 if (!IS_CHAR(cur)) {
3127 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "Comment not terminated \n<!--%.50s\n", buf);
3131 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003132 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003133 xmlFree(buf);
3134 } else {
3135 if (input != ctxt->input) {
3136 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3138 ctxt->sax->error(ctxt->userData,
3139"Comment doesn't start and stop in the same entity\n");
3140 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003141 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003142 }
3143 NEXT;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3145 (!ctxt->disableSAX))
3146 ctxt->sax->comment(ctxt->userData, buf);
3147 xmlFree(buf);
3148 }
3149 ctxt->instate = state;
3150}
3151
3152/**
3153 * xmlParsePITarget:
3154 * @ctxt: an XML parser context
3155 *
3156 * parse the name of a PI
3157 *
3158 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3159 *
3160 * Returns the PITarget name or NULL
3161 */
3162
3163xmlChar *
3164xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3165 xmlChar *name;
3166
3167 name = xmlParseName(ctxt);
3168 if ((name != NULL) &&
3169 ((name[0] == 'x') || (name[0] == 'X')) &&
3170 ((name[1] == 'm') || (name[1] == 'M')) &&
3171 ((name[2] == 'l') || (name[2] == 'L'))) {
3172 int i;
3173 if ((name[0] == 'x') && (name[1] == 'm') &&
3174 (name[2] == 'l') && (name[3] == 0)) {
3175 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3177 ctxt->sax->error(ctxt->userData,
3178 "XML declaration allowed only at the start of the document\n");
3179 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003180 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(name);
3182 } else if (name[3] == 0) {
3183 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3185 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3186 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003187 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003188 return(name);
3189 }
3190 for (i = 0;;i++) {
3191 if (xmlW3CPIs[i] == NULL) break;
3192 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3193 return(name);
3194 }
3195 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3196 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3197 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003198 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003199 }
3200 }
3201 return(name);
3202}
3203
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be the word "catalog", an '=' sign and a quoted URL,
 * with optional blanks around each of them and nothing but blanks after
 * the closing quote. A missing '=' makes the function return silently;
 * any other syntax problem is reported through the SAX warning callback.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *scan = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the "catalog" pseudo-attribute name */
    for (; IS_BLANK(*scan); scan++)
        ;
    if (xmlStrncmp(scan, BAD_CAST"catalog", 7) != 0)
        goto bad_syntax;
    scan += 7;

    /* the '=' sign */
    for (; IS_BLANK(*scan); scan++)
        ;
    if (*scan != '=')
        return;
    scan++;

    /* the opening quote, single or double */
    for (; IS_BLANK(*scan); scan++)
        ;
    quote = *scan;
    if (!((quote == '\'') || (quote == '"')))
        goto bad_syntax;
    scan++;

    /* the URL runs up to the matching quote */
    start = scan;
    while ((*scan != 0) && (*scan != quote))
        scan++;
    if (*scan == 0)
        goto bad_syntax;
    URL = xmlStrndup(start, scan - start);
    scan++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*scan); scan++)
        ;
    if (*scan != 0)
        goto bad_syntax;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

bad_syntax:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3266
Owen Taylor3473f882001-02-23 17:55:21 +00003267/**
3268 * xmlParsePI:
3269 * @ctxt: an XML parser context
3270 *
3271 * parse an XML Processing Instruction.
3272 *
3273 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3274 *
3275 * The processing is transfered to SAX once parsed.
3276 */
3277
3278void
3279xmlParsePI(xmlParserCtxtPtr ctxt) {
3280 xmlChar *buf = NULL;
3281 int len = 0;
3282 int size = XML_PARSER_BUFFER_SIZE;
3283 int cur, l;
3284 xmlChar *target;
3285 xmlParserInputState state;
3286 int count = 0;
3287
3288 if ((RAW == '<') && (NXT(1) == '?')) {
3289 xmlParserInputPtr input = ctxt->input;
3290 state = ctxt->instate;
3291 ctxt->instate = XML_PARSER_PI;
3292 /*
3293 * this is a Processing Instruction.
3294 */
3295 SKIP(2);
3296 SHRINK;
3297
3298 /*
3299 * Parse the target name and check for special support like
3300 * namespace.
3301 */
3302 target = xmlParsePITarget(ctxt);
3303 if (target != NULL) {
3304 if ((RAW == '?') && (NXT(1) == '>')) {
3305 if (input != ctxt->input) {
3306 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3308 ctxt->sax->error(ctxt->userData,
3309 "PI declaration doesn't start and stop in the same entity\n");
3310 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003311 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003312 }
3313 SKIP(2);
3314
3315 /*
3316 * SAX: PI detected.
3317 */
3318 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3319 (ctxt->sax->processingInstruction != NULL))
3320 ctxt->sax->processingInstruction(ctxt->userData,
3321 target, NULL);
3322 ctxt->instate = state;
3323 xmlFree(target);
3324 return;
3325 }
3326 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3327 if (buf == NULL) {
3328 xmlGenericError(xmlGenericErrorContext,
3329 "malloc of %d byte failed\n", size);
3330 ctxt->instate = state;
3331 return;
3332 }
3333 cur = CUR;
3334 if (!IS_BLANK(cur)) {
3335 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData,
3338 "xmlParsePI: PI %s space expected\n", target);
3339 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003341 }
3342 SKIP_BLANKS;
3343 cur = CUR_CHAR(l);
3344 while (IS_CHAR(cur) && /* checked */
3345 ((cur != '?') || (NXT(1) != '>'))) {
3346 if (len + 5 >= size) {
3347 size *= 2;
3348 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3349 if (buf == NULL) {
3350 xmlGenericError(xmlGenericErrorContext,
3351 "realloc of %d byte failed\n", size);
3352 ctxt->instate = state;
3353 return;
3354 }
3355 }
3356 count++;
3357 if (count > 50) {
3358 GROW;
3359 count = 0;
3360 }
3361 COPY_BUF(l,buf,len,cur);
3362 NEXTL(l);
3363 cur = CUR_CHAR(l);
3364 if (cur == 0) {
3365 SHRINK;
3366 GROW;
3367 cur = CUR_CHAR(l);
3368 }
3369 }
3370 buf[len] = 0;
3371 if (cur != '?') {
3372 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3374 ctxt->sax->error(ctxt->userData,
3375 "xmlParsePI: PI %s never end ...\n", target);
3376 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003377 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003378 } else {
3379 if (input != ctxt->input) {
3380 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3382 ctxt->sax->error(ctxt->userData,
3383 "PI declaration doesn't start and stop in the same entity\n");
3384 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003385 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003386 }
3387 SKIP(2);
3388
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003389#ifdef LIBXML_CATALOG_ENABLED
3390 if (((state == XML_PARSER_MISC) ||
3391 (state == XML_PARSER_START)) &&
3392 (xmlStrEqual(target, XML_CATALOG_PI))) {
3393 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3394 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3395 (allow == XML_CATA_ALLOW_ALL))
3396 xmlParseCatalogPI(ctxt, buf);
3397 }
3398#endif
3399
3400
Owen Taylor3473f882001-02-23 17:55:21 +00003401 /*
3402 * SAX: PI detected.
3403 */
3404 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3405 (ctxt->sax->processingInstruction != NULL))
3406 ctxt->sax->processingInstruction(ctxt->userData,
3407 target, buf);
3408 }
3409 xmlFree(buf);
3410 xmlFree(target);
3411 } else {
3412 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3414 ctxt->sax->error(ctxt->userData,
3415 "xmlParsePI : no target name\n");
3416 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003417 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003418 }
3419 ctxt->instate = state;
3420 }
3421}
3422
3423/**
3424 * xmlParseNotationDecl:
3425 * @ctxt: an XML parser context
3426 *
3427 * parse a notation declaration
3428 *
3429 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3430 *
3431 * Hence there is actually 3 choices:
3432 * 'PUBLIC' S PubidLiteral
3433 * 'PUBLIC' S PubidLiteral S SystemLiteral
3434 * and 'SYSTEM' S SystemLiteral
3435 *
3436 * See the NOTE on xmlParseExternalID().
3437 */
3438
3439void
3440xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3441 xmlChar *name;
3442 xmlChar *Pubid;
3443 xmlChar *Systemid;
3444
3445 if ((RAW == '<') && (NXT(1) == '!') &&
3446 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3447 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3448 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3449 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3450 xmlParserInputPtr input = ctxt->input;
3451 SHRINK;
3452 SKIP(10);
3453 if (!IS_BLANK(CUR)) {
3454 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3456 ctxt->sax->error(ctxt->userData,
3457 "Space required after '<!NOTATION'\n");
3458 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003459 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003460 return;
3461 }
3462 SKIP_BLANKS;
3463
Daniel Veillard76d66f42001-05-16 21:05:17 +00003464 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003465 if (name == NULL) {
3466 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3468 ctxt->sax->error(ctxt->userData,
3469 "NOTATION: Name expected here\n");
3470 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003471 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003472 return;
3473 }
3474 if (!IS_BLANK(CUR)) {
3475 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3477 ctxt->sax->error(ctxt->userData,
3478 "Space required after the NOTATION name'\n");
3479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003481 return;
3482 }
3483 SKIP_BLANKS;
3484
3485 /*
3486 * Parse the IDs.
3487 */
3488 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3489 SKIP_BLANKS;
3490
3491 if (RAW == '>') {
3492 if (input != ctxt->input) {
3493 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3495 ctxt->sax->error(ctxt->userData,
3496"Notation declaration doesn't start and stop in the same entity\n");
3497 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003498 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003499 }
3500 NEXT;
3501 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3502 (ctxt->sax->notationDecl != NULL))
3503 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3504 } else {
3505 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508 "'>' required to close NOTATION declaration\n");
3509 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003510 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 }
3512 xmlFree(name);
3513 if (Systemid != NULL) xmlFree(Systemid);
3514 if (Pubid != NULL) xmlFree(Pubid);
3515 }
3516}
3517
3518/**
3519 * xmlParseEntityDecl:
3520 * @ctxt: an XML parser context
3521 *
3522 * parse <!ENTITY declarations
3523 *
3524 * [70] EntityDecl ::= GEDecl | PEDecl
3525 *
3526 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3527 *
3528 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3529 *
3530 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3531 *
3532 * [74] PEDef ::= EntityValue | ExternalID
3533 *
3534 * [76] NDataDecl ::= S 'NDATA' S Name
3535 *
3536 * [ VC: Notation Declared ]
3537 * The Name must match the declared name of a notation.
3538 */
3539
3540void
3541xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3542 xmlChar *name = NULL;
3543 xmlChar *value = NULL;
3544 xmlChar *URI = NULL, *literal = NULL;
3545 xmlChar *ndata = NULL;
3546 int isParameter = 0;
3547 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003548 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003549
3550 GROW;
3551 if ((RAW == '<') && (NXT(1) == '!') &&
3552 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3553 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3554 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3555 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003556 SHRINK;
3557 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003558 skipped = SKIP_BLANKS;
3559 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003560 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3562 ctxt->sax->error(ctxt->userData,
3563 "Space required after '<!ENTITY'\n");
3564 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003565 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003566 }
Owen Taylor3473f882001-02-23 17:55:21 +00003567
3568 if (RAW == '%') {
3569 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003570 skipped = SKIP_BLANKS;
3571 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003572 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3574 ctxt->sax->error(ctxt->userData,
3575 "Space required after '%'\n");
3576 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003577 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003578 }
Owen Taylor3473f882001-02-23 17:55:21 +00003579 isParameter = 1;
3580 }
3581
Daniel Veillard76d66f42001-05-16 21:05:17 +00003582 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003583 if (name == NULL) {
3584 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3586 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3587 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003588 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003589 return;
3590 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003591 skipped = SKIP_BLANKS;
3592 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003593 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3595 ctxt->sax->error(ctxt->userData,
3596 "Space required after the entity name\n");
3597 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003599 }
Owen Taylor3473f882001-02-23 17:55:21 +00003600
Daniel Veillardf5582f12002-06-11 10:08:16 +00003601 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003602 /*
3603 * handle the various case of definitions...
3604 */
3605 if (isParameter) {
3606 if ((RAW == '"') || (RAW == '\'')) {
3607 value = xmlParseEntityValue(ctxt, &orig);
3608 if (value) {
3609 if ((ctxt->sax != NULL) &&
3610 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3611 ctxt->sax->entityDecl(ctxt->userData, name,
3612 XML_INTERNAL_PARAMETER_ENTITY,
3613 NULL, NULL, value);
3614 }
3615 } else {
3616 URI = xmlParseExternalID(ctxt, &literal, 1);
3617 if ((URI == NULL) && (literal == NULL)) {
3618 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3620 ctxt->sax->error(ctxt->userData,
3621 "Entity value required\n");
3622 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003624 }
3625 if (URI) {
3626 xmlURIPtr uri;
3627
3628 uri = xmlParseURI((const char *) URI);
3629 if (uri == NULL) {
3630 ctxt->errNo = XML_ERR_INVALID_URI;
3631 if ((ctxt->sax != NULL) &&
3632 (!ctxt->disableSAX) &&
3633 (ctxt->sax->error != NULL))
3634 ctxt->sax->error(ctxt->userData,
3635 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003636 /*
3637 * This really ought to be a well formedness error
3638 * but the XML Core WG decided otherwise c.f. issue
3639 * E26 of the XML erratas.
3640 */
Owen Taylor3473f882001-02-23 17:55:21 +00003641 } else {
3642 if (uri->fragment != NULL) {
3643 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3644 if ((ctxt->sax != NULL) &&
3645 (!ctxt->disableSAX) &&
3646 (ctxt->sax->error != NULL))
3647 ctxt->sax->error(ctxt->userData,
3648 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003649 /*
3650 * Okay this is foolish to block those but not
3651 * invalid URIs.
3652 */
Owen Taylor3473f882001-02-23 17:55:21 +00003653 ctxt->wellFormed = 0;
3654 } else {
3655 if ((ctxt->sax != NULL) &&
3656 (!ctxt->disableSAX) &&
3657 (ctxt->sax->entityDecl != NULL))
3658 ctxt->sax->entityDecl(ctxt->userData, name,
3659 XML_EXTERNAL_PARAMETER_ENTITY,
3660 literal, URI, NULL);
3661 }
3662 xmlFreeURI(uri);
3663 }
3664 }
3665 }
3666 } else {
3667 if ((RAW == '"') || (RAW == '\'')) {
3668 value = xmlParseEntityValue(ctxt, &orig);
3669 if ((ctxt->sax != NULL) &&
3670 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3671 ctxt->sax->entityDecl(ctxt->userData, name,
3672 XML_INTERNAL_GENERAL_ENTITY,
3673 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003674 /*
3675 * For expat compatibility in SAX mode.
3676 */
3677 if ((ctxt->myDoc == NULL) ||
3678 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3679 if (ctxt->myDoc == NULL) {
3680 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3681 }
3682 if (ctxt->myDoc->intSubset == NULL)
3683 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3684 BAD_CAST "fake", NULL, NULL);
3685
3686 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3687 NULL, NULL, value);
3688 }
Owen Taylor3473f882001-02-23 17:55:21 +00003689 } else {
3690 URI = xmlParseExternalID(ctxt, &literal, 1);
3691 if ((URI == NULL) && (literal == NULL)) {
3692 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Entity value required\n");
3696 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003697 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 if (URI) {
3700 xmlURIPtr uri;
3701
3702 uri = xmlParseURI((const char *)URI);
3703 if (uri == NULL) {
3704 ctxt->errNo = XML_ERR_INVALID_URI;
3705 if ((ctxt->sax != NULL) &&
3706 (!ctxt->disableSAX) &&
3707 (ctxt->sax->error != NULL))
3708 ctxt->sax->error(ctxt->userData,
3709 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003710 /*
3711 * This really ought to be a well formedness error
3712 * but the XML Core WG decided otherwise c.f. issue
3713 * E26 of the XML erratas.
3714 */
Owen Taylor3473f882001-02-23 17:55:21 +00003715 } else {
3716 if (uri->fragment != NULL) {
3717 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3718 if ((ctxt->sax != NULL) &&
3719 (!ctxt->disableSAX) &&
3720 (ctxt->sax->error != NULL))
3721 ctxt->sax->error(ctxt->userData,
3722 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003723 /*
3724 * Okay this is foolish to block those but not
3725 * invalid URIs.
3726 */
Owen Taylor3473f882001-02-23 17:55:21 +00003727 ctxt->wellFormed = 0;
3728 }
3729 xmlFreeURI(uri);
3730 }
3731 }
3732 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "Space required before 'NDATA'\n");
3737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 SKIP_BLANKS;
3741 if ((RAW == 'N') && (NXT(1) == 'D') &&
3742 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3743 (NXT(4) == 'A')) {
3744 SKIP(5);
3745 if (!IS_BLANK(CUR)) {
3746 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "Space required after 'NDATA'\n");
3750 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003751 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003754 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3756 (ctxt->sax->unparsedEntityDecl != NULL))
3757 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3758 literal, URI, ndata);
3759 } else {
3760 if ((ctxt->sax != NULL) &&
3761 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3762 ctxt->sax->entityDecl(ctxt->userData, name,
3763 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3764 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003765 /*
3766 * For expat compatibility in SAX mode.
3767 * assuming the entity repalcement was asked for
3768 */
3769 if ((ctxt->replaceEntities != 0) &&
3770 ((ctxt->myDoc == NULL) ||
3771 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3772 if (ctxt->myDoc == NULL) {
3773 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3774 }
3775
3776 if (ctxt->myDoc->intSubset == NULL)
3777 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3778 BAD_CAST "fake", NULL, NULL);
3779 entityDecl(ctxt, name,
3780 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3781 literal, URI, NULL);
3782 }
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
3784 }
3785 }
3786 SKIP_BLANKS;
3787 if (RAW != '>') {
3788 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3790 ctxt->sax->error(ctxt->userData,
3791 "xmlParseEntityDecl: entity %s not terminated\n", name);
3792 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003793 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003794 } else {
3795 if (input != ctxt->input) {
3796 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3798 ctxt->sax->error(ctxt->userData,
3799"Entity declaration doesn't start and stop in the same entity\n");
3800 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003801 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003802 }
3803 NEXT;
3804 }
3805 if (orig != NULL) {
3806 /*
3807 * Ugly mechanism to save the raw entity value.
3808 */
3809 xmlEntityPtr cur = NULL;
3810
3811 if (isParameter) {
3812 if ((ctxt->sax != NULL) &&
3813 (ctxt->sax->getParameterEntity != NULL))
3814 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3815 } else {
3816 if ((ctxt->sax != NULL) &&
3817 (ctxt->sax->getEntity != NULL))
3818 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003819 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3820 cur = getEntity(ctxt, name);
3821 }
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 if (cur != NULL) {
3824 if (cur->orig != NULL)
3825 xmlFree(orig);
3826 else
3827 cur->orig = orig;
3828 } else
3829 xmlFree(orig);
3830 }
3831 if (name != NULL) xmlFree(name);
3832 if (value != NULL) xmlFree(value);
3833 if (URI != NULL) xmlFree(URI);
3834 if (literal != NULL) xmlFree(literal);
3835 if (ndata != NULL) xmlFree(ndata);
3836 }
3837}
3838
3839/**
3840 * xmlParseDefaultDecl:
3841 * @ctxt: an XML parser context
3842 * @value: Receive a possible fixed default value for the attribute
3843 *
3844 * Parse an attribute default declaration
3845 *
3846 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3847 *
3848 * [ VC: Required Attribute ]
3849 * if the default declaration is the keyword #REQUIRED, then the
3850 * attribute must be specified for all elements of the type in the
3851 * attribute-list declaration.
3852 *
3853 * [ VC: Attribute Default Legal ]
3854 * The declared default value must meet the lexical constraints of
3855 * the declared attribute type c.f. xmlValidateAttributeDecl()
3856 *
3857 * [ VC: Fixed Attribute Default ]
3858 * if an attribute has a default value declared with the #FIXED
3859 * keyword, instances of that attribute must match the default value.
3860 *
3861 * [ WFC: No < in Attribute Values ]
3862 * handled in xmlParseAttValue()
3863 *
3864 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3865 * or XML_ATTRIBUTE_FIXED.
3866 */
3867
3868int
3869xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3870 int val;
3871 xmlChar *ret;
3872
3873 *value = NULL;
3874 if ((RAW == '#') && (NXT(1) == 'R') &&
3875 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3876 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3877 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3878 (NXT(8) == 'D')) {
3879 SKIP(9);
3880 return(XML_ATTRIBUTE_REQUIRED);
3881 }
3882 if ((RAW == '#') && (NXT(1) == 'I') &&
3883 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3884 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3885 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3886 SKIP(8);
3887 return(XML_ATTRIBUTE_IMPLIED);
3888 }
3889 val = XML_ATTRIBUTE_NONE;
3890 if ((RAW == '#') && (NXT(1) == 'F') &&
3891 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3892 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3893 SKIP(6);
3894 val = XML_ATTRIBUTE_FIXED;
3895 if (!IS_BLANK(CUR)) {
3896 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3898 ctxt->sax->error(ctxt->userData,
3899 "Space required after '#FIXED'\n");
3900 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003901 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003902 }
3903 SKIP_BLANKS;
3904 }
3905 ret = xmlParseAttValue(ctxt);
3906 ctxt->instate = XML_PARSER_DTD;
3907 if (ret == NULL) {
3908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3909 ctxt->sax->error(ctxt->userData,
3910 "Attribute default value declaration error\n");
3911 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003913 } else
3914 *value = ret;
3915 return(val);
3916}
3917
3918/**
3919 * xmlParseNotationType:
3920 * @ctxt: an XML parser context
3921 *
3922 * parse an Notation attribute type.
3923 *
3924 * Note: the leading 'NOTATION' S part has already being parsed...
3925 *
3926 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3927 *
3928 * [ VC: Notation Attributes ]
3929 * Values of this type must match one of the notation names included
3930 * in the declaration; all notation names in the declaration must be declared.
3931 *
3932 * Returns: the notation attribute tree built while parsing
3933 */
3934
3935xmlEnumerationPtr
3936xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3937 xmlChar *name;
3938 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3939
3940 if (RAW != '(') {
3941 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "'(' required to start 'NOTATION'\n");
3945 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003946 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003947 return(NULL);
3948 }
3949 SHRINK;
3950 do {
3951 NEXT;
3952 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003953 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003954 if (name == NULL) {
3955 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3957 ctxt->sax->error(ctxt->userData,
3958 "Name expected in NOTATION declaration\n");
3959 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003960 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003961 return(ret);
3962 }
3963 cur = xmlCreateEnumeration(name);
3964 xmlFree(name);
3965 if (cur == NULL) return(ret);
3966 if (last == NULL) ret = last = cur;
3967 else {
3968 last->next = cur;
3969 last = cur;
3970 }
3971 SKIP_BLANKS;
3972 } while (RAW == '|');
3973 if (RAW != ')') {
3974 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3976 ctxt->sax->error(ctxt->userData,
3977 "')' required to finish NOTATION declaration\n");
3978 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003979 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003980 if ((last != NULL) && (last != ret))
3981 xmlFreeEnumeration(last);
3982 return(ret);
3983 }
3984 NEXT;
3985 return(ret);
3986}
3987
3988/**
3989 * xmlParseEnumerationType:
3990 * @ctxt: an XML parser context
3991 *
3992 * parse an Enumeration attribute type.
3993 *
3994 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3995 *
3996 * [ VC: Enumeration ]
3997 * Values of this type must match one of the Nmtoken tokens in
3998 * the declaration
3999 *
4000 * Returns: the enumeration attribute tree built while parsing
4001 */
4002
4003xmlEnumerationPtr
4004xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4005 xmlChar *name;
4006 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4007
4008 if (RAW != '(') {
4009 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4011 ctxt->sax->error(ctxt->userData,
4012 "'(' required to start ATTLIST enumeration\n");
4013 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004014 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004015 return(NULL);
4016 }
4017 SHRINK;
4018 do {
4019 NEXT;
4020 SKIP_BLANKS;
4021 name = xmlParseNmtoken(ctxt);
4022 if (name == NULL) {
4023 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4025 ctxt->sax->error(ctxt->userData,
4026 "NmToken expected in ATTLIST enumeration\n");
4027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004029 return(ret);
4030 }
4031 cur = xmlCreateEnumeration(name);
4032 xmlFree(name);
4033 if (cur == NULL) return(ret);
4034 if (last == NULL) ret = last = cur;
4035 else {
4036 last->next = cur;
4037 last = cur;
4038 }
4039 SKIP_BLANKS;
4040 } while (RAW == '|');
4041 if (RAW != ')') {
4042 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4044 ctxt->sax->error(ctxt->userData,
4045 "')' required to finish ATTLIST enumeration\n");
4046 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004047 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004048 return(ret);
4049 }
4050 NEXT;
4051 return(ret);
4052}
4053
4054/**
4055 * xmlParseEnumeratedType:
4056 * @ctxt: an XML parser context
4057 * @tree: the enumeration tree built while parsing
4058 *
4059 * parse an Enumerated attribute type.
4060 *
4061 * [57] EnumeratedType ::= NotationType | Enumeration
4062 *
4063 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4064 *
4065 *
4066 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4067 */
4068
4069int
4070xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4071 if ((RAW == 'N') && (NXT(1) == 'O') &&
4072 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4073 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4074 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4075 SKIP(8);
4076 if (!IS_BLANK(CUR)) {
4077 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4079 ctxt->sax->error(ctxt->userData,
4080 "Space required after 'NOTATION'\n");
4081 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004082 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004083 return(0);
4084 }
4085 SKIP_BLANKS;
4086 *tree = xmlParseNotationType(ctxt);
4087 if (*tree == NULL) return(0);
4088 return(XML_ATTRIBUTE_NOTATION);
4089 }
4090 *tree = xmlParseEnumerationType(ctxt);
4091 if (*tree == NULL) return(0);
4092 return(XML_ATTRIBUTE_ENUMERATION);
4093}
4094
4095/**
4096 * xmlParseAttributeType:
4097 * @ctxt: an XML parser context
4098 * @tree: the enumeration tree built while parsing
4099 *
4100 * parse the Attribute list def for an element
4101 *
4102 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4103 *
4104 * [55] StringType ::= 'CDATA'
4105 *
4106 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4107 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4108 *
4109 * Validity constraints for attribute values syntax are checked in
4110 * xmlValidateAttributeValue()
4111 *
4112 * [ VC: ID ]
4113 * Values of type ID must match the Name production. A name must not
4114 * appear more than once in an XML document as a value of this type;
4115 * i.e., ID values must uniquely identify the elements which bear them.
4116 *
4117 * [ VC: One ID per Element Type ]
4118 * No element type may have more than one ID attribute specified.
4119 *
4120 * [ VC: ID Attribute Default ]
4121 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4122 *
4123 * [ VC: IDREF ]
4124 * Values of type IDREF must match the Name production, and values
4125 * of type IDREFS must match Names; each IDREF Name must match the value
4126 * of an ID attribute on some element in the XML document; i.e. IDREF
4127 * values must match the value of some ID attribute.
4128 *
4129 * [ VC: Entity Name ]
4130 * Values of type ENTITY must match the Name production, values
4131 * of type ENTITIES must match Names; each Entity Name must match the
4132 * name of an unparsed entity declared in the DTD.
4133 *
4134 * [ VC: Name Token ]
4135 * Values of type NMTOKEN must match the Nmtoken production; values
4136 * of type NMTOKENS must match Nmtokens.
4137 *
4138 * Returns the attribute type
4139 */
4140int
4141xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4142 SHRINK;
4143 if ((RAW == 'C') && (NXT(1) == 'D') &&
4144 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4145 (NXT(4) == 'A')) {
4146 SKIP(5);
4147 return(XML_ATTRIBUTE_CDATA);
4148 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4149 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4150 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4151 SKIP(6);
4152 return(XML_ATTRIBUTE_IDREFS);
4153 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4154 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4155 (NXT(4) == 'F')) {
4156 SKIP(5);
4157 return(XML_ATTRIBUTE_IDREF);
4158 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4159 SKIP(2);
4160 return(XML_ATTRIBUTE_ID);
4161 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4162 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4163 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4164 SKIP(6);
4165 return(XML_ATTRIBUTE_ENTITY);
4166 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4167 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4168 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4169 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4170 SKIP(8);
4171 return(XML_ATTRIBUTE_ENTITIES);
4172 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4173 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4174 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4175 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4176 SKIP(8);
4177 return(XML_ATTRIBUTE_NMTOKENS);
4178 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4179 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4180 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4181 (NXT(6) == 'N')) {
4182 SKIP(7);
4183 return(XML_ATTRIBUTE_NMTOKEN);
4184 }
4185 return(xmlParseEnumeratedType(ctxt, tree));
4186}
4187
4188/**
4189 * xmlParseAttributeListDecl:
4190 * @ctxt: an XML parser context
4191 *
4192 * : parse the Attribute list def for an element
4193 *
4194 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4195 *
4196 * [53] AttDef ::= S Name S AttType S DefaultDecl
4197 *
4198 */
4199void
4200xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4201 xmlChar *elemName;
4202 xmlChar *attrName;
4203 xmlEnumerationPtr tree;
4204
4205 if ((RAW == '<') && (NXT(1) == '!') &&
4206 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4207 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4208 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4209 (NXT(8) == 'T')) {
4210 xmlParserInputPtr input = ctxt->input;
4211
4212 SKIP(9);
4213 if (!IS_BLANK(CUR)) {
4214 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4216 ctxt->sax->error(ctxt->userData,
4217 "Space required after '<!ATTLIST'\n");
4218 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004219 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 }
4221 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004222 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004223 if (elemName == NULL) {
4224 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4226 ctxt->sax->error(ctxt->userData,
4227 "ATTLIST: no name for Element\n");
4228 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004229 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 return;
4231 }
4232 SKIP_BLANKS;
4233 GROW;
4234 while (RAW != '>') {
4235 const xmlChar *check = CUR_PTR;
4236 int type;
4237 int def;
4238 xmlChar *defaultValue = NULL;
4239
4240 GROW;
4241 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004242 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004243 if (attrName == NULL) {
4244 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4246 ctxt->sax->error(ctxt->userData,
4247 "ATTLIST: no name for Attribute\n");
4248 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004249 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 break;
4251 }
4252 GROW;
4253 if (!IS_BLANK(CUR)) {
4254 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4256 ctxt->sax->error(ctxt->userData,
4257 "Space required after the attribute name\n");
4258 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004259 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004260 if (attrName != NULL)
4261 xmlFree(attrName);
4262 if (defaultValue != NULL)
4263 xmlFree(defaultValue);
4264 break;
4265 }
4266 SKIP_BLANKS;
4267
4268 type = xmlParseAttributeType(ctxt, &tree);
4269 if (type <= 0) {
4270 if (attrName != NULL)
4271 xmlFree(attrName);
4272 if (defaultValue != NULL)
4273 xmlFree(defaultValue);
4274 break;
4275 }
4276
4277 GROW;
4278 if (!IS_BLANK(CUR)) {
4279 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4281 ctxt->sax->error(ctxt->userData,
4282 "Space required after the attribute type\n");
4283 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004284 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004285 if (attrName != NULL)
4286 xmlFree(attrName);
4287 if (defaultValue != NULL)
4288 xmlFree(defaultValue);
4289 if (tree != NULL)
4290 xmlFreeEnumeration(tree);
4291 break;
4292 }
4293 SKIP_BLANKS;
4294
4295 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4296 if (def <= 0) {
4297 if (attrName != NULL)
4298 xmlFree(attrName);
4299 if (defaultValue != NULL)
4300 xmlFree(defaultValue);
4301 if (tree != NULL)
4302 xmlFreeEnumeration(tree);
4303 break;
4304 }
4305
4306 GROW;
4307 if (RAW != '>') {
4308 if (!IS_BLANK(CUR)) {
4309 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4311 ctxt->sax->error(ctxt->userData,
4312 "Space required after the attribute default value\n");
4313 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004314 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004315 if (attrName != NULL)
4316 xmlFree(attrName);
4317 if (defaultValue != NULL)
4318 xmlFree(defaultValue);
4319 if (tree != NULL)
4320 xmlFreeEnumeration(tree);
4321 break;
4322 }
4323 SKIP_BLANKS;
4324 }
4325 if (check == CUR_PTR) {
4326 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4328 ctxt->sax->error(ctxt->userData,
4329 "xmlParseAttributeListDecl: detected internal error\n");
4330 if (attrName != NULL)
4331 xmlFree(attrName);
4332 if (defaultValue != NULL)
4333 xmlFree(defaultValue);
4334 if (tree != NULL)
4335 xmlFreeEnumeration(tree);
4336 break;
4337 }
4338 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4339 (ctxt->sax->attributeDecl != NULL))
4340 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4341 type, def, defaultValue, tree);
4342 if (attrName != NULL)
4343 xmlFree(attrName);
4344 if (defaultValue != NULL)
4345 xmlFree(defaultValue);
4346 GROW;
4347 }
4348 if (RAW == '>') {
4349 if (input != ctxt->input) {
4350 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352 ctxt->sax->error(ctxt->userData,
4353"Attribute list declaration doesn't start and stop in the same entity\n");
4354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 }
4357 NEXT;
4358 }
4359
4360 xmlFree(elemName);
4361 }
4362}
4363
4364/**
4365 * xmlParseElementMixedContentDecl:
4366 * @ctxt: an XML parser context
4367 *
4368 * parse the declaration for a Mixed Element content
4369 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4370 *
4371 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4372 * '(' S? '#PCDATA' S? ')'
4373 *
4374 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4375 *
4376 * [ VC: No Duplicate Types ]
4377 * The same name must not appear more than once in a single
4378 * mixed-content declaration.
4379 *
4380 * returns: the list of the xmlElementContentPtr describing the element choices
4381 */
4382xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004383xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004384 xmlElementContentPtr ret = NULL, cur = NULL, n;
4385 xmlChar *elem = NULL;
4386
4387 GROW;
4388 if ((RAW == '#') && (NXT(1) == 'P') &&
4389 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4390 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4391 (NXT(6) == 'A')) {
4392 SKIP(7);
4393 SKIP_BLANKS;
4394 SHRINK;
4395 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004396 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4397 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4398 if (ctxt->vctxt.error != NULL)
4399 ctxt->vctxt.error(ctxt->vctxt.userData,
4400"Element content declaration doesn't start and stop in the same entity\n");
4401 ctxt->valid = 0;
4402 }
Owen Taylor3473f882001-02-23 17:55:21 +00004403 NEXT;
4404 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4405 if (RAW == '*') {
4406 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4407 NEXT;
4408 }
4409 return(ret);
4410 }
4411 if ((RAW == '(') || (RAW == '|')) {
4412 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4413 if (ret == NULL) return(NULL);
4414 }
4415 while (RAW == '|') {
4416 NEXT;
4417 if (elem == NULL) {
4418 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4419 if (ret == NULL) return(NULL);
4420 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004421 if (cur != NULL)
4422 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004423 cur = ret;
4424 } else {
4425 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4426 if (n == NULL) return(NULL);
4427 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004428 if (n->c1 != NULL)
4429 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004430 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004431 if (n != NULL)
4432 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004433 cur = n;
4434 xmlFree(elem);
4435 }
4436 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004437 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004438 if (elem == NULL) {
4439 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4441 ctxt->sax->error(ctxt->userData,
4442 "xmlParseElementMixedContentDecl : Name expected\n");
4443 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004444 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004445 xmlFreeElementContent(cur);
4446 return(NULL);
4447 }
4448 SKIP_BLANKS;
4449 GROW;
4450 }
4451 if ((RAW == ')') && (NXT(1) == '*')) {
4452 if (elem != NULL) {
4453 cur->c2 = xmlNewElementContent(elem,
4454 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004455 if (cur->c2 != NULL)
4456 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004457 xmlFree(elem);
4458 }
4459 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004460 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4461 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4462 if (ctxt->vctxt.error != NULL)
4463 ctxt->vctxt.error(ctxt->vctxt.userData,
4464"Element content declaration doesn't start and stop in the same entity\n");
4465 ctxt->valid = 0;
4466 }
Owen Taylor3473f882001-02-23 17:55:21 +00004467 SKIP(2);
4468 } else {
4469 if (elem != NULL) xmlFree(elem);
4470 xmlFreeElementContent(ret);
4471 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4473 ctxt->sax->error(ctxt->userData,
4474 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4475 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004476 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004477 return(NULL);
4478 }
4479
4480 } else {
4481 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4483 ctxt->sax->error(ctxt->userData,
4484 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4485 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004486 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004487 }
4488 return(ret);
4489}
4490
4491/**
4492 * xmlParseElementChildrenContentDecl:
4493 * @ctxt: an XML parser context
4494 *
4495 * parse the declaration for an element with children (element-only) content
4496 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4497 *
4498 *
4499 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4500 *
4501 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4502 *
4503 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4504 *
4505 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4506 *
4507 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4508 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004509 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004510 * opening or closing parentheses in a choice, seq, or Mixed
4511 * construct is contained in the replacement text for a parameter
4512 * entity, both must be contained in the same replacement text. For
4513 * interoperability, if a parameter-entity reference appears in a
4514 * choice, seq, or Mixed construct, its replacement text should not
4515 * be empty, and neither the first nor last non-blank character of
4516 * the replacement text should be a connector (| or ,).
4517 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004518 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004519 * hierarchy.
4520 */
4521xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004522xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004523(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004524 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4525 xmlChar *elem;
4526 xmlChar type = 0;
4527
4528 SKIP_BLANKS;
4529 GROW;
4530 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004531 xmlParserInputPtr input = ctxt->input;
4532
Owen Taylor3473f882001-02-23 17:55:21 +00004533 /* Recurse on first child */
4534 NEXT;
4535 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004536 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004537 SKIP_BLANKS;
4538 GROW;
4539 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004540 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004541 if (elem == NULL) {
4542 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4544 ctxt->sax->error(ctxt->userData,
4545 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4546 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004547 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004548 return(NULL);
4549 }
4550 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4551 GROW;
4552 if (RAW == '?') {
4553 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4554 NEXT;
4555 } else if (RAW == '*') {
4556 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4557 NEXT;
4558 } else if (RAW == '+') {
4559 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4560 NEXT;
4561 } else {
4562 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4563 }
4564 xmlFree(elem);
4565 GROW;
4566 }
4567 SKIP_BLANKS;
4568 SHRINK;
4569 while (RAW != ')') {
4570 /*
4571 * Each loop we parse one separator and one element.
4572 */
4573 if (RAW == ',') {
4574 if (type == 0) type = CUR;
4575
4576 /*
4577 * Detect "Name | Name , Name" error
4578 */
4579 else if (type != CUR) {
4580 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4582 ctxt->sax->error(ctxt->userData,
4583 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4584 type);
4585 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004587 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004588 xmlFreeElementContent(last);
4589 if (ret != NULL)
4590 xmlFreeElementContent(ret);
4591 return(NULL);
4592 }
4593 NEXT;
4594
4595 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4596 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004597 if ((last != NULL) && (last != ret))
4598 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004599 xmlFreeElementContent(ret);
4600 return(NULL);
4601 }
4602 if (last == NULL) {
4603 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004604 if (ret != NULL)
4605 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004606 ret = cur = op;
4607 } else {
4608 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (op != NULL)
4610 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004612 if (last != NULL)
4613 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 cur =op;
4615 last = NULL;
4616 }
4617 } else if (RAW == '|') {
4618 if (type == 0) type = CUR;
4619
4620 /*
4621 * Detect "Name , Name | Name" error
4622 */
4623 else if (type != CUR) {
4624 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4626 ctxt->sax->error(ctxt->userData,
4627 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4628 type);
4629 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004630 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004631 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004632 xmlFreeElementContent(last);
4633 if (ret != NULL)
4634 xmlFreeElementContent(ret);
4635 return(NULL);
4636 }
4637 NEXT;
4638
4639 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4640 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004641 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004642 xmlFreeElementContent(last);
4643 if (ret != NULL)
4644 xmlFreeElementContent(ret);
4645 return(NULL);
4646 }
4647 if (last == NULL) {
4648 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004649 if (ret != NULL)
4650 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004651 ret = cur = op;
4652 } else {
4653 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004654 if (op != NULL)
4655 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004656 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004657 if (last != NULL)
4658 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004659 cur =op;
4660 last = NULL;
4661 }
4662 } else {
4663 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4665 ctxt->sax->error(ctxt->userData,
4666 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4667 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004668 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004669 if (ret != NULL)
4670 xmlFreeElementContent(ret);
4671 return(NULL);
4672 }
4673 GROW;
4674 SKIP_BLANKS;
4675 GROW;
4676 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004677 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004678 /* Recurse on second child */
4679 NEXT;
4680 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004681 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004682 SKIP_BLANKS;
4683 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004684 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 if (elem == NULL) {
4686 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4690 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004691 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004692 if (ret != NULL)
4693 xmlFreeElementContent(ret);
4694 return(NULL);
4695 }
4696 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4697 xmlFree(elem);
4698 if (RAW == '?') {
4699 last->ocur = XML_ELEMENT_CONTENT_OPT;
4700 NEXT;
4701 } else if (RAW == '*') {
4702 last->ocur = XML_ELEMENT_CONTENT_MULT;
4703 NEXT;
4704 } else if (RAW == '+') {
4705 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4706 NEXT;
4707 } else {
4708 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4709 }
4710 }
4711 SKIP_BLANKS;
4712 GROW;
4713 }
4714 if ((cur != NULL) && (last != NULL)) {
4715 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004716 if (last != NULL)
4717 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004719 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4720 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4721 if (ctxt->vctxt.error != NULL)
4722 ctxt->vctxt.error(ctxt->vctxt.userData,
4723"Element content declaration doesn't start and stop in the same entity\n");
4724 ctxt->valid = 0;
4725 }
Owen Taylor3473f882001-02-23 17:55:21 +00004726 NEXT;
4727 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004728 if (ret != NULL)
4729 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004730 NEXT;
4731 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004732 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004733 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004734 cur = ret;
4735 /*
4736 * Some normalization:
4737 * (a | b* | c?)* == (a | b | c)*
4738 */
4739 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4740 if ((cur->c1 != NULL) &&
4741 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4742 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4743 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4744 if ((cur->c2 != NULL) &&
4745 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4746 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4747 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4748 cur = cur->c2;
4749 }
4750 }
Owen Taylor3473f882001-02-23 17:55:21 +00004751 NEXT;
4752 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004753 if (ret != NULL) {
4754 int found = 0;
4755
Daniel Veillarde470df72001-04-18 21:41:07 +00004756 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004757 /*
4758 * Some normalization:
4759 * (a | b*)+ == (a | b)*
4760 * (a | b?)+ == (a | b)*
4761 */
4762 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4763 if ((cur->c1 != NULL) &&
4764 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4765 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4766 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4767 found = 1;
4768 }
4769 if ((cur->c2 != NULL) &&
4770 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4771 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4772 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4773 found = 1;
4774 }
4775 cur = cur->c2;
4776 }
4777 if (found)
4778 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4779 }
Owen Taylor3473f882001-02-23 17:55:21 +00004780 NEXT;
4781 }
4782 return(ret);
4783}
4784
4785/**
4786 * xmlParseElementContentDecl:
4787 * @ctxt: an XML parser context
4788 * @name: the name of the element being defined.
4789 * @result: the Element Content pointer will be stored here if any
4790 *
4791 * parse the declaration for an Element content either Mixed or Children,
4792 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4793 *
4794 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4795 *
4796 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4797 */
4798
4799int
4800xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4801 xmlElementContentPtr *result) {
4802
4803 xmlElementContentPtr tree = NULL;
4804 xmlParserInputPtr input = ctxt->input;
4805 int res;
4806
4807 *result = NULL;
4808
4809 if (RAW != '(') {
4810 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4812 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004813 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004816 return(-1);
4817 }
4818 NEXT;
4819 GROW;
4820 SKIP_BLANKS;
4821 if ((RAW == '#') && (NXT(1) == 'P') &&
4822 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4823 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4824 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004825 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004826 res = XML_ELEMENT_TYPE_MIXED;
4827 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004828 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004829 res = XML_ELEMENT_TYPE_ELEMENT;
4830 }
Owen Taylor3473f882001-02-23 17:55:21 +00004831 SKIP_BLANKS;
4832 *result = tree;
4833 return(res);
4834}
4835
4836/**
4837 * xmlParseElementDecl:
4838 * @ctxt: an XML parser context
4839 *
4840 * parse an Element declaration.
4841 *
4842 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4843 *
4844 * [ VC: Unique Element Type Declaration ]
4845 * No element type may be declared more than once
4846 *
4847 * Returns the type of the element, or -1 in case of error
4848 */
4849int
4850xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4851 xmlChar *name;
4852 int ret = -1;
4853 xmlElementContentPtr content = NULL;
4854
4855 GROW;
4856 if ((RAW == '<') && (NXT(1) == '!') &&
4857 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4858 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4859 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4860 (NXT(8) == 'T')) {
4861 xmlParserInputPtr input = ctxt->input;
4862
4863 SKIP(9);
4864 if (!IS_BLANK(CUR)) {
4865 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4867 ctxt->sax->error(ctxt->userData,
4868 "Space required after 'ELEMENT'\n");
4869 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004870 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004871 }
4872 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004873 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004874 if (name == NULL) {
4875 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4877 ctxt->sax->error(ctxt->userData,
4878 "xmlParseElementDecl: no name for Element\n");
4879 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004880 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004881 return(-1);
4882 }
4883 while ((RAW == 0) && (ctxt->inputNr > 1))
4884 xmlPopInput(ctxt);
4885 if (!IS_BLANK(CUR)) {
4886 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4888 ctxt->sax->error(ctxt->userData,
4889 "Space required after the element name\n");
4890 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004891 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004892 }
4893 SKIP_BLANKS;
4894 if ((RAW == 'E') && (NXT(1) == 'M') &&
4895 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4896 (NXT(4) == 'Y')) {
4897 SKIP(5);
4898 /*
4899 * Element must always be empty.
4900 */
4901 ret = XML_ELEMENT_TYPE_EMPTY;
4902 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4903 (NXT(2) == 'Y')) {
4904 SKIP(3);
4905 /*
4906 * Element is a generic container.
4907 */
4908 ret = XML_ELEMENT_TYPE_ANY;
4909 } else if (RAW == '(') {
4910 ret = xmlParseElementContentDecl(ctxt, name, &content);
4911 } else {
4912 /*
4913 * [ WFC: PEs in Internal Subset ] error handling.
4914 */
4915 if ((RAW == '%') && (ctxt->external == 0) &&
4916 (ctxt->inputNr == 1)) {
4917 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4919 ctxt->sax->error(ctxt->userData,
4920 "PEReference: forbidden within markup decl in internal subset\n");
4921 } else {
4922 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4924 ctxt->sax->error(ctxt->userData,
4925 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4926 }
4927 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004928 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004929 if (name != NULL) xmlFree(name);
4930 return(-1);
4931 }
4932
4933 SKIP_BLANKS;
4934 /*
4935 * Pop-up of finished entities.
4936 */
4937 while ((RAW == 0) && (ctxt->inputNr > 1))
4938 xmlPopInput(ctxt);
4939 SKIP_BLANKS;
4940
4941 if (RAW != '>') {
4942 ctxt->errNo = XML_ERR_GT_REQUIRED;
4943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4944 ctxt->sax->error(ctxt->userData,
4945 "xmlParseElementDecl: expected '>' at the end\n");
4946 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004947 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004948 } else {
4949 if (input != ctxt->input) {
4950 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4952 ctxt->sax->error(ctxt->userData,
4953"Element declaration doesn't start and stop in the same entity\n");
4954 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004955 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004956 }
4957
4958 NEXT;
4959 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4960 (ctxt->sax->elementDecl != NULL))
4961 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4962 content);
4963 }
4964 if (content != NULL) {
4965 xmlFreeElementContent(content);
4966 }
4967 if (name != NULL) {
4968 xmlFree(name);
4969 }
4970 }
4971 return(ret);
4972}
4973
4974/**
Owen Taylor3473f882001-02-23 17:55:21 +00004975 * xmlParseConditionalSections
4976 * @ctxt: an XML parser context
4977 *
4978 * [61] conditionalSect ::= includeSect | ignoreSect
4979 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4980 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4981 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4982 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4983 */
4984
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004985static void
Owen Taylor3473f882001-02-23 17:55:21 +00004986xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4987 SKIP(3);
4988 SKIP_BLANKS;
4989 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4990 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4991 (NXT(6) == 'E')) {
4992 SKIP(7);
4993 SKIP_BLANKS;
4994 if (RAW != '[') {
4995 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4997 ctxt->sax->error(ctxt->userData,
4998 "XML conditional section '[' expected\n");
4999 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005000 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005001 } else {
5002 NEXT;
5003 }
5004 if (xmlParserDebugEntities) {
5005 if ((ctxt->input != NULL) && (ctxt->input->filename))
5006 xmlGenericError(xmlGenericErrorContext,
5007 "%s(%d): ", ctxt->input->filename,
5008 ctxt->input->line);
5009 xmlGenericError(xmlGenericErrorContext,
5010 "Entering INCLUDE Conditional Section\n");
5011 }
5012
5013 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5014 (NXT(2) != '>'))) {
5015 const xmlChar *check = CUR_PTR;
5016 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005017
5018 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5019 xmlParseConditionalSections(ctxt);
5020 } else if (IS_BLANK(CUR)) {
5021 NEXT;
5022 } else if (RAW == '%') {
5023 xmlParsePEReference(ctxt);
5024 } else
5025 xmlParseMarkupDecl(ctxt);
5026
5027 /*
5028 * Pop-up of finished entities.
5029 */
5030 while ((RAW == 0) && (ctxt->inputNr > 1))
5031 xmlPopInput(ctxt);
5032
Daniel Veillardfdc91562002-07-01 21:52:03 +00005033 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005034 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5036 ctxt->sax->error(ctxt->userData,
5037 "Content error in the external subset\n");
5038 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005039 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005040 break;
5041 }
5042 }
5043 if (xmlParserDebugEntities) {
5044 if ((ctxt->input != NULL) && (ctxt->input->filename))
5045 xmlGenericError(xmlGenericErrorContext,
5046 "%s(%d): ", ctxt->input->filename,
5047 ctxt->input->line);
5048 xmlGenericError(xmlGenericErrorContext,
5049 "Leaving INCLUDE Conditional Section\n");
5050 }
5051
5052 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5053 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5054 int state;
5055 int instate;
5056 int depth = 0;
5057
5058 SKIP(6);
5059 SKIP_BLANKS;
5060 if (RAW != '[') {
5061 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5063 ctxt->sax->error(ctxt->userData,
5064 "XML conditional section '[' expected\n");
5065 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005066 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 } else {
5068 NEXT;
5069 }
5070 if (xmlParserDebugEntities) {
5071 if ((ctxt->input != NULL) && (ctxt->input->filename))
5072 xmlGenericError(xmlGenericErrorContext,
5073 "%s(%d): ", ctxt->input->filename,
5074 ctxt->input->line);
5075 xmlGenericError(xmlGenericErrorContext,
5076 "Entering IGNORE Conditional Section\n");
5077 }
5078
5079 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005080 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005081 * But disable SAX event generating DTD building in the meantime
5082 */
5083 state = ctxt->disableSAX;
5084 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005086 ctxt->instate = XML_PARSER_IGNORE;
5087
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005088 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005089 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5090 depth++;
5091 SKIP(3);
5092 continue;
5093 }
5094 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5095 if (--depth >= 0) SKIP(3);
5096 continue;
5097 }
5098 NEXT;
5099 continue;
5100 }
5101
5102 ctxt->disableSAX = state;
5103 ctxt->instate = instate;
5104
5105 if (xmlParserDebugEntities) {
5106 if ((ctxt->input != NULL) && (ctxt->input->filename))
5107 xmlGenericError(xmlGenericErrorContext,
5108 "%s(%d): ", ctxt->input->filename,
5109 ctxt->input->line);
5110 xmlGenericError(xmlGenericErrorContext,
5111 "Leaving IGNORE Conditional Section\n");
5112 }
5113
5114 } else {
5115 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5117 ctxt->sax->error(ctxt->userData,
5118 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5119 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005120 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005121 }
5122
5123 if (RAW == 0)
5124 SHRINK;
5125
5126 if (RAW == 0) {
5127 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5129 ctxt->sax->error(ctxt->userData,
5130 "XML conditional section not closed\n");
5131 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005132 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005133 } else {
5134 SKIP(3);
5135 }
5136}
5137
5138/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005139 * xmlParseMarkupDecl:
5140 * @ctxt: an XML parser context
5141 *
5142 * parse Markup declarations
5143 *
5144 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5145 * NotationDecl | PI | Comment
5146 *
5147 * [ VC: Proper Declaration/PE Nesting ]
5148 * Parameter-entity replacement text must be properly nested with
5149 * markup declarations. That is to say, if either the first character
5150 * or the last character of a markup declaration (markupdecl above) is
5151 * contained in the replacement text for a parameter-entity reference,
5152 * both must be contained in the same replacement text.
5153 *
5154 * [ WFC: PEs in Internal Subset ]
5155 * In the internal DTD subset, parameter-entity references can occur
5156 * only where markup declarations can occur, not within markup declarations.
5157 * (This does not apply to references that occur in external parameter
5158 * entities or to the external subset.)
5159 */
5160void
5161xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5162 GROW;
5163 xmlParseElementDecl(ctxt);
5164 xmlParseAttributeListDecl(ctxt);
5165 xmlParseEntityDecl(ctxt);
5166 xmlParseNotationDecl(ctxt);
5167 xmlParsePI(ctxt);
5168 xmlParseComment(ctxt);
5169 /*
5170 * This is only for internal subset. On external entities,
5171 * the replacement is done before parsing stage
5172 */
5173 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5174 xmlParsePEReference(ctxt);
5175
5176 /*
5177 * Conditional sections are allowed from entities included
5178 * by PE References in the internal subset.
5179 */
5180 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5181 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5182 xmlParseConditionalSections(ctxt);
5183 }
5184 }
5185
5186 ctxt->instate = XML_PARSER_DTD;
5187}
5188
5189/**
5190 * xmlParseTextDecl:
5191 * @ctxt: an XML parser context
5192 *
5193 * parse an XML declaration header for external entities
5194 *
5195 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5196 *
5197 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5198 */
5199
5200void
5201xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5202 xmlChar *version;
5203
5204 /*
5205 * We know that '<?xml' is here.
5206 */
5207 if ((RAW == '<') && (NXT(1) == '?') &&
5208 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5209 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5210 SKIP(5);
5211 } else {
5212 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5214 ctxt->sax->error(ctxt->userData,
5215 "Text declaration '<?xml' required\n");
5216 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005217 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005218
5219 return;
5220 }
5221
5222 if (!IS_BLANK(CUR)) {
5223 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5224 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5225 ctxt->sax->error(ctxt->userData,
5226 "Space needed after '<?xml'\n");
5227 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005229 }
5230 SKIP_BLANKS;
5231
5232 /*
5233 * We may have the VersionInfo here.
5234 */
5235 version = xmlParseVersionInfo(ctxt);
5236 if (version == NULL)
5237 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005238 else {
5239 if (!IS_BLANK(CUR)) {
5240 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5242 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5243 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005244 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005245 }
5246 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005247 ctxt->input->version = version;
5248
5249 /*
5250 * We must have the encoding declaration
5251 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005252 xmlParseEncodingDecl(ctxt);
5253 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5254 /*
5255 * The XML REC instructs us to stop parsing right here
5256 */
5257 return;
5258 }
5259
5260 SKIP_BLANKS;
5261 if ((RAW == '?') && (NXT(1) == '>')) {
5262 SKIP(2);
5263 } else if (RAW == '>') {
5264 /* Deprecated old WD ... */
5265 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5267 ctxt->sax->error(ctxt->userData,
5268 "XML declaration must end-up with '?>'\n");
5269 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005270 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005271 NEXT;
5272 } else {
5273 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5274 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5275 ctxt->sax->error(ctxt->userData,
5276 "parsing XML declaration: '?>' expected\n");
5277 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005278 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005279 MOVETO_ENDTAG(CUR_PTR);
5280 NEXT;
5281 }
5282}
5283
5284/**
Owen Taylor3473f882001-02-23 17:55:21 +00005285 * xmlParseExternalSubset:
5286 * @ctxt: an XML parser context
5287 * @ExternalID: the external identifier
5288 * @SystemID: the system identifier (or URL)
5289 *
5290 * parse Markup declarations from an external subset
5291 *
5292 * [30] extSubset ::= textDecl? extSubsetDecl
5293 *
5294 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5295 */
5296void
5297xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5298 const xmlChar *SystemID) {
5299 GROW;
5300 if ((RAW == '<') && (NXT(1) == '?') &&
5301 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5302 (NXT(4) == 'l')) {
5303 xmlParseTextDecl(ctxt);
5304 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5305 /*
5306 * The XML REC instructs us to stop parsing right here
5307 */
5308 ctxt->instate = XML_PARSER_EOF;
5309 return;
5310 }
5311 }
5312 if (ctxt->myDoc == NULL) {
5313 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5314 }
5315 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5316 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5317
5318 ctxt->instate = XML_PARSER_DTD;
5319 ctxt->external = 1;
5320 while (((RAW == '<') && (NXT(1) == '?')) ||
5321 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005322 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005323 const xmlChar *check = CUR_PTR;
5324 int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005325
5326 GROW;
5327 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5328 xmlParseConditionalSections(ctxt);
5329 } else if (IS_BLANK(CUR)) {
5330 NEXT;
5331 } else if (RAW == '%') {
5332 xmlParsePEReference(ctxt);
5333 } else
5334 xmlParseMarkupDecl(ctxt);
5335
5336 /*
5337 * Pop-up of finished entities.
5338 */
5339 while ((RAW == 0) && (ctxt->inputNr > 1))
5340 xmlPopInput(ctxt);
5341
Daniel Veillardfdc91562002-07-01 21:52:03 +00005342 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005343 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5345 ctxt->sax->error(ctxt->userData,
5346 "Content error in the external subset\n");
5347 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005348 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005349 break;
5350 }
5351 }
5352
5353 if (RAW != 0) {
5354 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5356 ctxt->sax->error(ctxt->userData,
5357 "Extra content at the end of the document\n");
5358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005360 }
5361
5362}
5363
5364/**
5365 * xmlParseReference:
5366 * @ctxt: an XML parser context
5367 *
5368 * parse and handle entity references in content, depending on the SAX
5369 * interface, this may end-up in a call to character() if this is a
5370 * CharRef, a predefined entity, if there is no reference() callback.
5371 * or if the parser was asked to switch to that mode.
5372 *
5373 * [67] Reference ::= EntityRef | CharRef
5374 */
5375void
5376xmlParseReference(xmlParserCtxtPtr ctxt) {
5377 xmlEntityPtr ent;
5378 xmlChar *val;
5379 if (RAW != '&') return;
5380
5381 if (NXT(1) == '#') {
5382 int i = 0;
5383 xmlChar out[10];
5384 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005385 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005386
5387 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5388 /*
5389 * So we are using non-UTF-8 buffers
5390 * Check that the char fit on 8bits, if not
5391 * generate a CharRef.
5392 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005393 if (value <= 0xFF) {
5394 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005395 out[1] = 0;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5397 (!ctxt->disableSAX))
5398 ctxt->sax->characters(ctxt->userData, out, 1);
5399 } else {
5400 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005401 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005402 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005403 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005404 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5405 (!ctxt->disableSAX))
5406 ctxt->sax->reference(ctxt->userData, out);
5407 }
5408 } else {
5409 /*
5410 * Just encode the value in UTF-8
5411 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005412 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 out[i] = 0;
5414 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5415 (!ctxt->disableSAX))
5416 ctxt->sax->characters(ctxt->userData, out, i);
5417 }
5418 } else {
5419 ent = xmlParseEntityRef(ctxt);
5420 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005421 if (!ctxt->wellFormed)
5422 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005423 if ((ent->name != NULL) &&
5424 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5425 xmlNodePtr list = NULL;
5426 int ret;
5427
5428
5429 /*
5430 * The first reference to the entity trigger a parsing phase
5431 * where the ent->children is filled with the result from
5432 * the parsing.
5433 */
5434 if (ent->children == NULL) {
5435 xmlChar *value;
5436 value = ent->content;
5437
5438 /*
5439 * Check that this entity is well formed
5440 */
5441 if ((value != NULL) &&
5442 (value[1] == 0) && (value[0] == '<') &&
5443 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5444 /*
5445 * DONE: get definite answer on this !!!
5446 * Lots of entity decls are used to declare a single
5447 * char
5448 * <!ENTITY lt "<">
5449 * Which seems to be valid since
5450 * 2.4: The ampersand character (&) and the left angle
5451 * bracket (<) may appear in their literal form only
5452 * when used ... They are also legal within the literal
5453 * entity value of an internal entity declaration;i
5454 * see "4.3.2 Well-Formed Parsed Entities".
5455 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5456 * Looking at the OASIS test suite and James Clark
5457 * tests, this is broken. However the XML REC uses
5458 * it. Is the XML REC not well-formed ????
5459 * This is a hack to avoid this problem
5460 *
5461 * ANSWER: since lt gt amp .. are already defined,
5462 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005463 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005464 * is lousy but acceptable.
5465 */
5466 list = xmlNewDocText(ctxt->myDoc, value);
5467 if (list != NULL) {
5468 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5469 (ent->children == NULL)) {
5470 ent->children = list;
5471 ent->last = list;
5472 list->parent = (xmlNodePtr) ent;
5473 } else {
5474 xmlFreeNodeList(list);
5475 }
5476 } else if (list != NULL) {
5477 xmlFreeNodeList(list);
5478 }
5479 } else {
5480 /*
5481 * 4.3.2: An internal general parsed entity is well-formed
5482 * if its replacement text matches the production labeled
5483 * content.
5484 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005485
5486 void *user_data;
5487 /*
5488 * This is a bit hackish but this seems the best
5489 * way to make sure both SAX and DOM entity support
5490 * behaves okay.
5491 */
5492 if (ctxt->userData == ctxt)
5493 user_data = NULL;
5494 else
5495 user_data = ctxt->userData;
5496
Owen Taylor3473f882001-02-23 17:55:21 +00005497 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5498 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005499 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5500 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005501 ctxt->depth--;
5502 } else if (ent->etype ==
5503 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5504 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005505 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005506 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005507 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005508 ctxt->depth--;
5509 } else {
5510 ret = -1;
5511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5512 ctxt->sax->error(ctxt->userData,
5513 "Internal: invalid entity type\n");
5514 }
5515 if (ret == XML_ERR_ENTITY_LOOP) {
5516 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "Detected entity reference loop\n");
5520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005522 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005523 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005524 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5525 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005526 (ent->children == NULL)) {
5527 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005528 if (ctxt->replaceEntities) {
5529 /*
5530 * Prune it directly in the generated document
5531 * except for single text nodes.
5532 */
5533 if ((list->type == XML_TEXT_NODE) &&
5534 (list->next == NULL)) {
5535 list->parent = (xmlNodePtr) ent;
5536 list = NULL;
5537 } else {
5538 while (list != NULL) {
5539 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005540 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005541 if (list->next == NULL)
5542 ent->last = list;
5543 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005544 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005545 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005546 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5547 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005548 }
5549 } else {
5550 while (list != NULL) {
5551 list->parent = (xmlNodePtr) ent;
5552 if (list->next == NULL)
5553 ent->last = list;
5554 list = list->next;
5555 }
Owen Taylor3473f882001-02-23 17:55:21 +00005556 }
5557 } else {
5558 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005559 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005560 }
5561 } else if (ret > 0) {
5562 ctxt->errNo = ret;
5563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5564 ctxt->sax->error(ctxt->userData,
5565 "Entity value required\n");
5566 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005567 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005568 } else if (list != NULL) {
5569 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005570 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005571 }
5572 }
5573 }
5574 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5575 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5576 /*
5577 * Create a node.
5578 */
5579 ctxt->sax->reference(ctxt->userData, ent->name);
5580 return;
5581 } else if (ctxt->replaceEntities) {
5582 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5583 /*
5584 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005585 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005586 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005587 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005589 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 cur = ent->children;
5591 while (cur != NULL) {
5592 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005593 if (firstChild == NULL){
5594 firstChild = new;
5595 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005596 xmlAddChild(ctxt->node, new);
5597 if (cur == ent->last)
5598 break;
5599 cur = cur->next;
5600 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005601 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5602 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005603 } else {
5604 /*
5605 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005606 * node with a possible previous text one which
5607 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005608 */
5609 if (ent->children->type == XML_TEXT_NODE)
5610 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5611 if ((ent->last != ent->children) &&
5612 (ent->last->type == XML_TEXT_NODE))
5613 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5614 xmlAddChildList(ctxt->node, ent->children);
5615 }
5616
Owen Taylor3473f882001-02-23 17:55:21 +00005617 /*
5618 * This is to avoid a nasty side effect, see
5619 * characters() in SAX.c
5620 */
5621 ctxt->nodemem = 0;
5622 ctxt->nodelen = 0;
5623 return;
5624 } else {
5625 /*
5626 * Probably running in SAX mode
5627 */
5628 xmlParserInputPtr input;
5629
5630 input = xmlNewEntityInputStream(ctxt, ent);
5631 xmlPushInput(ctxt, input);
5632 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5633 (RAW == '<') && (NXT(1) == '?') &&
5634 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5635 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5636 xmlParseTextDecl(ctxt);
5637 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5638 /*
5639 * The XML REC instructs us to stop parsing right here
5640 */
5641 ctxt->instate = XML_PARSER_EOF;
5642 return;
5643 }
5644 if (input->standalone == 1) {
5645 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5647 ctxt->sax->error(ctxt->userData,
5648 "external parsed entities cannot be standalone\n");
5649 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005650 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005651 }
5652 }
5653 return;
5654 }
5655 }
5656 } else {
5657 val = ent->content;
5658 if (val == NULL) return;
5659 /*
5660 * inline the entity.
5661 */
5662 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5663 (!ctxt->disableSAX))
5664 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5665 }
5666 }
5667}
5668
5669/**
5670 * xmlParseEntityRef:
5671 * @ctxt: an XML parser context
5672 *
5673 * parse ENTITY references declarations
5674 *
5675 * [68] EntityRef ::= '&' Name ';'
5676 *
5677 * [ WFC: Entity Declared ]
5678 * In a document without any DTD, a document with only an internal DTD
5679 * subset which contains no parameter entity references, or a document
5680 * with "standalone='yes'", the Name given in the entity reference
5681 * must match that in an entity declaration, except that well-formed
5682 * documents need not declare any of the following entities: amp, lt,
5683 * gt, apos, quot. The declaration of a parameter entity must precede
5684 * any reference to it. Similarly, the declaration of a general entity
5685 * must precede any reference to it which appears in a default value in an
5686 * attribute-list declaration. Note that if entities are declared in the
5687 * external subset or in external parameter entities, a non-validating
5688 * processor is not obligated to read and process their declarations;
5689 * for such documents, the rule that an entity must be declared is a
5690 * well-formedness constraint only if standalone='yes'.
5691 *
5692 * [ WFC: Parsed Entity ]
5693 * An entity reference must not contain the name of an unparsed entity
5694 *
5695 * Returns the xmlEntityPtr if found, or NULL otherwise.
5696 */
5697xmlEntityPtr
5698xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5699 xmlChar *name;
5700 xmlEntityPtr ent = NULL;
5701
5702 GROW;
5703
5704 if (RAW == '&') {
5705 NEXT;
5706 name = xmlParseName(ctxt);
5707 if (name == NULL) {
5708 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5710 ctxt->sax->error(ctxt->userData,
5711 "xmlParseEntityRef: no name\n");
5712 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005713 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005714 } else {
5715 if (RAW == ';') {
5716 NEXT;
5717 /*
5718 * Ask first SAX for entity resolution, otherwise try the
5719 * predefined set.
5720 */
5721 if (ctxt->sax != NULL) {
5722 if (ctxt->sax->getEntity != NULL)
5723 ent = ctxt->sax->getEntity(ctxt->userData, name);
5724 if (ent == NULL)
5725 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005726 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5727 ent = getEntity(ctxt, name);
5728 }
Owen Taylor3473f882001-02-23 17:55:21 +00005729 }
5730 /*
5731 * [ WFC: Entity Declared ]
5732 * In a document without any DTD, a document with only an
5733 * internal DTD subset which contains no parameter entity
5734 * references, or a document with "standalone='yes'", the
5735 * Name given in the entity reference must match that in an
5736 * entity declaration, except that well-formed documents
5737 * need not declare any of the following entities: amp, lt,
5738 * gt, apos, quot.
5739 * The declaration of a parameter entity must precede any
5740 * reference to it.
5741 * Similarly, the declaration of a general entity must
5742 * precede any reference to it which appears in a default
5743 * value in an attribute-list declaration. Note that if
5744 * entities are declared in the external subset or in
5745 * external parameter entities, a non-validating processor
5746 * is not obligated to read and process their declarations;
5747 * for such documents, the rule that an entity must be
5748 * declared is a well-formedness constraint only if
5749 * standalone='yes'.
5750 */
5751 if (ent == NULL) {
5752 if ((ctxt->standalone == 1) ||
5753 ((ctxt->hasExternalSubset == 0) &&
5754 (ctxt->hasPErefs == 0))) {
5755 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5757 ctxt->sax->error(ctxt->userData,
5758 "Entity '%s' not defined\n", name);
5759 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005760 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005761 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005762 } else {
5763 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005765 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005766 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005767 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005768 }
5769 }
5770
5771 /*
5772 * [ WFC: Parsed Entity ]
5773 * An entity reference must not contain the name of an
5774 * unparsed entity
5775 */
5776 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5777 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5779 ctxt->sax->error(ctxt->userData,
5780 "Entity reference to unparsed entity %s\n", name);
5781 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005782 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005783 }
5784
5785 /*
5786 * [ WFC: No External Entity References ]
5787 * Attribute values cannot contain direct or indirect
5788 * entity references to external entities.
5789 */
5790 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5791 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5792 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5794 ctxt->sax->error(ctxt->userData,
5795 "Attribute references external entity '%s'\n", name);
5796 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005797 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005798 }
5799 /*
5800 * [ WFC: No < in Attribute Values ]
5801 * The replacement text of any entity referred to directly or
5802 * indirectly in an attribute value (other than "&lt;") must
5803 * not contain a <.
5804 */
5805 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5806 (ent != NULL) &&
5807 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5808 (ent->content != NULL) &&
5809 (xmlStrchr(ent->content, '<'))) {
5810 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5812 ctxt->sax->error(ctxt->userData,
5813 "'<' in entity '%s' is not allowed in attributes values\n", name);
5814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005816 }
5817
5818 /*
5819 * Internal check, no parameter entities here ...
5820 */
5821 else {
5822 switch (ent->etype) {
5823 case XML_INTERNAL_PARAMETER_ENTITY:
5824 case XML_EXTERNAL_PARAMETER_ENTITY:
5825 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5827 ctxt->sax->error(ctxt->userData,
5828 "Attempt to reference the parameter entity '%s'\n", name);
5829 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005830 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005831 break;
5832 default:
5833 break;
5834 }
5835 }
5836
5837 /*
5838 * [ WFC: No Recursion ]
5839 * A parsed entity must not contain a recursive reference
5840 * to itself, either directly or indirectly.
5841 * Done somewhere else
5842 */
5843
5844 } else {
5845 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5847 ctxt->sax->error(ctxt->userData,
5848 "xmlParseEntityRef: expecting ';'\n");
5849 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005850 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005851 }
5852 xmlFree(name);
5853 }
5854 }
5855 return(ent);
5856}
5857
5858/**
5859 * xmlParseStringEntityRef:
5860 * @ctxt: an XML parser context
5861 * @str: a pointer to an index in the string
5862 *
5863 * parse ENTITY references declarations, but this version parses it from
5864 * a string value.
5865 *
5866 * [68] EntityRef ::= '&' Name ';'
5867 *
5868 * [ WFC: Entity Declared ]
5869 * In a document without any DTD, a document with only an internal DTD
5870 * subset which contains no parameter entity references, or a document
5871 * with "standalone='yes'", the Name given in the entity reference
5872 * must match that in an entity declaration, except that well-formed
5873 * documents need not declare any of the following entities: amp, lt,
5874 * gt, apos, quot. The declaration of a parameter entity must precede
5875 * any reference to it. Similarly, the declaration of a general entity
5876 * must precede any reference to it which appears in a default value in an
5877 * attribute-list declaration. Note that if entities are declared in the
5878 * external subset or in external parameter entities, a non-validating
5879 * processor is not obligated to read and process their declarations;
5880 * for such documents, the rule that an entity must be declared is a
5881 * well-formedness constraint only if standalone='yes'.
5882 *
5883 * [ WFC: Parsed Entity ]
5884 * An entity reference must not contain the name of an unparsed entity
5885 *
5886 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5887 * is updated to the current location in the string.
5888 */
5889xmlEntityPtr
5890xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5891 xmlChar *name;
5892 const xmlChar *ptr;
5893 xmlChar cur;
5894 xmlEntityPtr ent = NULL;
5895
5896 if ((str == NULL) || (*str == NULL))
5897 return(NULL);
5898 ptr = *str;
5899 cur = *ptr;
5900 if (cur == '&') {
5901 ptr++;
5902 cur = *ptr;
5903 name = xmlParseStringName(ctxt, &ptr);
5904 if (name == NULL) {
5905 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5907 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005908 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005911 } else {
5912 if (*ptr == ';') {
5913 ptr++;
5914 /*
5915 * Ask first SAX for entity resolution, otherwise try the
5916 * predefined set.
5917 */
5918 if (ctxt->sax != NULL) {
5919 if (ctxt->sax->getEntity != NULL)
5920 ent = ctxt->sax->getEntity(ctxt->userData, name);
5921 if (ent == NULL)
5922 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005923 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5924 ent = getEntity(ctxt, name);
5925 }
Owen Taylor3473f882001-02-23 17:55:21 +00005926 }
5927 /*
5928 * [ WFC: Entity Declared ]
5929 * In a document without any DTD, a document with only an
5930 * internal DTD subset which contains no parameter entity
5931 * references, or a document with "standalone='yes'", the
5932 * Name given in the entity reference must match that in an
5933 * entity declaration, except that well-formed documents
5934 * need not declare any of the following entities: amp, lt,
5935 * gt, apos, quot.
5936 * The declaration of a parameter entity must precede any
5937 * reference to it.
5938 * Similarly, the declaration of a general entity must
5939 * precede any reference to it which appears in a default
5940 * value in an attribute-list declaration. Note that if
5941 * entities are declared in the external subset or in
5942 * external parameter entities, a non-validating processor
5943 * is not obligated to read and process their declarations;
5944 * for such documents, the rule that an entity must be
5945 * declared is a well-formedness constraint only if
5946 * standalone='yes'.
5947 */
5948 if (ent == NULL) {
5949 if ((ctxt->standalone == 1) ||
5950 ((ctxt->hasExternalSubset == 0) &&
5951 (ctxt->hasPErefs == 0))) {
5952 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5954 ctxt->sax->error(ctxt->userData,
5955 "Entity '%s' not defined\n", name);
5956 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005957 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005958 } else {
5959 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5960 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5961 ctxt->sax->warning(ctxt->userData,
5962 "Entity '%s' not defined\n", name);
5963 }
5964 }
5965
5966 /*
5967 * [ WFC: Parsed Entity ]
5968 * An entity reference must not contain the name of an
5969 * unparsed entity
5970 */
5971 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5972 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5974 ctxt->sax->error(ctxt->userData,
5975 "Entity reference to unparsed entity %s\n", name);
5976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005978 }
5979
5980 /*
5981 * [ WFC: No External Entity References ]
5982 * Attribute values cannot contain direct or indirect
5983 * entity references to external entities.
5984 */
5985 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5986 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5987 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5989 ctxt->sax->error(ctxt->userData,
5990 "Attribute references external entity '%s'\n", name);
5991 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005992 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005993 }
5994 /*
5995 * [ WFC: No < in Attribute Values ]
5996 * The replacement text of any entity referred to directly or
5997 * indirectly in an attribute value (other than "&lt;") must
5998 * not contain a <.
5999 */
6000 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6001 (ent != NULL) &&
6002 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6003 (ent->content != NULL) &&
6004 (xmlStrchr(ent->content, '<'))) {
6005 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6007 ctxt->sax->error(ctxt->userData,
6008 "'<' in entity '%s' is not allowed in attributes values\n", name);
6009 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006010 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006011 }
6012
6013 /*
6014 * Internal check, no parameter entities here ...
6015 */
6016 else {
6017 switch (ent->etype) {
6018 case XML_INTERNAL_PARAMETER_ENTITY:
6019 case XML_EXTERNAL_PARAMETER_ENTITY:
6020 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6022 ctxt->sax->error(ctxt->userData,
6023 "Attempt to reference the parameter entity '%s'\n", name);
6024 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006025 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006026 break;
6027 default:
6028 break;
6029 }
6030 }
6031
6032 /*
6033 * [ WFC: No Recursion ]
6034 * A parsed entity must not contain a recursive reference
6035 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006036 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006037 */
6038
6039 } else {
6040 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6042 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006043 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006044 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006045 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006046 }
6047 xmlFree(name);
6048 }
6049 }
6050 *str = ptr;
6051 return(ent);
6052}
6053
6054/**
6055 * xmlParsePEReference:
6056 * @ctxt: an XML parser context
6057 *
6058 * parse PEReference declarations
6059 * The entity content is handled directly by pushing it's content as
6060 * a new input stream.
6061 *
6062 * [69] PEReference ::= '%' Name ';'
6063 *
6064 * [ WFC: No Recursion ]
6065 * A parsed entity must not contain a recursive
6066 * reference to itself, either directly or indirectly.
6067 *
6068 * [ WFC: Entity Declared ]
6069 * In a document without any DTD, a document with only an internal DTD
6070 * subset which contains no parameter entity references, or a document
6071 * with "standalone='yes'", ... ... The declaration of a parameter
6072 * entity must precede any reference to it...
6073 *
6074 * [ VC: Entity Declared ]
6075 * In a document with an external subset or external parameter entities
6076 * with "standalone='no'", ... ... The declaration of a parameter entity
6077 * must precede any reference to it...
6078 *
6079 * [ WFC: In DTD ]
6080 * Parameter-entity references may only appear in the DTD.
6081 * NOTE: misleading but this is handled.
6082 */
6083void
6084xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6085 xmlChar *name;
6086 xmlEntityPtr entity = NULL;
6087 xmlParserInputPtr input;
6088
6089 if (RAW == '%') {
6090 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006091 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006092 if (name == NULL) {
6093 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6095 ctxt->sax->error(ctxt->userData,
6096 "xmlParsePEReference: no name\n");
6097 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006098 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006099 } else {
6100 if (RAW == ';') {
6101 NEXT;
6102 if ((ctxt->sax != NULL) &&
6103 (ctxt->sax->getParameterEntity != NULL))
6104 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6105 name);
6106 if (entity == NULL) {
6107 /*
6108 * [ WFC: Entity Declared ]
6109 * In a document without any DTD, a document with only an
6110 * internal DTD subset which contains no parameter entity
6111 * references, or a document with "standalone='yes'", ...
6112 * ... The declaration of a parameter entity must precede
6113 * any reference to it...
6114 */
6115 if ((ctxt->standalone == 1) ||
6116 ((ctxt->hasExternalSubset == 0) &&
6117 (ctxt->hasPErefs == 0))) {
6118 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6119 if ((!ctxt->disableSAX) &&
6120 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6121 ctxt->sax->error(ctxt->userData,
6122 "PEReference: %%%s; not found\n", name);
6123 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006124 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006125 } else {
6126 /*
6127 * [ VC: Entity Declared ]
6128 * In a document with an external subset or external
6129 * parameter entities with "standalone='no'", ...
6130 * ... The declaration of a parameter entity must precede
6131 * any reference to it...
6132 */
6133 if ((!ctxt->disableSAX) &&
6134 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6135 ctxt->sax->warning(ctxt->userData,
6136 "PEReference: %%%s; not found\n", name);
6137 ctxt->valid = 0;
6138 }
6139 } else {
6140 /*
6141 * Internal checking in case the entity quest barfed
6142 */
6143 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6144 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6145 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6146 ctxt->sax->warning(ctxt->userData,
6147 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006148 } else if (ctxt->input->free != deallocblankswrapper) {
6149 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6150 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006151 } else {
6152 /*
6153 * TODO !!!
6154 * handle the extra spaces added before and after
6155 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6156 */
6157 input = xmlNewEntityInputStream(ctxt, entity);
6158 xmlPushInput(ctxt, input);
6159 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6160 (RAW == '<') && (NXT(1) == '?') &&
6161 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6162 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6163 xmlParseTextDecl(ctxt);
6164 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6165 /*
6166 * The XML REC instructs us to stop parsing
6167 * right here
6168 */
6169 ctxt->instate = XML_PARSER_EOF;
6170 xmlFree(name);
6171 return;
6172 }
6173 }
Owen Taylor3473f882001-02-23 17:55:21 +00006174 }
6175 }
6176 ctxt->hasPErefs = 1;
6177 } else {
6178 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6179 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6180 ctxt->sax->error(ctxt->userData,
6181 "xmlParsePEReference: expecting ';'\n");
6182 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006183 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006184 }
6185 xmlFree(name);
6186 }
6187 }
6188}
6189
6190/**
6191 * xmlParseStringPEReference:
6192 * @ctxt: an XML parser context
6193 * @str: a pointer to an index in the string
6194 *
6195 * parse PEReference declarations
6196 *
6197 * [69] PEReference ::= '%' Name ';'
6198 *
6199 * [ WFC: No Recursion ]
6200 * A parsed entity must not contain a recursive
6201 * reference to itself, either directly or indirectly.
6202 *
6203 * [ WFC: Entity Declared ]
6204 * In a document without any DTD, a document with only an internal DTD
6205 * subset which contains no parameter entity references, or a document
6206 * with "standalone='yes'", ... ... The declaration of a parameter
6207 * entity must precede any reference to it...
6208 *
6209 * [ VC: Entity Declared ]
6210 * In a document with an external subset or external parameter entities
6211 * with "standalone='no'", ... ... The declaration of a parameter entity
6212 * must precede any reference to it...
6213 *
6214 * [ WFC: In DTD ]
6215 * Parameter-entity references may only appear in the DTD.
6216 * NOTE: misleading but this is handled.
6217 *
6218 * Returns the string of the entity content.
6219 * str is updated to the current value of the index
6220 */
6221xmlEntityPtr
6222xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6223 const xmlChar *ptr;
6224 xmlChar cur;
6225 xmlChar *name;
6226 xmlEntityPtr entity = NULL;
6227
6228 if ((str == NULL) || (*str == NULL)) return(NULL);
6229 ptr = *str;
6230 cur = *ptr;
6231 if (cur == '%') {
6232 ptr++;
6233 cur = *ptr;
6234 name = xmlParseStringName(ctxt, &ptr);
6235 if (name == NULL) {
6236 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6238 ctxt->sax->error(ctxt->userData,
6239 "xmlParseStringPEReference: no name\n");
6240 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006241 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006242 } else {
6243 cur = *ptr;
6244 if (cur == ';') {
6245 ptr++;
6246 cur = *ptr;
6247 if ((ctxt->sax != NULL) &&
6248 (ctxt->sax->getParameterEntity != NULL))
6249 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6250 name);
6251 if (entity == NULL) {
6252 /*
6253 * [ WFC: Entity Declared ]
6254 * In a document without any DTD, a document with only an
6255 * internal DTD subset which contains no parameter entity
6256 * references, or a document with "standalone='yes'", ...
6257 * ... The declaration of a parameter entity must precede
6258 * any reference to it...
6259 */
6260 if ((ctxt->standalone == 1) ||
6261 ((ctxt->hasExternalSubset == 0) &&
6262 (ctxt->hasPErefs == 0))) {
6263 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6265 ctxt->sax->error(ctxt->userData,
6266 "PEReference: %%%s; not found\n", name);
6267 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006268 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006269 } else {
6270 /*
6271 * [ VC: Entity Declared ]
6272 * In a document with an external subset or external
6273 * parameter entities with "standalone='no'", ...
6274 * ... The declaration of a parameter entity must
6275 * precede any reference to it...
6276 */
6277 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6278 ctxt->sax->warning(ctxt->userData,
6279 "PEReference: %%%s; not found\n", name);
6280 ctxt->valid = 0;
6281 }
6282 } else {
6283 /*
6284 * Internal checking in case the entity quest barfed
6285 */
6286 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6287 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6288 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6289 ctxt->sax->warning(ctxt->userData,
6290 "Internal: %%%s; is not a parameter entity\n", name);
6291 }
6292 }
6293 ctxt->hasPErefs = 1;
6294 } else {
6295 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6297 ctxt->sax->error(ctxt->userData,
6298 "xmlParseStringPEReference: expecting ';'\n");
6299 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006300 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006301 }
6302 xmlFree(name);
6303 }
6304 }
6305 *str = ptr;
6306 return(entity);
6307}
6308
6309/**
6310 * xmlParseDocTypeDecl:
6311 * @ctxt: an XML parser context
6312 *
6313 * parse a DOCTYPE declaration
6314 *
6315 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6316 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6317 *
6318 * [ VC: Root Element Type ]
6319 * The Name in the document type declaration must match the element
6320 * type of the root element.
6321 */
6322
6323void
6324xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6325 xmlChar *name = NULL;
6326 xmlChar *ExternalID = NULL;
6327 xmlChar *URI = NULL;
6328
6329 /*
6330 * We know that '<!DOCTYPE' has been detected.
6331 */
6332 SKIP(9);
6333
6334 SKIP_BLANKS;
6335
6336 /*
6337 * Parse the DOCTYPE name.
6338 */
6339 name = xmlParseName(ctxt);
6340 if (name == NULL) {
6341 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6343 ctxt->sax->error(ctxt->userData,
6344 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6345 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006346 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006347 }
6348 ctxt->intSubName = name;
6349
6350 SKIP_BLANKS;
6351
6352 /*
6353 * Check for SystemID and ExternalID
6354 */
6355 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6356
6357 if ((URI != NULL) || (ExternalID != NULL)) {
6358 ctxt->hasExternalSubset = 1;
6359 }
6360 ctxt->extSubURI = URI;
6361 ctxt->extSubSystem = ExternalID;
6362
6363 SKIP_BLANKS;
6364
6365 /*
6366 * Create and update the internal subset.
6367 */
6368 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6369 (!ctxt->disableSAX))
6370 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6371
6372 /*
6373 * Is there any internal subset declarations ?
6374 * they are handled separately in xmlParseInternalSubset()
6375 */
6376 if (RAW == '[')
6377 return;
6378
6379 /*
6380 * We should be at the end of the DOCTYPE declaration.
6381 */
6382 if (RAW != '>') {
6383 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006385 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006386 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006387 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006388 }
6389 NEXT;
6390}
6391
6392/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006393 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006394 * @ctxt: an XML parser context
6395 *
6396 * parse the internal subset declaration
6397 *
6398 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6399 */
6400
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006401static void
Owen Taylor3473f882001-02-23 17:55:21 +00006402xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6403 /*
6404 * Is there any DTD definition ?
6405 */
6406 if (RAW == '[') {
6407 ctxt->instate = XML_PARSER_DTD;
6408 NEXT;
6409 /*
6410 * Parse the succession of Markup declarations and
6411 * PEReferences.
6412 * Subsequence (markupdecl | PEReference | S)*
6413 */
6414 while (RAW != ']') {
6415 const xmlChar *check = CUR_PTR;
6416 int cons = ctxt->input->consumed;
6417
6418 SKIP_BLANKS;
6419 xmlParseMarkupDecl(ctxt);
6420 xmlParsePEReference(ctxt);
6421
6422 /*
6423 * Pop-up of finished entities.
6424 */
6425 while ((RAW == 0) && (ctxt->inputNr > 1))
6426 xmlPopInput(ctxt);
6427
6428 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6429 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6431 ctxt->sax->error(ctxt->userData,
6432 "xmlParseInternalSubset: error detected in Markup declaration\n");
6433 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006434 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006435 break;
6436 }
6437 }
6438 if (RAW == ']') {
6439 NEXT;
6440 SKIP_BLANKS;
6441 }
6442 }
6443
6444 /*
6445 * We should be at the end of the DOCTYPE declaration.
6446 */
6447 if (RAW != '>') {
6448 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006450 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006451 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006452 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006453 }
6454 NEXT;
6455}
6456
6457/**
6458 * xmlParseAttribute:
6459 * @ctxt: an XML parser context
6460 * @value: a xmlChar ** used to store the value of the attribute
6461 *
6462 * parse an attribute
6463 *
6464 * [41] Attribute ::= Name Eq AttValue
6465 *
6466 * [ WFC: No External Entity References ]
6467 * Attribute values cannot contain direct or indirect entity references
6468 * to external entities.
6469 *
6470 * [ WFC: No < in Attribute Values ]
6471 * The replacement text of any entity referred to directly or indirectly in
6472 * an attribute value (other than "&lt;") must not contain a <.
6473 *
6474 * [ VC: Attribute Value Type ]
6475 * The attribute must have been declared; the value must be of the type
6476 * declared for it.
6477 *
6478 * [25] Eq ::= S? '=' S?
6479 *
6480 * With namespace:
6481 *
6482 * [NS 11] Attribute ::= QName Eq AttValue
6483 *
6484 * Also the case QName == xmlns:??? is handled independently as a namespace
6485 * definition.
6486 *
6487 * Returns the attribute name, and the value in *value.
6488 */
6489
6490xmlChar *
6491xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6492 xmlChar *name, *val;
6493
6494 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006495 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006496 name = xmlParseName(ctxt);
6497 if (name == NULL) {
6498 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6500 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6501 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006502 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006503 return(NULL);
6504 }
6505
6506 /*
6507 * read the value
6508 */
6509 SKIP_BLANKS;
6510 if (RAW == '=') {
6511 NEXT;
6512 SKIP_BLANKS;
6513 val = xmlParseAttValue(ctxt);
6514 ctxt->instate = XML_PARSER_CONTENT;
6515 } else {
6516 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6518 ctxt->sax->error(ctxt->userData,
6519 "Specification mandate value for attribute %s\n", name);
6520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006522 xmlFree(name);
6523 return(NULL);
6524 }
6525
6526 /*
6527 * Check that xml:lang conforms to the specification
6528 * No more registered as an error, just generate a warning now
6529 * since this was deprecated in XML second edition
6530 */
6531 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6532 if (!xmlCheckLanguageID(val)) {
6533 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6534 ctxt->sax->warning(ctxt->userData,
6535 "Malformed value for xml:lang : %s\n", val);
6536 }
6537 }
6538
6539 /*
6540 * Check that xml:space conforms to the specification
6541 */
6542 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6543 if (xmlStrEqual(val, BAD_CAST "default"))
6544 *(ctxt->space) = 0;
6545 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6546 *(ctxt->space) = 1;
6547 else {
6548 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6550 ctxt->sax->error(ctxt->userData,
6551"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6552 val);
6553 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006554 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006555 }
6556 }
6557
6558 *value = val;
6559 return(name);
6560}
6561
6562/**
6563 * xmlParseStartTag:
6564 * @ctxt: an XML parser context
6565 *
6566 * parse a start of tag either for rule element or
6567 * EmptyElement. In both case we don't parse the tag closing chars.
6568 *
6569 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6570 *
6571 * [ WFC: Unique Att Spec ]
6572 * No attribute name may appear more than once in the same start-tag or
6573 * empty-element tag.
6574 *
6575 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6576 *
6577 * [ WFC: Unique Att Spec ]
6578 * No attribute name may appear more than once in the same start-tag or
6579 * empty-element tag.
6580 *
6581 * With namespace:
6582 *
6583 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6584 *
6585 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6586 *
6587 * Returns the element name parsed
6588 */
6589
6590xmlChar *
6591xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6592 xmlChar *name;
6593 xmlChar *attname;
6594 xmlChar *attvalue;
6595 const xmlChar **atts = NULL;
6596 int nbatts = 0;
6597 int maxatts = 0;
6598 int i;
6599
6600 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006601 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006602
6603 name = xmlParseName(ctxt);
6604 if (name == NULL) {
6605 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6607 ctxt->sax->error(ctxt->userData,
6608 "xmlParseStartTag: invalid element name\n");
6609 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006610 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006611 return(NULL);
6612 }
6613
6614 /*
6615 * Now parse the attributes, it ends up with the ending
6616 *
6617 * (S Attribute)* S?
6618 */
6619 SKIP_BLANKS;
6620 GROW;
6621
Daniel Veillard21a0f912001-02-25 19:54:14 +00006622 while ((RAW != '>') &&
6623 ((RAW != '/') || (NXT(1) != '>')) &&
6624 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006625 const xmlChar *q = CUR_PTR;
6626 int cons = ctxt->input->consumed;
6627
6628 attname = xmlParseAttribute(ctxt, &attvalue);
6629 if ((attname != NULL) && (attvalue != NULL)) {
6630 /*
6631 * [ WFC: Unique Att Spec ]
6632 * No attribute name may appear more than once in the same
6633 * start-tag or empty-element tag.
6634 */
6635 for (i = 0; i < nbatts;i += 2) {
6636 if (xmlStrEqual(atts[i], attname)) {
6637 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6639 ctxt->sax->error(ctxt->userData,
6640 "Attribute %s redefined\n",
6641 attname);
6642 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006643 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006644 xmlFree(attname);
6645 xmlFree(attvalue);
6646 goto failed;
6647 }
6648 }
6649
6650 /*
6651 * Add the pair to atts
6652 */
6653 if (atts == NULL) {
6654 maxatts = 10;
6655 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6656 if (atts == NULL) {
6657 xmlGenericError(xmlGenericErrorContext,
6658 "malloc of %ld byte failed\n",
6659 maxatts * (long)sizeof(xmlChar *));
6660 return(NULL);
6661 }
6662 } else if (nbatts + 4 > maxatts) {
6663 maxatts *= 2;
6664 atts = (const xmlChar **) xmlRealloc((void *) atts,
6665 maxatts * sizeof(xmlChar *));
6666 if (atts == NULL) {
6667 xmlGenericError(xmlGenericErrorContext,
6668 "realloc of %ld byte failed\n",
6669 maxatts * (long)sizeof(xmlChar *));
6670 return(NULL);
6671 }
6672 }
6673 atts[nbatts++] = attname;
6674 atts[nbatts++] = attvalue;
6675 atts[nbatts] = NULL;
6676 atts[nbatts + 1] = NULL;
6677 } else {
6678 if (attname != NULL)
6679 xmlFree(attname);
6680 if (attvalue != NULL)
6681 xmlFree(attvalue);
6682 }
6683
6684failed:
6685
6686 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6687 break;
6688 if (!IS_BLANK(RAW)) {
6689 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6691 ctxt->sax->error(ctxt->userData,
6692 "attributes construct error\n");
6693 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006694 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006695 }
6696 SKIP_BLANKS;
6697 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6698 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6700 ctxt->sax->error(ctxt->userData,
6701 "xmlParseStartTag: problem parsing attributes\n");
6702 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006703 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 break;
6705 }
6706 GROW;
6707 }
6708
6709 /*
6710 * SAX: Start of Element !
6711 */
6712 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6713 (!ctxt->disableSAX))
6714 ctxt->sax->startElement(ctxt->userData, name, atts);
6715
6716 if (atts != NULL) {
6717 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6718 xmlFree((void *) atts);
6719 }
6720 return(name);
6721}
6722
6723/**
6724 * xmlParseEndTag:
6725 * @ctxt: an XML parser context
6726 *
6727 * parse an end of tag
6728 *
6729 * [42] ETag ::= '</' Name S? '>'
6730 *
6731 * With namespace
6732 *
6733 * [NS 9] ETag ::= '</' QName S? '>'
6734 */
6735
6736void
6737xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6738 xmlChar *name;
6739 xmlChar *oldname;
6740
6741 GROW;
6742 if ((RAW != '<') || (NXT(1) != '/')) {
6743 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6745 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6746 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006747 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006748 return;
6749 }
6750 SKIP(2);
6751
Daniel Veillard46de64e2002-05-29 08:21:33 +00006752 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006753
6754 /*
6755 * We should definitely be at the ending "S? '>'" part
6756 */
6757 GROW;
6758 SKIP_BLANKS;
6759 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6760 ctxt->errNo = XML_ERR_GT_REQUIRED;
6761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6762 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6763 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006764 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006765 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006766 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006767
6768 /*
6769 * [ WFC: Element Type Match ]
6770 * The Name in an element's end-tag must match the element type in the
6771 * start-tag.
6772 *
6773 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006774 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006775 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006777 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006778 ctxt->sax->error(ctxt->userData,
6779 "Opening and ending tag mismatch: %s and %s\n",
6780 ctxt->name, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006781 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006782 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006783 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006784 }
6785
6786 }
6787 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006788 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6789#if 0
6790 else {
6791 /*
6792 * Recover in case of one missing close
6793 */
6794 if ((ctxt->nameNr > 2) &&
6795 (xmlStrEqual(ctxt->nameTab[ctxt->nameNr -2], name))) {
6796 namePop(ctxt);
6797 spacePop(ctxt);
6798 }
6799 }
6800#endif
6801 if (name != NULL)
6802 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006803 }
6804
6805 /*
6806 * SAX: End of Tag
6807 */
6808 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6809 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006810 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006811
Owen Taylor3473f882001-02-23 17:55:21 +00006812 oldname = namePop(ctxt);
6813 spacePop(ctxt);
6814 if (oldname != NULL) {
6815#ifdef DEBUG_STACK
6816 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6817#endif
6818 xmlFree(oldname);
6819 }
6820 return;
6821}
6822
6823/**
6824 * xmlParseCDSect:
6825 * @ctxt: an XML parser context
6826 *
6827 * Parse escaped pure raw content.
6828 *
6829 * [18] CDSect ::= CDStart CData CDEnd
6830 *
6831 * [19] CDStart ::= '<![CDATA['
6832 *
6833 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6834 *
6835 * [21] CDEnd ::= ']]>'
6836 */
6837void
6838xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6839 xmlChar *buf = NULL;
6840 int len = 0;
6841 int size = XML_PARSER_BUFFER_SIZE;
6842 int r, rl;
6843 int s, sl;
6844 int cur, l;
6845 int count = 0;
6846
6847 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6848 (NXT(2) == '[') && (NXT(3) == 'C') &&
6849 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6850 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6851 (NXT(8) == '[')) {
6852 SKIP(9);
6853 } else
6854 return;
6855
6856 ctxt->instate = XML_PARSER_CDATA_SECTION;
6857 r = CUR_CHAR(rl);
6858 if (!IS_CHAR(r)) {
6859 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6861 ctxt->sax->error(ctxt->userData,
6862 "CData section not finished\n");
6863 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006864 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006865 ctxt->instate = XML_PARSER_CONTENT;
6866 return;
6867 }
6868 NEXTL(rl);
6869 s = CUR_CHAR(sl);
6870 if (!IS_CHAR(s)) {
6871 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6873 ctxt->sax->error(ctxt->userData,
6874 "CData section not finished\n");
6875 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006876 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006877 ctxt->instate = XML_PARSER_CONTENT;
6878 return;
6879 }
6880 NEXTL(sl);
6881 cur = CUR_CHAR(l);
6882 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6883 if (buf == NULL) {
6884 xmlGenericError(xmlGenericErrorContext,
6885 "malloc of %d byte failed\n", size);
6886 return;
6887 }
6888 while (IS_CHAR(cur) &&
6889 ((r != ']') || (s != ']') || (cur != '>'))) {
6890 if (len + 5 >= size) {
6891 size *= 2;
6892 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6893 if (buf == NULL) {
6894 xmlGenericError(xmlGenericErrorContext,
6895 "realloc of %d byte failed\n", size);
6896 return;
6897 }
6898 }
6899 COPY_BUF(rl,buf,len,r);
6900 r = s;
6901 rl = sl;
6902 s = cur;
6903 sl = l;
6904 count++;
6905 if (count > 50) {
6906 GROW;
6907 count = 0;
6908 }
6909 NEXTL(l);
6910 cur = CUR_CHAR(l);
6911 }
6912 buf[len] = 0;
6913 ctxt->instate = XML_PARSER_CONTENT;
6914 if (cur != '>') {
6915 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6917 ctxt->sax->error(ctxt->userData,
6918 "CData section not finished\n%.50s\n", buf);
6919 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006920 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006921 xmlFree(buf);
6922 return;
6923 }
6924 NEXTL(l);
6925
6926 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006927 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006928 */
6929 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6930 if (ctxt->sax->cdataBlock != NULL)
6931 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006932 else if (ctxt->sax->characters != NULL)
6933 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006934 }
6935 xmlFree(buf);
6936}
6937
6938/**
6939 * xmlParseContent:
6940 * @ctxt: an XML parser context
6941 *
6942 * Parse a content:
6943 *
6944 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6945 */
6946
6947void
6948xmlParseContent(xmlParserCtxtPtr ctxt) {
6949 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00006950 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006951 ((RAW != '<') || (NXT(1) != '/'))) {
6952 const xmlChar *test = CUR_PTR;
6953 int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006954 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006955
6956 /*
Owen Taylor3473f882001-02-23 17:55:21 +00006957 * First case : a Processing Instruction.
6958 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00006959 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006960 xmlParsePI(ctxt);
6961 }
6962
6963 /*
6964 * Second case : a CDSection
6965 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006966 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006967 (NXT(2) == '[') && (NXT(3) == 'C') &&
6968 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6969 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6970 (NXT(8) == '[')) {
6971 xmlParseCDSect(ctxt);
6972 }
6973
6974 /*
6975 * Third case : a comment
6976 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006977 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006978 (NXT(2) == '-') && (NXT(3) == '-')) {
6979 xmlParseComment(ctxt);
6980 ctxt->instate = XML_PARSER_CONTENT;
6981 }
6982
6983 /*
6984 * Fourth case : a sub-element.
6985 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006986 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006987 xmlParseElement(ctxt);
6988 }
6989
6990 /*
6991 * Fifth case : a reference. If if has not been resolved,
6992 * parsing returns it's Name, create the node
6993 */
6994
Daniel Veillard21a0f912001-02-25 19:54:14 +00006995 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006996 xmlParseReference(ctxt);
6997 }
6998
6999 /*
7000 * Last case, text. Note that References are handled directly.
7001 */
7002 else {
7003 xmlParseCharData(ctxt, 0);
7004 }
7005
7006 GROW;
7007 /*
7008 * Pop-up of finished entities.
7009 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007010 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007011 xmlPopInput(ctxt);
7012 SHRINK;
7013
Daniel Veillardfdc91562002-07-01 21:52:03 +00007014 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007015 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7017 ctxt->sax->error(ctxt->userData,
7018 "detected an error in element content\n");
7019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007021 ctxt->instate = XML_PARSER_EOF;
7022 break;
7023 }
7024 }
7025}
7026
7027/**
7028 * xmlParseElement:
7029 * @ctxt: an XML parser context
7030 *
7031 * parse an XML element, this is highly recursive
7032 *
7033 * [39] element ::= EmptyElemTag | STag content ETag
7034 *
7035 * [ WFC: Element Type Match ]
7036 * The Name in an element's end-tag must match the element type in the
7037 * start-tag.
7038 *
7039 * [ VC: Element Valid ]
7040 * An element is valid if there is a declaration matching elementdecl
7041 * where the Name matches the element type and one of the following holds:
7042 * - The declaration matches EMPTY and the element has no content.
7043 * - The declaration matches children and the sequence of child elements
7044 * belongs to the language generated by the regular expression in the
7045 * content model, with optional white space (characters matching the
7046 * nonterminal S) between each pair of child elements.
7047 * - The declaration matches Mixed and the content consists of character
7048 * data and child elements whose types match names in the content model.
7049 * - The declaration matches ANY, and the types of any child elements have
7050 * been declared.
7051 */
7052
7053void
7054xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007055 xmlChar *name;
7056 xmlChar *oldname;
7057 xmlParserNodeInfo node_info;
7058 xmlNodePtr ret;
7059
7060 /* Capture start position */
7061 if (ctxt->record_info) {
7062 node_info.begin_pos = ctxt->input->consumed +
7063 (CUR_PTR - ctxt->input->base);
7064 node_info.begin_line = ctxt->input->line;
7065 }
7066
7067 if (ctxt->spaceNr == 0)
7068 spacePush(ctxt, -1);
7069 else
7070 spacePush(ctxt, *ctxt->space);
7071
7072 name = xmlParseStartTag(ctxt);
7073 if (name == NULL) {
7074 spacePop(ctxt);
7075 return;
7076 }
7077 namePush(ctxt, name);
7078 ret = ctxt->node;
7079
7080 /*
7081 * [ VC: Root Element Type ]
7082 * The Name in the document type declaration must match the element
7083 * type of the root element.
7084 */
7085 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7086 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7087 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7088
7089 /*
7090 * Check for an Empty Element.
7091 */
7092 if ((RAW == '/') && (NXT(1) == '>')) {
7093 SKIP(2);
7094 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7095 (!ctxt->disableSAX))
7096 ctxt->sax->endElement(ctxt->userData, name);
7097 oldname = namePop(ctxt);
7098 spacePop(ctxt);
7099 if (oldname != NULL) {
7100#ifdef DEBUG_STACK
7101 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7102#endif
7103 xmlFree(oldname);
7104 }
7105 if ( ret != NULL && ctxt->record_info ) {
7106 node_info.end_pos = ctxt->input->consumed +
7107 (CUR_PTR - ctxt->input->base);
7108 node_info.end_line = ctxt->input->line;
7109 node_info.node = ret;
7110 xmlParserAddNodeInfo(ctxt, &node_info);
7111 }
7112 return;
7113 }
7114 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007115 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007116 } else {
7117 ctxt->errNo = XML_ERR_GT_REQUIRED;
7118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7119 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007120 "Couldn't find end of Start Tag %s\n",
7121 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007122 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007123 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007124
7125 /*
7126 * end of parsing of this node.
7127 */
7128 nodePop(ctxt);
7129 oldname = namePop(ctxt);
7130 spacePop(ctxt);
7131 if (oldname != NULL) {
7132#ifdef DEBUG_STACK
7133 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7134#endif
7135 xmlFree(oldname);
7136 }
7137
7138 /*
7139 * Capture end position and add node
7140 */
7141 if ( ret != NULL && ctxt->record_info ) {
7142 node_info.end_pos = ctxt->input->consumed +
7143 (CUR_PTR - ctxt->input->base);
7144 node_info.end_line = ctxt->input->line;
7145 node_info.node = ret;
7146 xmlParserAddNodeInfo(ctxt, &node_info);
7147 }
7148 return;
7149 }
7150
7151 /*
7152 * Parse the content of the element:
7153 */
7154 xmlParseContent(ctxt);
7155 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007156 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7158 ctxt->sax->error(ctxt->userData,
Daniel Veillard38d80e22002-06-11 07:24:56 +00007159 "Premature end of data in tag %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007160 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007161 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007162
7163 /*
7164 * end of parsing of this node.
7165 */
7166 nodePop(ctxt);
7167 oldname = namePop(ctxt);
7168 spacePop(ctxt);
7169 if (oldname != NULL) {
7170#ifdef DEBUG_STACK
7171 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7172#endif
7173 xmlFree(oldname);
7174 }
7175 return;
7176 }
7177
7178 /*
7179 * parse the end of tag: '</' should be here.
7180 */
7181 xmlParseEndTag(ctxt);
7182
7183 /*
7184 * Capture end position and add node
7185 */
7186 if ( ret != NULL && ctxt->record_info ) {
7187 node_info.end_pos = ctxt->input->consumed +
7188 (CUR_PTR - ctxt->input->base);
7189 node_info.end_line = ctxt->input->line;
7190 node_info.node = ret;
7191 xmlParserAddNodeInfo(ctxt, &node_info);
7192 }
7193}
7194
7195/**
7196 * xmlParseVersionNum:
7197 * @ctxt: an XML parser context
7198 *
7199 * parse the XML version value.
7200 *
7201 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7202 *
7203 * Returns the string giving the XML version number, or NULL
7204 */
7205xmlChar *
7206xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7207 xmlChar *buf = NULL;
7208 int len = 0;
7209 int size = 10;
7210 xmlChar cur;
7211
7212 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7213 if (buf == NULL) {
7214 xmlGenericError(xmlGenericErrorContext,
7215 "malloc of %d byte failed\n", size);
7216 return(NULL);
7217 }
7218 cur = CUR;
7219 while (((cur >= 'a') && (cur <= 'z')) ||
7220 ((cur >= 'A') && (cur <= 'Z')) ||
7221 ((cur >= '0') && (cur <= '9')) ||
7222 (cur == '_') || (cur == '.') ||
7223 (cur == ':') || (cur == '-')) {
7224 if (len + 1 >= size) {
7225 size *= 2;
7226 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7227 if (buf == NULL) {
7228 xmlGenericError(xmlGenericErrorContext,
7229 "realloc of %d byte failed\n", size);
7230 return(NULL);
7231 }
7232 }
7233 buf[len++] = cur;
7234 NEXT;
7235 cur=CUR;
7236 }
7237 buf[len] = 0;
7238 return(buf);
7239}
7240
7241/**
7242 * xmlParseVersionInfo:
7243 * @ctxt: an XML parser context
7244 *
7245 * parse the XML version.
7246 *
7247 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7248 *
7249 * [25] Eq ::= S? '=' S?
7250 *
7251 * Returns the version string, e.g. "1.0"
7252 */
7253
7254xmlChar *
7255xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7256 xmlChar *version = NULL;
7257 const xmlChar *q;
7258
7259 if ((RAW == 'v') && (NXT(1) == 'e') &&
7260 (NXT(2) == 'r') && (NXT(3) == 's') &&
7261 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7262 (NXT(6) == 'n')) {
7263 SKIP(7);
7264 SKIP_BLANKS;
7265 if (RAW != '=') {
7266 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7268 ctxt->sax->error(ctxt->userData,
7269 "xmlParseVersionInfo : expected '='\n");
7270 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007271 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007272 return(NULL);
7273 }
7274 NEXT;
7275 SKIP_BLANKS;
7276 if (RAW == '"') {
7277 NEXT;
7278 q = CUR_PTR;
7279 version = xmlParseVersionNum(ctxt);
7280 if (RAW != '"') {
7281 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7283 ctxt->sax->error(ctxt->userData,
7284 "String not closed\n%.50s\n", q);
7285 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007286 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007287 } else
7288 NEXT;
7289 } else if (RAW == '\''){
7290 NEXT;
7291 q = CUR_PTR;
7292 version = xmlParseVersionNum(ctxt);
7293 if (RAW != '\'') {
7294 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7296 ctxt->sax->error(ctxt->userData,
7297 "String not closed\n%.50s\n", q);
7298 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007299 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007300 } else
7301 NEXT;
7302 } else {
7303 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7305 ctxt->sax->error(ctxt->userData,
7306 "xmlParseVersionInfo : expected ' or \"\n");
7307 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007308 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007309 }
7310 }
7311 return(version);
7312}
7313
7314/**
7315 * xmlParseEncName:
7316 * @ctxt: an XML parser context
7317 *
7318 * parse the XML encoding name
7319 *
7320 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7321 *
7322 * Returns the encoding name value or NULL
7323 */
7324xmlChar *
7325xmlParseEncName(xmlParserCtxtPtr ctxt) {
7326 xmlChar *buf = NULL;
7327 int len = 0;
7328 int size = 10;
7329 xmlChar cur;
7330
7331 cur = CUR;
7332 if (((cur >= 'a') && (cur <= 'z')) ||
7333 ((cur >= 'A') && (cur <= 'Z'))) {
7334 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7335 if (buf == NULL) {
7336 xmlGenericError(xmlGenericErrorContext,
7337 "malloc of %d byte failed\n", size);
7338 return(NULL);
7339 }
7340
7341 buf[len++] = cur;
7342 NEXT;
7343 cur = CUR;
7344 while (((cur >= 'a') && (cur <= 'z')) ||
7345 ((cur >= 'A') && (cur <= 'Z')) ||
7346 ((cur >= '0') && (cur <= '9')) ||
7347 (cur == '.') || (cur == '_') ||
7348 (cur == '-')) {
7349 if (len + 1 >= size) {
7350 size *= 2;
7351 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7352 if (buf == NULL) {
7353 xmlGenericError(xmlGenericErrorContext,
7354 "realloc of %d byte failed\n", size);
7355 return(NULL);
7356 }
7357 }
7358 buf[len++] = cur;
7359 NEXT;
7360 cur = CUR;
7361 if (cur == 0) {
7362 SHRINK;
7363 GROW;
7364 cur = CUR;
7365 }
7366 }
7367 buf[len] = 0;
7368 } else {
7369 ctxt->errNo = XML_ERR_ENCODING_NAME;
7370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7371 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7372 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007373 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007374 }
7375 return(buf);
7376}
7377
7378/**
7379 * xmlParseEncodingDecl:
7380 * @ctxt: an XML parser context
7381 *
7382 * parse the XML encoding declaration
7383 *
7384 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7385 *
7386 * this setups the conversion filters.
7387 *
7388 * Returns the encoding value or NULL
7389 */
7390
7391xmlChar *
7392xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7393 xmlChar *encoding = NULL;
7394 const xmlChar *q;
7395
7396 SKIP_BLANKS;
7397 if ((RAW == 'e') && (NXT(1) == 'n') &&
7398 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7399 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7400 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7401 SKIP(8);
7402 SKIP_BLANKS;
7403 if (RAW != '=') {
7404 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7406 ctxt->sax->error(ctxt->userData,
7407 "xmlParseEncodingDecl : expected '='\n");
7408 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007409 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007410 return(NULL);
7411 }
7412 NEXT;
7413 SKIP_BLANKS;
7414 if (RAW == '"') {
7415 NEXT;
7416 q = CUR_PTR;
7417 encoding = xmlParseEncName(ctxt);
7418 if (RAW != '"') {
7419 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7421 ctxt->sax->error(ctxt->userData,
7422 "String not closed\n%.50s\n", q);
7423 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007424 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007425 } else
7426 NEXT;
7427 } else if (RAW == '\''){
7428 NEXT;
7429 q = CUR_PTR;
7430 encoding = xmlParseEncName(ctxt);
7431 if (RAW != '\'') {
7432 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7434 ctxt->sax->error(ctxt->userData,
7435 "String not closed\n%.50s\n", q);
7436 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007437 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007438 } else
7439 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007440 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007441 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7443 ctxt->sax->error(ctxt->userData,
7444 "xmlParseEncodingDecl : expected ' or \"\n");
7445 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007446 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007447 }
7448 if (encoding != NULL) {
7449 xmlCharEncoding enc;
7450 xmlCharEncodingHandlerPtr handler;
7451
7452 if (ctxt->input->encoding != NULL)
7453 xmlFree((xmlChar *) ctxt->input->encoding);
7454 ctxt->input->encoding = encoding;
7455
7456 enc = xmlParseCharEncoding((const char *) encoding);
7457 /*
7458 * registered set of known encodings
7459 */
7460 if (enc != XML_CHAR_ENCODING_ERROR) {
7461 xmlSwitchEncoding(ctxt, enc);
7462 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007463 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007464 xmlFree(encoding);
7465 return(NULL);
7466 }
7467 } else {
7468 /*
7469 * fallback for unknown encodings
7470 */
7471 handler = xmlFindCharEncodingHandler((const char *) encoding);
7472 if (handler != NULL) {
7473 xmlSwitchToEncoding(ctxt, handler);
7474 } else {
7475 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7477 ctxt->sax->error(ctxt->userData,
7478 "Unsupported encoding %s\n", encoding);
7479 return(NULL);
7480 }
7481 }
7482 }
7483 }
7484 return(encoding);
7485}
7486
7487/**
7488 * xmlParseSDDecl:
7489 * @ctxt: an XML parser context
7490 *
7491 * parse the XML standalone declaration
7492 *
7493 * [32] SDDecl ::= S 'standalone' Eq
7494 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7495 *
7496 * [ VC: Standalone Document Declaration ]
7497 * TODO The standalone document declaration must have the value "no"
7498 * if any external markup declarations contain declarations of:
7499 * - attributes with default values, if elements to which these
7500 * attributes apply appear in the document without specifications
7501 * of values for these attributes, or
7502 * - entities (other than amp, lt, gt, apos, quot), if references
7503 * to those entities appear in the document, or
7504 * - attributes with values subject to normalization, where the
7505 * attribute appears in the document with a value which will change
7506 * as a result of normalization, or
7507 * - element types with element content, if white space occurs directly
7508 * within any instance of those types.
7509 *
7510 * Returns 1 if standalone, 0 otherwise
7511 */
7512
7513int
7514xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7515 int standalone = -1;
7516
7517 SKIP_BLANKS;
7518 if ((RAW == 's') && (NXT(1) == 't') &&
7519 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7520 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7521 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7522 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7523 SKIP(10);
7524 SKIP_BLANKS;
7525 if (RAW != '=') {
7526 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7528 ctxt->sax->error(ctxt->userData,
7529 "XML standalone declaration : expected '='\n");
7530 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007531 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007532 return(standalone);
7533 }
7534 NEXT;
7535 SKIP_BLANKS;
7536 if (RAW == '\''){
7537 NEXT;
7538 if ((RAW == 'n') && (NXT(1) == 'o')) {
7539 standalone = 0;
7540 SKIP(2);
7541 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7542 (NXT(2) == 's')) {
7543 standalone = 1;
7544 SKIP(3);
7545 } else {
7546 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7548 ctxt->sax->error(ctxt->userData,
7549 "standalone accepts only 'yes' or 'no'\n");
7550 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007551 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007552 }
7553 if (RAW != '\'') {
7554 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7556 ctxt->sax->error(ctxt->userData, "String not closed\n");
7557 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007558 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007559 } else
7560 NEXT;
7561 } else if (RAW == '"'){
7562 NEXT;
7563 if ((RAW == 'n') && (NXT(1) == 'o')) {
7564 standalone = 0;
7565 SKIP(2);
7566 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7567 (NXT(2) == 's')) {
7568 standalone = 1;
7569 SKIP(3);
7570 } else {
7571 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7573 ctxt->sax->error(ctxt->userData,
7574 "standalone accepts only 'yes' or 'no'\n");
7575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007577 }
7578 if (RAW != '"') {
7579 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7581 ctxt->sax->error(ctxt->userData, "String not closed\n");
7582 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007583 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007584 } else
7585 NEXT;
7586 } else {
7587 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7589 ctxt->sax->error(ctxt->userData,
7590 "Standalone value not found\n");
7591 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007592 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007593 }
7594 }
7595 return(standalone);
7596}
7597
7598/**
7599 * xmlParseXMLDecl:
7600 * @ctxt: an XML parser context
7601 *
7602 * parse an XML declaration header
7603 *
7604 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7605 */
7606
7607void
7608xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7609 xmlChar *version;
7610
7611 /*
7612 * We know that '<?xml' is here.
7613 */
7614 SKIP(5);
7615
7616 if (!IS_BLANK(RAW)) {
7617 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7619 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7620 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007621 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007622 }
7623 SKIP_BLANKS;
7624
7625 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007626 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007627 */
7628 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007629 if (version == NULL) {
7630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7631 ctxt->sax->error(ctxt->userData,
7632 "Malformed declaration expecting version\n");
7633 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007634 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007635 } else {
7636 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7637 /*
7638 * TODO: Blueberry should be detected here
7639 */
7640 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7641 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7642 version);
7643 }
7644 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007645 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007646 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007647 }
Owen Taylor3473f882001-02-23 17:55:21 +00007648
7649 /*
7650 * We may have the encoding declaration
7651 */
7652 if (!IS_BLANK(RAW)) {
7653 if ((RAW == '?') && (NXT(1) == '>')) {
7654 SKIP(2);
7655 return;
7656 }
7657 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7659 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7660 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007661 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007662 }
7663 xmlParseEncodingDecl(ctxt);
7664 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7665 /*
7666 * The XML REC instructs us to stop parsing right here
7667 */
7668 return;
7669 }
7670
7671 /*
7672 * We may have the standalone status.
7673 */
7674 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7675 if ((RAW == '?') && (NXT(1) == '>')) {
7676 SKIP(2);
7677 return;
7678 }
7679 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7681 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7682 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007683 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007684 }
7685 SKIP_BLANKS;
7686 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7687
7688 SKIP_BLANKS;
7689 if ((RAW == '?') && (NXT(1) == '>')) {
7690 SKIP(2);
7691 } else if (RAW == '>') {
7692 /* Deprecated old WD ... */
7693 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7695 ctxt->sax->error(ctxt->userData,
7696 "XML declaration must end-up with '?>'\n");
7697 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007698 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007699 NEXT;
7700 } else {
7701 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7703 ctxt->sax->error(ctxt->userData,
7704 "parsing XML declaration: '?>' expected\n");
7705 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007707 MOVETO_ENDTAG(CUR_PTR);
7708 NEXT;
7709 }
7710}
7711
7712/**
7713 * xmlParseMisc:
7714 * @ctxt: an XML parser context
7715 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007716 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007717 *
7718 * [27] Misc ::= Comment | PI | S
7719 */
7720
7721void
7722xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007723 while (((RAW == '<') && (NXT(1) == '?')) ||
7724 ((RAW == '<') && (NXT(1) == '!') &&
7725 (NXT(2) == '-') && (NXT(3) == '-')) ||
7726 IS_BLANK(CUR)) {
7727 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007728 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007729 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007730 NEXT;
7731 } else
7732 xmlParseComment(ctxt);
7733 }
7734}
7735
7736/**
7737 * xmlParseDocument:
7738 * @ctxt: an XML parser context
7739 *
7740 * parse an XML document (and build a tree if using the standard SAX
7741 * interface).
7742 *
7743 * [1] document ::= prolog element Misc*
7744 *
7745 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7746 *
7747 * Returns 0, -1 in case of error. the parser context is augmented
7748 * as a result of the parsing.
7749 */
7750
7751int
7752xmlParseDocument(xmlParserCtxtPtr ctxt) {
7753 xmlChar start[4];
7754 xmlCharEncoding enc;
7755
7756 xmlInitParser();
7757
7758 GROW;
7759
7760 /*
7761 * SAX: beginning of the document processing.
7762 */
7763 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7764 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7765
Daniel Veillard50f34372001-08-03 12:06:36 +00007766 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007767 /*
7768 * Get the 4 first bytes and decode the charset
7769 * if enc != XML_CHAR_ENCODING_NONE
7770 * plug some encoding conversion routines.
7771 */
7772 start[0] = RAW;
7773 start[1] = NXT(1);
7774 start[2] = NXT(2);
7775 start[3] = NXT(3);
7776 enc = xmlDetectCharEncoding(start, 4);
7777 if (enc != XML_CHAR_ENCODING_NONE) {
7778 xmlSwitchEncoding(ctxt, enc);
7779 }
Owen Taylor3473f882001-02-23 17:55:21 +00007780 }
7781
7782
7783 if (CUR == 0) {
7784 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7786 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7787 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007788 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007789 }
7790
7791 /*
7792 * Check for the XMLDecl in the Prolog.
7793 */
7794 GROW;
7795 if ((RAW == '<') && (NXT(1) == '?') &&
7796 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7797 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7798
7799 /*
7800 * Note that we will switch encoding on the fly.
7801 */
7802 xmlParseXMLDecl(ctxt);
7803 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7804 /*
7805 * The XML REC instructs us to stop parsing right here
7806 */
7807 return(-1);
7808 }
7809 ctxt->standalone = ctxt->input->standalone;
7810 SKIP_BLANKS;
7811 } else {
7812 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7813 }
7814 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7815 ctxt->sax->startDocument(ctxt->userData);
7816
7817 /*
7818 * The Misc part of the Prolog
7819 */
7820 GROW;
7821 xmlParseMisc(ctxt);
7822
7823 /*
7824 * Then possibly doc type declaration(s) and more Misc
7825 * (doctypedecl Misc*)?
7826 */
7827 GROW;
7828 if ((RAW == '<') && (NXT(1) == '!') &&
7829 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7830 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7831 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7832 (NXT(8) == 'E')) {
7833
7834 ctxt->inSubset = 1;
7835 xmlParseDocTypeDecl(ctxt);
7836 if (RAW == '[') {
7837 ctxt->instate = XML_PARSER_DTD;
7838 xmlParseInternalSubset(ctxt);
7839 }
7840
7841 /*
7842 * Create and update the external subset.
7843 */
7844 ctxt->inSubset = 2;
7845 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7846 (!ctxt->disableSAX))
7847 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7848 ctxt->extSubSystem, ctxt->extSubURI);
7849 ctxt->inSubset = 0;
7850
7851
7852 ctxt->instate = XML_PARSER_PROLOG;
7853 xmlParseMisc(ctxt);
7854 }
7855
7856 /*
7857 * Time to start parsing the tree itself
7858 */
7859 GROW;
7860 if (RAW != '<') {
7861 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7863 ctxt->sax->error(ctxt->userData,
7864 "Start tag expected, '<' not found\n");
7865 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007866 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007867 ctxt->instate = XML_PARSER_EOF;
7868 } else {
7869 ctxt->instate = XML_PARSER_CONTENT;
7870 xmlParseElement(ctxt);
7871 ctxt->instate = XML_PARSER_EPILOG;
7872
7873
7874 /*
7875 * The Misc part at the end
7876 */
7877 xmlParseMisc(ctxt);
7878
Daniel Veillard561b7f82002-03-20 21:55:57 +00007879 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007880 ctxt->errNo = XML_ERR_DOCUMENT_END;
7881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7882 ctxt->sax->error(ctxt->userData,
7883 "Extra content at the end of the document\n");
7884 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007885 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007886 }
7887 ctxt->instate = XML_PARSER_EOF;
7888 }
7889
7890 /*
7891 * SAX: end of the document processing.
7892 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007893 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007894 ctxt->sax->endDocument(ctxt->userData);
7895
Daniel Veillard5997aca2002-03-18 18:36:20 +00007896 /*
7897 * Remove locally kept entity definitions if the tree was not built
7898 */
7899 if ((ctxt->myDoc != NULL) &&
7900 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7901 xmlFreeDoc(ctxt->myDoc);
7902 ctxt->myDoc = NULL;
7903 }
7904
Daniel Veillardc7612992002-02-17 22:47:37 +00007905 if (! ctxt->wellFormed) {
7906 ctxt->valid = 0;
7907 return(-1);
7908 }
Owen Taylor3473f882001-02-23 17:55:21 +00007909 return(0);
7910}
7911
7912/**
7913 * xmlParseExtParsedEnt:
7914 * @ctxt: an XML parser context
7915 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007916 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007917 * An external general parsed entity is well-formed if it matches the
7918 * production labeled extParsedEnt.
7919 *
7920 * [78] extParsedEnt ::= TextDecl? content
7921 *
7922 * Returns 0, -1 in case of error. the parser context is augmented
7923 * as a result of the parsing.
7924 */
7925
7926int
7927xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7928 xmlChar start[4];
7929 xmlCharEncoding enc;
7930
7931 xmlDefaultSAXHandlerInit();
7932
7933 GROW;
7934
7935 /*
7936 * SAX: beginning of the document processing.
7937 */
7938 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7939 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7940
7941 /*
7942 * Get the 4 first bytes and decode the charset
7943 * if enc != XML_CHAR_ENCODING_NONE
7944 * plug some encoding conversion routines.
7945 */
7946 start[0] = RAW;
7947 start[1] = NXT(1);
7948 start[2] = NXT(2);
7949 start[3] = NXT(3);
7950 enc = xmlDetectCharEncoding(start, 4);
7951 if (enc != XML_CHAR_ENCODING_NONE) {
7952 xmlSwitchEncoding(ctxt, enc);
7953 }
7954
7955
7956 if (CUR == 0) {
7957 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7959 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7960 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007961 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007962 }
7963
7964 /*
7965 * Check for the XMLDecl in the Prolog.
7966 */
7967 GROW;
7968 if ((RAW == '<') && (NXT(1) == '?') &&
7969 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7970 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7971
7972 /*
7973 * Note that we will switch encoding on the fly.
7974 */
7975 xmlParseXMLDecl(ctxt);
7976 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7977 /*
7978 * The XML REC instructs us to stop parsing right here
7979 */
7980 return(-1);
7981 }
7982 SKIP_BLANKS;
7983 } else {
7984 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7985 }
7986 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7987 ctxt->sax->startDocument(ctxt->userData);
7988
7989 /*
7990 * Doing validity checking on chunk doesn't make sense
7991 */
7992 ctxt->instate = XML_PARSER_CONTENT;
7993 ctxt->validate = 0;
7994 ctxt->loadsubset = 0;
7995 ctxt->depth = 0;
7996
7997 xmlParseContent(ctxt);
7998
7999 if ((RAW == '<') && (NXT(1) == '/')) {
8000 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8002 ctxt->sax->error(ctxt->userData,
8003 "chunk is not well balanced\n");
8004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008006 } else if (RAW != 0) {
8007 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8009 ctxt->sax->error(ctxt->userData,
8010 "extra content at the end of well balanced chunk\n");
8011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008013 }
8014
8015 /*
8016 * SAX: end of the document processing.
8017 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008018 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008019 ctxt->sax->endDocument(ctxt->userData);
8020
8021 if (! ctxt->wellFormed) return(-1);
8022 return(0);
8023}
8024
8025/************************************************************************
8026 * *
8027 * Progressive parsing interfaces *
8028 * *
8029 ************************************************************************/
8030
8031/**
8032 * xmlParseLookupSequence:
8033 * @ctxt: an XML parser context
8034 * @first: the first char to lookup
8035 * @next: the next char to lookup or zero
8036 * @third: the next char to lookup or zero
8037 *
8038 * Try to find if a sequence (first, next, third) or just (first next) or
8039 * (first) is available in the input stream.
8040 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8041 * to avoid rescanning sequences of bytes, it DOES change the state of the
8042 * parser, do not use liberally.
8043 *
8044 * Returns the index to the current parsing point if the full sequence
8045 * is available, -1 otherwise.
8046 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008047static int
Owen Taylor3473f882001-02-23 17:55:21 +00008048xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8049 xmlChar next, xmlChar third) {
8050 int base, len;
8051 xmlParserInputPtr in;
8052 const xmlChar *buf;
8053
8054 in = ctxt->input;
8055 if (in == NULL) return(-1);
8056 base = in->cur - in->base;
8057 if (base < 0) return(-1);
8058 if (ctxt->checkIndex > base)
8059 base = ctxt->checkIndex;
8060 if (in->buf == NULL) {
8061 buf = in->base;
8062 len = in->length;
8063 } else {
8064 buf = in->buf->buffer->content;
8065 len = in->buf->buffer->use;
8066 }
8067 /* take into account the sequence length */
8068 if (third) len -= 2;
8069 else if (next) len --;
8070 for (;base < len;base++) {
8071 if (buf[base] == first) {
8072 if (third != 0) {
8073 if ((buf[base + 1] != next) ||
8074 (buf[base + 2] != third)) continue;
8075 } else if (next != 0) {
8076 if (buf[base + 1] != next) continue;
8077 }
8078 ctxt->checkIndex = 0;
8079#ifdef DEBUG_PUSH
8080 if (next == 0)
8081 xmlGenericError(xmlGenericErrorContext,
8082 "PP: lookup '%c' found at %d\n",
8083 first, base);
8084 else if (third == 0)
8085 xmlGenericError(xmlGenericErrorContext,
8086 "PP: lookup '%c%c' found at %d\n",
8087 first, next, base);
8088 else
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: lookup '%c%c%c' found at %d\n",
8091 first, next, third, base);
8092#endif
8093 return(base - (in->cur - in->base));
8094 }
8095 }
8096 ctxt->checkIndex = base;
8097#ifdef DEBUG_PUSH
8098 if (next == 0)
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: lookup '%c' failed\n", first);
8101 else if (third == 0)
8102 xmlGenericError(xmlGenericErrorContext,
8103 "PP: lookup '%c%c' failed\n", first, next);
8104 else
8105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: lookup '%c%c%c' failed\n", first, next, third);
8107#endif
8108 return(-1);
8109}
8110
8111/**
8112 * xmlParseTryOrFinish:
8113 * @ctxt: an XML parser context
8114 * @terminate: last chunk indicator
8115 *
8116 * Try to progress on parsing
8117 *
8118 * Returns zero if no parsing was possible
8119 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008120static int
Owen Taylor3473f882001-02-23 17:55:21 +00008121xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8122 int ret = 0;
8123 int avail;
8124 xmlChar cur, next;
8125
8126#ifdef DEBUG_PUSH
8127 switch (ctxt->instate) {
8128 case XML_PARSER_EOF:
8129 xmlGenericError(xmlGenericErrorContext,
8130 "PP: try EOF\n"); break;
8131 case XML_PARSER_START:
8132 xmlGenericError(xmlGenericErrorContext,
8133 "PP: try START\n"); break;
8134 case XML_PARSER_MISC:
8135 xmlGenericError(xmlGenericErrorContext,
8136 "PP: try MISC\n");break;
8137 case XML_PARSER_COMMENT:
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: try COMMENT\n");break;
8140 case XML_PARSER_PROLOG:
8141 xmlGenericError(xmlGenericErrorContext,
8142 "PP: try PROLOG\n");break;
8143 case XML_PARSER_START_TAG:
8144 xmlGenericError(xmlGenericErrorContext,
8145 "PP: try START_TAG\n");break;
8146 case XML_PARSER_CONTENT:
8147 xmlGenericError(xmlGenericErrorContext,
8148 "PP: try CONTENT\n");break;
8149 case XML_PARSER_CDATA_SECTION:
8150 xmlGenericError(xmlGenericErrorContext,
8151 "PP: try CDATA_SECTION\n");break;
8152 case XML_PARSER_END_TAG:
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: try END_TAG\n");break;
8155 case XML_PARSER_ENTITY_DECL:
8156 xmlGenericError(xmlGenericErrorContext,
8157 "PP: try ENTITY_DECL\n");break;
8158 case XML_PARSER_ENTITY_VALUE:
8159 xmlGenericError(xmlGenericErrorContext,
8160 "PP: try ENTITY_VALUE\n");break;
8161 case XML_PARSER_ATTRIBUTE_VALUE:
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: try ATTRIBUTE_VALUE\n");break;
8164 case XML_PARSER_DTD:
8165 xmlGenericError(xmlGenericErrorContext,
8166 "PP: try DTD\n");break;
8167 case XML_PARSER_EPILOG:
8168 xmlGenericError(xmlGenericErrorContext,
8169 "PP: try EPILOG\n");break;
8170 case XML_PARSER_PI:
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: try PI\n");break;
8173 case XML_PARSER_IGNORE:
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: try IGNORE\n");break;
8176 }
8177#endif
8178
8179 while (1) {
Aleksey Sanine48a3182002-05-09 18:20:01 +00008180 SHRINK;
8181
Owen Taylor3473f882001-02-23 17:55:21 +00008182 /*
8183 * Pop-up of finished entities.
8184 */
8185 while ((RAW == 0) && (ctxt->inputNr > 1))
8186 xmlPopInput(ctxt);
8187
8188 if (ctxt->input ==NULL) break;
8189 if (ctxt->input->buf == NULL)
8190 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008191 else {
8192 /*
8193 * If we are operating on converted input, try to flush
8194 * remainng chars to avoid them stalling in the non-converted
8195 * buffer.
8196 */
8197 if ((ctxt->input->buf->raw != NULL) &&
8198 (ctxt->input->buf->raw->use > 0)) {
8199 int base = ctxt->input->base -
8200 ctxt->input->buf->buffer->content;
8201 int current = ctxt->input->cur - ctxt->input->base;
8202
8203 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8204 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8205 ctxt->input->cur = ctxt->input->base + current;
8206 ctxt->input->end =
8207 &ctxt->input->buf->buffer->content[
8208 ctxt->input->buf->buffer->use];
8209 }
8210 avail = ctxt->input->buf->buffer->use -
8211 (ctxt->input->cur - ctxt->input->base);
8212 }
Owen Taylor3473f882001-02-23 17:55:21 +00008213 if (avail < 1)
8214 goto done;
8215 switch (ctxt->instate) {
8216 case XML_PARSER_EOF:
8217 /*
8218 * Document parsing is done !
8219 */
8220 goto done;
8221 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008222 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8223 xmlChar start[4];
8224 xmlCharEncoding enc;
8225
8226 /*
8227 * Very first chars read from the document flow.
8228 */
8229 if (avail < 4)
8230 goto done;
8231
8232 /*
8233 * Get the 4 first bytes and decode the charset
8234 * if enc != XML_CHAR_ENCODING_NONE
8235 * plug some encoding conversion routines.
8236 */
8237 start[0] = RAW;
8238 start[1] = NXT(1);
8239 start[2] = NXT(2);
8240 start[3] = NXT(3);
8241 enc = xmlDetectCharEncoding(start, 4);
8242 if (enc != XML_CHAR_ENCODING_NONE) {
8243 xmlSwitchEncoding(ctxt, enc);
8244 }
8245 break;
8246 }
Owen Taylor3473f882001-02-23 17:55:21 +00008247
8248 cur = ctxt->input->cur[0];
8249 next = ctxt->input->cur[1];
8250 if (cur == 0) {
8251 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8252 ctxt->sax->setDocumentLocator(ctxt->userData,
8253 &xmlDefaultSAXLocator);
8254 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8256 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8257 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008258 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008259 ctxt->instate = XML_PARSER_EOF;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: entering EOF\n");
8263#endif
8264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8265 ctxt->sax->endDocument(ctxt->userData);
8266 goto done;
8267 }
8268 if ((cur == '<') && (next == '?')) {
8269 /* PI or XML decl */
8270 if (avail < 5) return(ret);
8271 if ((!terminate) &&
8272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8273 return(ret);
8274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8275 ctxt->sax->setDocumentLocator(ctxt->userData,
8276 &xmlDefaultSAXLocator);
8277 if ((ctxt->input->cur[2] == 'x') &&
8278 (ctxt->input->cur[3] == 'm') &&
8279 (ctxt->input->cur[4] == 'l') &&
8280 (IS_BLANK(ctxt->input->cur[5]))) {
8281 ret += 5;
8282#ifdef DEBUG_PUSH
8283 xmlGenericError(xmlGenericErrorContext,
8284 "PP: Parsing XML Decl\n");
8285#endif
8286 xmlParseXMLDecl(ctxt);
8287 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8288 /*
8289 * The XML REC instructs us to stop parsing right
8290 * here
8291 */
8292 ctxt->instate = XML_PARSER_EOF;
8293 return(0);
8294 }
8295 ctxt->standalone = ctxt->input->standalone;
8296 if ((ctxt->encoding == NULL) &&
8297 (ctxt->input->encoding != NULL))
8298 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8299 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8300 (!ctxt->disableSAX))
8301 ctxt->sax->startDocument(ctxt->userData);
8302 ctxt->instate = XML_PARSER_MISC;
8303#ifdef DEBUG_PUSH
8304 xmlGenericError(xmlGenericErrorContext,
8305 "PP: entering MISC\n");
8306#endif
8307 } else {
8308 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8309 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8310 (!ctxt->disableSAX))
8311 ctxt->sax->startDocument(ctxt->userData);
8312 ctxt->instate = XML_PARSER_MISC;
8313#ifdef DEBUG_PUSH
8314 xmlGenericError(xmlGenericErrorContext,
8315 "PP: entering MISC\n");
8316#endif
8317 }
8318 } else {
8319 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8320 ctxt->sax->setDocumentLocator(ctxt->userData,
8321 &xmlDefaultSAXLocator);
8322 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8323 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8324 (!ctxt->disableSAX))
8325 ctxt->sax->startDocument(ctxt->userData);
8326 ctxt->instate = XML_PARSER_MISC;
8327#ifdef DEBUG_PUSH
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: entering MISC\n");
8330#endif
8331 }
8332 break;
8333 case XML_PARSER_MISC:
8334 SKIP_BLANKS;
8335 if (ctxt->input->buf == NULL)
8336 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8337 else
8338 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8339 if (avail < 2)
8340 goto done;
8341 cur = ctxt->input->cur[0];
8342 next = ctxt->input->cur[1];
8343 if ((cur == '<') && (next == '?')) {
8344 if ((!terminate) &&
8345 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8346 goto done;
8347#ifdef DEBUG_PUSH
8348 xmlGenericError(xmlGenericErrorContext,
8349 "PP: Parsing PI\n");
8350#endif
8351 xmlParsePI(ctxt);
8352 } else if ((cur == '<') && (next == '!') &&
8353 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8354 if ((!terminate) &&
8355 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8356 goto done;
8357#ifdef DEBUG_PUSH
8358 xmlGenericError(xmlGenericErrorContext,
8359 "PP: Parsing Comment\n");
8360#endif
8361 xmlParseComment(ctxt);
8362 ctxt->instate = XML_PARSER_MISC;
8363 } else if ((cur == '<') && (next == '!') &&
8364 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8365 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8366 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8367 (ctxt->input->cur[8] == 'E')) {
8368 if ((!terminate) &&
8369 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8370 goto done;
8371#ifdef DEBUG_PUSH
8372 xmlGenericError(xmlGenericErrorContext,
8373 "PP: Parsing internal subset\n");
8374#endif
8375 ctxt->inSubset = 1;
8376 xmlParseDocTypeDecl(ctxt);
8377 if (RAW == '[') {
8378 ctxt->instate = XML_PARSER_DTD;
8379#ifdef DEBUG_PUSH
8380 xmlGenericError(xmlGenericErrorContext,
8381 "PP: entering DTD\n");
8382#endif
8383 } else {
8384 /*
8385 * Create and update the external subset.
8386 */
8387 ctxt->inSubset = 2;
8388 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8389 (ctxt->sax->externalSubset != NULL))
8390 ctxt->sax->externalSubset(ctxt->userData,
8391 ctxt->intSubName, ctxt->extSubSystem,
8392 ctxt->extSubURI);
8393 ctxt->inSubset = 0;
8394 ctxt->instate = XML_PARSER_PROLOG;
8395#ifdef DEBUG_PUSH
8396 xmlGenericError(xmlGenericErrorContext,
8397 "PP: entering PROLOG\n");
8398#endif
8399 }
8400 } else if ((cur == '<') && (next == '!') &&
8401 (avail < 9)) {
8402 goto done;
8403 } else {
8404 ctxt->instate = XML_PARSER_START_TAG;
8405#ifdef DEBUG_PUSH
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: entering START_TAG\n");
8408#endif
8409 }
8410 break;
8411 case XML_PARSER_IGNORE:
8412 xmlGenericError(xmlGenericErrorContext,
8413 "PP: internal error, state == IGNORE");
8414 ctxt->instate = XML_PARSER_DTD;
8415#ifdef DEBUG_PUSH
8416 xmlGenericError(xmlGenericErrorContext,
8417 "PP: entering DTD\n");
8418#endif
8419 break;
8420 case XML_PARSER_PROLOG:
8421 SKIP_BLANKS;
8422 if (ctxt->input->buf == NULL)
8423 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8424 else
8425 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8426 if (avail < 2)
8427 goto done;
8428 cur = ctxt->input->cur[0];
8429 next = ctxt->input->cur[1];
8430 if ((cur == '<') && (next == '?')) {
8431 if ((!terminate) &&
8432 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8433 goto done;
8434#ifdef DEBUG_PUSH
8435 xmlGenericError(xmlGenericErrorContext,
8436 "PP: Parsing PI\n");
8437#endif
8438 xmlParsePI(ctxt);
8439 } else if ((cur == '<') && (next == '!') &&
8440 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8441 if ((!terminate) &&
8442 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8443 goto done;
8444#ifdef DEBUG_PUSH
8445 xmlGenericError(xmlGenericErrorContext,
8446 "PP: Parsing Comment\n");
8447#endif
8448 xmlParseComment(ctxt);
8449 ctxt->instate = XML_PARSER_PROLOG;
8450 } else if ((cur == '<') && (next == '!') &&
8451 (avail < 4)) {
8452 goto done;
8453 } else {
8454 ctxt->instate = XML_PARSER_START_TAG;
8455#ifdef DEBUG_PUSH
8456 xmlGenericError(xmlGenericErrorContext,
8457 "PP: entering START_TAG\n");
8458#endif
8459 }
8460 break;
8461 case XML_PARSER_EPILOG:
8462 SKIP_BLANKS;
8463 if (ctxt->input->buf == NULL)
8464 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8465 else
8466 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8467 if (avail < 2)
8468 goto done;
8469 cur = ctxt->input->cur[0];
8470 next = ctxt->input->cur[1];
8471 if ((cur == '<') && (next == '?')) {
8472 if ((!terminate) &&
8473 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8474 goto done;
8475#ifdef DEBUG_PUSH
8476 xmlGenericError(xmlGenericErrorContext,
8477 "PP: Parsing PI\n");
8478#endif
8479 xmlParsePI(ctxt);
8480 ctxt->instate = XML_PARSER_EPILOG;
8481 } else if ((cur == '<') && (next == '!') &&
8482 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8483 if ((!terminate) &&
8484 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8485 goto done;
8486#ifdef DEBUG_PUSH
8487 xmlGenericError(xmlGenericErrorContext,
8488 "PP: Parsing Comment\n");
8489#endif
8490 xmlParseComment(ctxt);
8491 ctxt->instate = XML_PARSER_EPILOG;
8492 } else if ((cur == '<') && (next == '!') &&
8493 (avail < 4)) {
8494 goto done;
8495 } else {
8496 ctxt->errNo = XML_ERR_DOCUMENT_END;
8497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8498 ctxt->sax->error(ctxt->userData,
8499 "Extra content at the end of the document\n");
8500 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008501 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008502 ctxt->instate = XML_PARSER_EOF;
8503#ifdef DEBUG_PUSH
8504 xmlGenericError(xmlGenericErrorContext,
8505 "PP: entering EOF\n");
8506#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008507 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008508 ctxt->sax->endDocument(ctxt->userData);
8509 goto done;
8510 }
8511 break;
8512 case XML_PARSER_START_TAG: {
8513 xmlChar *name, *oldname;
8514
8515 if ((avail < 2) && (ctxt->inputNr == 1))
8516 goto done;
8517 cur = ctxt->input->cur[0];
8518 if (cur != '<') {
8519 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8521 ctxt->sax->error(ctxt->userData,
8522 "Start tag expect, '<' not found\n");
8523 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008524 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008525 ctxt->instate = XML_PARSER_EOF;
8526#ifdef DEBUG_PUSH
8527 xmlGenericError(xmlGenericErrorContext,
8528 "PP: entering EOF\n");
8529#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008530 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008531 ctxt->sax->endDocument(ctxt->userData);
8532 goto done;
8533 }
8534 if ((!terminate) &&
8535 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8536 goto done;
8537 if (ctxt->spaceNr == 0)
8538 spacePush(ctxt, -1);
8539 else
8540 spacePush(ctxt, *ctxt->space);
8541 name = xmlParseStartTag(ctxt);
8542 if (name == NULL) {
8543 spacePop(ctxt);
8544 ctxt->instate = XML_PARSER_EOF;
8545#ifdef DEBUG_PUSH
8546 xmlGenericError(xmlGenericErrorContext,
8547 "PP: entering EOF\n");
8548#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008549 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008550 ctxt->sax->endDocument(ctxt->userData);
8551 goto done;
8552 }
8553 namePush(ctxt, xmlStrdup(name));
8554
8555 /*
8556 * [ VC: Root Element Type ]
8557 * The Name in the document type declaration must match
8558 * the element type of the root element.
8559 */
8560 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8561 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8562 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8563
8564 /*
8565 * Check for an Empty Element.
8566 */
8567 if ((RAW == '/') && (NXT(1) == '>')) {
8568 SKIP(2);
8569 if ((ctxt->sax != NULL) &&
8570 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8571 ctxt->sax->endElement(ctxt->userData, name);
8572 xmlFree(name);
8573 oldname = namePop(ctxt);
8574 spacePop(ctxt);
8575 if (oldname != NULL) {
8576#ifdef DEBUG_STACK
8577 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8578#endif
8579 xmlFree(oldname);
8580 }
8581 if (ctxt->name == NULL) {
8582 ctxt->instate = XML_PARSER_EPILOG;
8583#ifdef DEBUG_PUSH
8584 xmlGenericError(xmlGenericErrorContext,
8585 "PP: entering EPILOG\n");
8586#endif
8587 } else {
8588 ctxt->instate = XML_PARSER_CONTENT;
8589#ifdef DEBUG_PUSH
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: entering CONTENT\n");
8592#endif
8593 }
8594 break;
8595 }
8596 if (RAW == '>') {
8597 NEXT;
8598 } else {
8599 ctxt->errNo = XML_ERR_GT_REQUIRED;
8600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8601 ctxt->sax->error(ctxt->userData,
8602 "Couldn't find end of Start Tag %s\n",
8603 name);
8604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008606
8607 /*
8608 * end of parsing of this node.
8609 */
8610 nodePop(ctxt);
8611 oldname = namePop(ctxt);
8612 spacePop(ctxt);
8613 if (oldname != NULL) {
8614#ifdef DEBUG_STACK
8615 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8616#endif
8617 xmlFree(oldname);
8618 }
8619 }
8620 xmlFree(name);
8621 ctxt->instate = XML_PARSER_CONTENT;
8622#ifdef DEBUG_PUSH
8623 xmlGenericError(xmlGenericErrorContext,
8624 "PP: entering CONTENT\n");
8625#endif
8626 break;
8627 }
8628 case XML_PARSER_CONTENT: {
8629 const xmlChar *test;
8630 int cons;
Owen Taylor3473f882001-02-23 17:55:21 +00008631 if ((avail < 2) && (ctxt->inputNr == 1))
8632 goto done;
8633 cur = ctxt->input->cur[0];
8634 next = ctxt->input->cur[1];
8635
8636 test = CUR_PTR;
8637 cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008638 if ((cur == '<') && (next == '?')) {
8639 if ((!terminate) &&
8640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8641 goto done;
8642#ifdef DEBUG_PUSH
8643 xmlGenericError(xmlGenericErrorContext,
8644 "PP: Parsing PI\n");
8645#endif
8646 xmlParsePI(ctxt);
8647 } else if ((cur == '<') && (next == '!') &&
8648 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8649 if ((!terminate) &&
8650 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8651 goto done;
8652#ifdef DEBUG_PUSH
8653 xmlGenericError(xmlGenericErrorContext,
8654 "PP: Parsing Comment\n");
8655#endif
8656 xmlParseComment(ctxt);
8657 ctxt->instate = XML_PARSER_CONTENT;
8658 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8659 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8660 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8661 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8662 (ctxt->input->cur[8] == '[')) {
8663 SKIP(9);
8664 ctxt->instate = XML_PARSER_CDATA_SECTION;
8665#ifdef DEBUG_PUSH
8666 xmlGenericError(xmlGenericErrorContext,
8667 "PP: entering CDATA_SECTION\n");
8668#endif
8669 break;
8670 } else if ((cur == '<') && (next == '!') &&
8671 (avail < 9)) {
8672 goto done;
8673 } else if ((cur == '<') && (next == '/')) {
8674 ctxt->instate = XML_PARSER_END_TAG;
8675#ifdef DEBUG_PUSH
8676 xmlGenericError(xmlGenericErrorContext,
8677 "PP: entering END_TAG\n");
8678#endif
8679 break;
8680 } else if (cur == '<') {
8681 ctxt->instate = XML_PARSER_START_TAG;
8682#ifdef DEBUG_PUSH
8683 xmlGenericError(xmlGenericErrorContext,
8684 "PP: entering START_TAG\n");
8685#endif
8686 break;
8687 } else if (cur == '&') {
8688 if ((!terminate) &&
8689 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8690 goto done;
8691#ifdef DEBUG_PUSH
8692 xmlGenericError(xmlGenericErrorContext,
8693 "PP: Parsing Reference\n");
8694#endif
8695 xmlParseReference(ctxt);
8696 } else {
8697 /* TODO Avoid the extra copy, handle directly !!! */
8698 /*
8699 * Goal of the following test is:
8700 * - minimize calls to the SAX 'character' callback
8701 * when they are mergeable
8702 * - handle an problem for isBlank when we only parse
8703 * a sequence of blank chars and the next one is
8704 * not available to check against '<' presence.
8705 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008706 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008707 * of the parser.
8708 */
8709 if ((ctxt->inputNr == 1) &&
8710 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8711 if ((!terminate) &&
8712 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8713 goto done;
8714 }
8715 ctxt->checkIndex = 0;
8716#ifdef DEBUG_PUSH
8717 xmlGenericError(xmlGenericErrorContext,
8718 "PP: Parsing char data\n");
8719#endif
8720 xmlParseCharData(ctxt, 0);
8721 }
8722 /*
8723 * Pop-up of finished entities.
8724 */
8725 while ((RAW == 0) && (ctxt->inputNr > 1))
8726 xmlPopInput(ctxt);
Daniel Veillardfdc91562002-07-01 21:52:03 +00008727 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008728 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8730 ctxt->sax->error(ctxt->userData,
8731 "detected an error in element content\n");
8732 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008733 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008734 ctxt->instate = XML_PARSER_EOF;
8735 break;
8736 }
8737 break;
8738 }
8739 case XML_PARSER_CDATA_SECTION: {
8740 /*
8741 * The Push mode need to have the SAX callback for
8742 * cdataBlock merge back contiguous callbacks.
8743 */
8744 int base;
8745
8746 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8747 if (base < 0) {
8748 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8749 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8750 if (ctxt->sax->cdataBlock != NULL)
8751 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8752 XML_PARSER_BIG_BUFFER_SIZE);
8753 }
8754 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8755 ctxt->checkIndex = 0;
8756 }
8757 goto done;
8758 } else {
8759 if ((ctxt->sax != NULL) && (base > 0) &&
8760 (!ctxt->disableSAX)) {
8761 if (ctxt->sax->cdataBlock != NULL)
8762 ctxt->sax->cdataBlock(ctxt->userData,
8763 ctxt->input->cur, base);
8764 }
8765 SKIP(base + 3);
8766 ctxt->checkIndex = 0;
8767 ctxt->instate = XML_PARSER_CONTENT;
8768#ifdef DEBUG_PUSH
8769 xmlGenericError(xmlGenericErrorContext,
8770 "PP: entering CONTENT\n");
8771#endif
8772 }
8773 break;
8774 }
8775 case XML_PARSER_END_TAG:
8776 if (avail < 2)
8777 goto done;
8778 if ((!terminate) &&
8779 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8780 goto done;
8781 xmlParseEndTag(ctxt);
8782 if (ctxt->name == NULL) {
8783 ctxt->instate = XML_PARSER_EPILOG;
8784#ifdef DEBUG_PUSH
8785 xmlGenericError(xmlGenericErrorContext,
8786 "PP: entering EPILOG\n");
8787#endif
8788 } else {
8789 ctxt->instate = XML_PARSER_CONTENT;
8790#ifdef DEBUG_PUSH
8791 xmlGenericError(xmlGenericErrorContext,
8792 "PP: entering CONTENT\n");
8793#endif
8794 }
8795 break;
8796 case XML_PARSER_DTD: {
8797 /*
8798 * Sorry but progressive parsing of the internal subset
8799 * is not expected to be supported. We first check that
8800 * the full content of the internal subset is available and
8801 * the parsing is launched only at that point.
8802 * Internal subset ends up with "']' S? '>'" in an unescaped
8803 * section and not in a ']]>' sequence which are conditional
8804 * sections (whoever argued to keep that crap in XML deserve
8805 * a place in hell !).
8806 */
8807 int base, i;
8808 xmlChar *buf;
8809 xmlChar quote = 0;
8810
8811 base = ctxt->input->cur - ctxt->input->base;
8812 if (base < 0) return(0);
8813 if (ctxt->checkIndex > base)
8814 base = ctxt->checkIndex;
8815 buf = ctxt->input->buf->buffer->content;
8816 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8817 base++) {
8818 if (quote != 0) {
8819 if (buf[base] == quote)
8820 quote = 0;
8821 continue;
8822 }
8823 if (buf[base] == '"') {
8824 quote = '"';
8825 continue;
8826 }
8827 if (buf[base] == '\'') {
8828 quote = '\'';
8829 continue;
8830 }
8831 if (buf[base] == ']') {
8832 if ((unsigned int) base +1 >=
8833 ctxt->input->buf->buffer->use)
8834 break;
8835 if (buf[base + 1] == ']') {
8836 /* conditional crap, skip both ']' ! */
8837 base++;
8838 continue;
8839 }
8840 for (i = 0;
8841 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8842 i++) {
8843 if (buf[base + i] == '>')
8844 goto found_end_int_subset;
8845 }
8846 break;
8847 }
8848 }
8849 /*
8850 * We didn't found the end of the Internal subset
8851 */
8852 if (quote == 0)
8853 ctxt->checkIndex = base;
8854#ifdef DEBUG_PUSH
8855 if (next == 0)
8856 xmlGenericError(xmlGenericErrorContext,
8857 "PP: lookup of int subset end filed\n");
8858#endif
8859 goto done;
8860
8861found_end_int_subset:
8862 xmlParseInternalSubset(ctxt);
8863 ctxt->inSubset = 2;
8864 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8865 (ctxt->sax->externalSubset != NULL))
8866 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8867 ctxt->extSubSystem, ctxt->extSubURI);
8868 ctxt->inSubset = 0;
8869 ctxt->instate = XML_PARSER_PROLOG;
8870 ctxt->checkIndex = 0;
8871#ifdef DEBUG_PUSH
8872 xmlGenericError(xmlGenericErrorContext,
8873 "PP: entering PROLOG\n");
8874#endif
8875 break;
8876 }
8877 case XML_PARSER_COMMENT:
8878 xmlGenericError(xmlGenericErrorContext,
8879 "PP: internal error, state == COMMENT\n");
8880 ctxt->instate = XML_PARSER_CONTENT;
8881#ifdef DEBUG_PUSH
8882 xmlGenericError(xmlGenericErrorContext,
8883 "PP: entering CONTENT\n");
8884#endif
8885 break;
8886 case XML_PARSER_PI:
8887 xmlGenericError(xmlGenericErrorContext,
8888 "PP: internal error, state == PI\n");
8889 ctxt->instate = XML_PARSER_CONTENT;
8890#ifdef DEBUG_PUSH
8891 xmlGenericError(xmlGenericErrorContext,
8892 "PP: entering CONTENT\n");
8893#endif
8894 break;
8895 case XML_PARSER_ENTITY_DECL:
8896 xmlGenericError(xmlGenericErrorContext,
8897 "PP: internal error, state == ENTITY_DECL\n");
8898 ctxt->instate = XML_PARSER_DTD;
8899#ifdef DEBUG_PUSH
8900 xmlGenericError(xmlGenericErrorContext,
8901 "PP: entering DTD\n");
8902#endif
8903 break;
8904 case XML_PARSER_ENTITY_VALUE:
8905 xmlGenericError(xmlGenericErrorContext,
8906 "PP: internal error, state == ENTITY_VALUE\n");
8907 ctxt->instate = XML_PARSER_CONTENT;
8908#ifdef DEBUG_PUSH
8909 xmlGenericError(xmlGenericErrorContext,
8910 "PP: entering DTD\n");
8911#endif
8912 break;
8913 case XML_PARSER_ATTRIBUTE_VALUE:
8914 xmlGenericError(xmlGenericErrorContext,
8915 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8916 ctxt->instate = XML_PARSER_START_TAG;
8917#ifdef DEBUG_PUSH
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: entering START_TAG\n");
8920#endif
8921 break;
8922 case XML_PARSER_SYSTEM_LITERAL:
8923 xmlGenericError(xmlGenericErrorContext,
8924 "PP: internal error, state == SYSTEM_LITERAL\n");
8925 ctxt->instate = XML_PARSER_START_TAG;
8926#ifdef DEBUG_PUSH
8927 xmlGenericError(xmlGenericErrorContext,
8928 "PP: entering START_TAG\n");
8929#endif
8930 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008931 case XML_PARSER_PUBLIC_LITERAL:
8932 xmlGenericError(xmlGenericErrorContext,
8933 "PP: internal error, state == PUBLIC_LITERAL\n");
8934 ctxt->instate = XML_PARSER_START_TAG;
8935#ifdef DEBUG_PUSH
8936 xmlGenericError(xmlGenericErrorContext,
8937 "PP: entering START_TAG\n");
8938#endif
8939 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008940 }
8941 }
8942done:
8943#ifdef DEBUG_PUSH
8944 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8945#endif
8946 return(ret);
8947}
8948
8949/**
Owen Taylor3473f882001-02-23 17:55:21 +00008950 * xmlParseChunk:
8951 * @ctxt: an XML parser context
8952 * @chunk: an char array
8953 * @size: the size in byte of the chunk
8954 * @terminate: last chunk indicator
8955 *
8956 * Parse a Chunk of memory
8957 *
8958 * Returns zero if no error, the xmlParserErrors otherwise.
8959 */
8960int
8961xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8962 int terminate) {
8963 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8964 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8965 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8966 int cur = ctxt->input->cur - ctxt->input->base;
8967
8968 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8969 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8970 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008971 ctxt->input->end =
8972 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008973#ifdef DEBUG_PUSH
8974 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8975#endif
8976
8977 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8978 xmlParseTryOrFinish(ctxt, terminate);
8979 } else if (ctxt->instate != XML_PARSER_EOF) {
8980 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8981 xmlParserInputBufferPtr in = ctxt->input->buf;
8982 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8983 (in->raw != NULL)) {
8984 int nbchars;
8985
8986 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8987 if (nbchars < 0) {
8988 xmlGenericError(xmlGenericErrorContext,
8989 "xmlParseChunk: encoder error\n");
8990 return(XML_ERR_INVALID_ENCODING);
8991 }
8992 }
8993 }
8994 }
8995 xmlParseTryOrFinish(ctxt, terminate);
8996 if (terminate) {
8997 /*
8998 * Check for termination
8999 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009000 int avail = 0;
9001 if (ctxt->input->buf == NULL)
9002 avail = ctxt->input->length -
9003 (ctxt->input->cur - ctxt->input->base);
9004 else
9005 avail = ctxt->input->buf->buffer->use -
9006 (ctxt->input->cur - ctxt->input->base);
9007
Owen Taylor3473f882001-02-23 17:55:21 +00009008 if ((ctxt->instate != XML_PARSER_EOF) &&
9009 (ctxt->instate != XML_PARSER_EPILOG)) {
9010 ctxt->errNo = XML_ERR_DOCUMENT_END;
9011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9012 ctxt->sax->error(ctxt->userData,
9013 "Extra content at the end of the document\n");
9014 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009015 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009016 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009017 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9018 ctxt->errNo = XML_ERR_DOCUMENT_END;
9019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9020 ctxt->sax->error(ctxt->userData,
9021 "Extra content at the end of the document\n");
9022 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009023 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009024
9025 }
Owen Taylor3473f882001-02-23 17:55:21 +00009026 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009027 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009028 ctxt->sax->endDocument(ctxt->userData);
9029 }
9030 ctxt->instate = XML_PARSER_EOF;
9031 }
9032 return((xmlParserErrors) ctxt->errNo);
9033}
9034
9035/************************************************************************
9036 * *
9037 * I/O front end functions to the parser *
9038 * *
9039 ************************************************************************/
9040
9041/**
9042 * xmlStopParser:
9043 * @ctxt: an XML parser context
9044 *
9045 * Blocks further parser processing
9046 */
9047void
9048xmlStopParser(xmlParserCtxtPtr ctxt) {
9049 ctxt->instate = XML_PARSER_EOF;
9050 if (ctxt->input != NULL)
9051 ctxt->input->cur = BAD_CAST"";
9052}
9053
9054/**
9055 * xmlCreatePushParserCtxt:
9056 * @sax: a SAX handler
9057 * @user_data: The user data returned on SAX callbacks
9058 * @chunk: a pointer to an array of chars
9059 * @size: number of chars in the array
9060 * @filename: an optional file name or URI
9061 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009062 * Create a parser context for using the XML parser in push mode.
9063 * If @buffer and @size are non-NULL, the data is used to detect
9064 * the encoding. The remaining characters will be parsed so they
9065 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009066 * To allow content encoding detection, @size should be >= 4
9067 * The value of @filename is used for fetching external entities
9068 * and error/warning reports.
9069 *
9070 * Returns the new parser context or NULL
9071 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009072
Owen Taylor3473f882001-02-23 17:55:21 +00009073xmlParserCtxtPtr
9074xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9075 const char *chunk, int size, const char *filename) {
9076 xmlParserCtxtPtr ctxt;
9077 xmlParserInputPtr inputStream;
9078 xmlParserInputBufferPtr buf;
9079 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9080
9081 /*
9082 * plug some encoding conversion routines
9083 */
9084 if ((chunk != NULL) && (size >= 4))
9085 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9086
9087 buf = xmlAllocParserInputBuffer(enc);
9088 if (buf == NULL) return(NULL);
9089
9090 ctxt = xmlNewParserCtxt();
9091 if (ctxt == NULL) {
9092 xmlFree(buf);
9093 return(NULL);
9094 }
9095 if (sax != NULL) {
9096 if (ctxt->sax != &xmlDefaultSAXHandler)
9097 xmlFree(ctxt->sax);
9098 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9099 if (ctxt->sax == NULL) {
9100 xmlFree(buf);
9101 xmlFree(ctxt);
9102 return(NULL);
9103 }
9104 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9105 if (user_data != NULL)
9106 ctxt->userData = user_data;
9107 }
9108 if (filename == NULL) {
9109 ctxt->directory = NULL;
9110 } else {
9111 ctxt->directory = xmlParserGetDirectory(filename);
9112 }
9113
9114 inputStream = xmlNewInputStream(ctxt);
9115 if (inputStream == NULL) {
9116 xmlFreeParserCtxt(ctxt);
9117 return(NULL);
9118 }
9119
9120 if (filename == NULL)
9121 inputStream->filename = NULL;
9122 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009123 inputStream->filename = (char *)
9124 xmlNormalizeWindowsPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009125 inputStream->buf = buf;
9126 inputStream->base = inputStream->buf->buffer->content;
9127 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009128 inputStream->end =
9129 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009130
9131 inputPush(ctxt, inputStream);
9132
9133 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9134 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009135 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9136 int cur = ctxt->input->cur - ctxt->input->base;
9137
Owen Taylor3473f882001-02-23 17:55:21 +00009138 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009139
9140 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9141 ctxt->input->cur = ctxt->input->base + cur;
9142 ctxt->input->end =
9143 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009144#ifdef DEBUG_PUSH
9145 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9146#endif
9147 }
9148
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009149 if (enc != XML_CHAR_ENCODING_NONE) {
9150 xmlSwitchEncoding(ctxt, enc);
9151 }
9152
Owen Taylor3473f882001-02-23 17:55:21 +00009153 return(ctxt);
9154}
9155
9156/**
9157 * xmlCreateIOParserCtxt:
9158 * @sax: a SAX handler
9159 * @user_data: The user data returned on SAX callbacks
9160 * @ioread: an I/O read function
9161 * @ioclose: an I/O close function
9162 * @ioctx: an I/O handler
9163 * @enc: the charset encoding if known
9164 *
9165 * Create a parser context for using the XML parser with an existing
9166 * I/O stream
9167 *
9168 * Returns the new parser context or NULL
9169 */
9170xmlParserCtxtPtr
9171xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9172 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9173 void *ioctx, xmlCharEncoding enc) {
9174 xmlParserCtxtPtr ctxt;
9175 xmlParserInputPtr inputStream;
9176 xmlParserInputBufferPtr buf;
9177
9178 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9179 if (buf == NULL) return(NULL);
9180
9181 ctxt = xmlNewParserCtxt();
9182 if (ctxt == NULL) {
9183 xmlFree(buf);
9184 return(NULL);
9185 }
9186 if (sax != NULL) {
9187 if (ctxt->sax != &xmlDefaultSAXHandler)
9188 xmlFree(ctxt->sax);
9189 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9190 if (ctxt->sax == NULL) {
9191 xmlFree(buf);
9192 xmlFree(ctxt);
9193 return(NULL);
9194 }
9195 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9196 if (user_data != NULL)
9197 ctxt->userData = user_data;
9198 }
9199
9200 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9201 if (inputStream == NULL) {
9202 xmlFreeParserCtxt(ctxt);
9203 return(NULL);
9204 }
9205 inputPush(ctxt, inputStream);
9206
9207 return(ctxt);
9208}
9209
9210/************************************************************************
9211 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009212 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009213 * *
9214 ************************************************************************/
9215
9216/**
9217 * xmlIOParseDTD:
9218 * @sax: the SAX handler block or NULL
9219 * @input: an Input Buffer
9220 * @enc: the charset encoding if known
9221 *
9222 * Load and parse a DTD
9223 *
9224 * Returns the resulting xmlDtdPtr or NULL in case of error.
9225 * @input will be freed at parsing end.
9226 */
9227
9228xmlDtdPtr
9229xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9230 xmlCharEncoding enc) {
9231 xmlDtdPtr ret = NULL;
9232 xmlParserCtxtPtr ctxt;
9233 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009234 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009235
9236 if (input == NULL)
9237 return(NULL);
9238
9239 ctxt = xmlNewParserCtxt();
9240 if (ctxt == NULL) {
9241 return(NULL);
9242 }
9243
9244 /*
9245 * Set-up the SAX context
9246 */
9247 if (sax != NULL) {
9248 if (ctxt->sax != NULL)
9249 xmlFree(ctxt->sax);
9250 ctxt->sax = sax;
9251 ctxt->userData = NULL;
9252 }
9253
9254 /*
9255 * generate a parser input from the I/O handler
9256 */
9257
9258 pinput = xmlNewIOInputStream(ctxt, input, enc);
9259 if (pinput == NULL) {
9260 if (sax != NULL) ctxt->sax = NULL;
9261 xmlFreeParserCtxt(ctxt);
9262 return(NULL);
9263 }
9264
9265 /*
9266 * plug some encoding conversion routines here.
9267 */
9268 xmlPushInput(ctxt, pinput);
9269
9270 pinput->filename = NULL;
9271 pinput->line = 1;
9272 pinput->col = 1;
9273 pinput->base = ctxt->input->cur;
9274 pinput->cur = ctxt->input->cur;
9275 pinput->free = NULL;
9276
9277 /*
9278 * let's parse that entity knowing it's an external subset.
9279 */
9280 ctxt->inSubset = 2;
9281 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9282 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9283 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009284
9285 if (enc == XML_CHAR_ENCODING_NONE) {
9286 /*
9287 * Get the 4 first bytes and decode the charset
9288 * if enc != XML_CHAR_ENCODING_NONE
9289 * plug some encoding conversion routines.
9290 */
9291 start[0] = RAW;
9292 start[1] = NXT(1);
9293 start[2] = NXT(2);
9294 start[3] = NXT(3);
9295 enc = xmlDetectCharEncoding(start, 4);
9296 if (enc != XML_CHAR_ENCODING_NONE) {
9297 xmlSwitchEncoding(ctxt, enc);
9298 }
9299 }
9300
Owen Taylor3473f882001-02-23 17:55:21 +00009301 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9302
9303 if (ctxt->myDoc != NULL) {
9304 if (ctxt->wellFormed) {
9305 ret = ctxt->myDoc->extSubset;
9306 ctxt->myDoc->extSubset = NULL;
9307 } else {
9308 ret = NULL;
9309 }
9310 xmlFreeDoc(ctxt->myDoc);
9311 ctxt->myDoc = NULL;
9312 }
9313 if (sax != NULL) ctxt->sax = NULL;
9314 xmlFreeParserCtxt(ctxt);
9315
9316 return(ret);
9317}
9318
9319/**
9320 * xmlSAXParseDTD:
9321 * @sax: the SAX handler block
9322 * @ExternalID: a NAME* containing the External ID of the DTD
9323 * @SystemID: a NAME* containing the URL to the DTD
9324 *
9325 * Load and parse an external subset.
9326 *
9327 * Returns the resulting xmlDtdPtr or NULL in case of error.
9328 */
9329
9330xmlDtdPtr
9331xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9332 const xmlChar *SystemID) {
9333 xmlDtdPtr ret = NULL;
9334 xmlParserCtxtPtr ctxt;
9335 xmlParserInputPtr input = NULL;
9336 xmlCharEncoding enc;
9337
9338 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9339
9340 ctxt = xmlNewParserCtxt();
9341 if (ctxt == NULL) {
9342 return(NULL);
9343 }
9344
9345 /*
9346 * Set-up the SAX context
9347 */
9348 if (sax != NULL) {
9349 if (ctxt->sax != NULL)
9350 xmlFree(ctxt->sax);
9351 ctxt->sax = sax;
9352 ctxt->userData = NULL;
9353 }
9354
9355 /*
9356 * Ask the Entity resolver to load the damn thing
9357 */
9358
9359 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9360 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9361 if (input == NULL) {
9362 if (sax != NULL) ctxt->sax = NULL;
9363 xmlFreeParserCtxt(ctxt);
9364 return(NULL);
9365 }
9366
9367 /*
9368 * plug some encoding conversion routines here.
9369 */
9370 xmlPushInput(ctxt, input);
9371 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9372 xmlSwitchEncoding(ctxt, enc);
9373
9374 if (input->filename == NULL)
9375 input->filename = (char *) xmlStrdup(SystemID);
9376 input->line = 1;
9377 input->col = 1;
9378 input->base = ctxt->input->cur;
9379 input->cur = ctxt->input->cur;
9380 input->free = NULL;
9381
9382 /*
9383 * let's parse that entity knowing it's an external subset.
9384 */
9385 ctxt->inSubset = 2;
9386 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9387 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9388 ExternalID, SystemID);
9389 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9390
9391 if (ctxt->myDoc != NULL) {
9392 if (ctxt->wellFormed) {
9393 ret = ctxt->myDoc->extSubset;
9394 ctxt->myDoc->extSubset = NULL;
9395 } else {
9396 ret = NULL;
9397 }
9398 xmlFreeDoc(ctxt->myDoc);
9399 ctxt->myDoc = NULL;
9400 }
9401 if (sax != NULL) ctxt->sax = NULL;
9402 xmlFreeParserCtxt(ctxt);
9403
9404 return(ret);
9405}
9406
9407/**
9408 * xmlParseDTD:
9409 * @ExternalID: a NAME* containing the External ID of the DTD
9410 * @SystemID: a NAME* containing the URL to the DTD
9411 *
9412 * Load and parse an external subset.
9413 *
9414 * Returns the resulting xmlDtdPtr or NULL in case of error.
9415 */
9416
9417xmlDtdPtr
9418xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9419 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9420}
9421
9422/************************************************************************
9423 * *
9424 * Front ends when parsing an Entity *
9425 * *
9426 ************************************************************************/
9427
9428/**
Owen Taylor3473f882001-02-23 17:55:21 +00009429 * xmlParseCtxtExternalEntity:
9430 * @ctx: the existing parsing context
9431 * @URL: the URL for the entity to load
9432 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009433 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009434 *
9435 * Parse an external general entity within an existing parsing context
9436 * An external general parsed entity is well-formed if it matches the
9437 * production labeled extParsedEnt.
9438 *
9439 * [78] extParsedEnt ::= TextDecl? content
9440 *
9441 * Returns 0 if the entity is well formed, -1 in case of args problem and
9442 * the parser error code otherwise
9443 */
9444
9445int
9446xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009447 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009448 xmlParserCtxtPtr ctxt;
9449 xmlDocPtr newDoc;
9450 xmlSAXHandlerPtr oldsax = NULL;
9451 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009452 xmlChar start[4];
9453 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009454
9455 if (ctx->depth > 40) {
9456 return(XML_ERR_ENTITY_LOOP);
9457 }
9458
Daniel Veillardcda96922001-08-21 10:56:31 +00009459 if (lst != NULL)
9460 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009461 if ((URL == NULL) && (ID == NULL))
9462 return(-1);
9463 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9464 return(-1);
9465
9466
9467 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9468 if (ctxt == NULL) return(-1);
9469 ctxt->userData = ctxt;
9470 oldsax = ctxt->sax;
9471 ctxt->sax = ctx->sax;
9472 newDoc = xmlNewDoc(BAD_CAST "1.0");
9473 if (newDoc == NULL) {
9474 xmlFreeParserCtxt(ctxt);
9475 return(-1);
9476 }
9477 if (ctx->myDoc != NULL) {
9478 newDoc->intSubset = ctx->myDoc->intSubset;
9479 newDoc->extSubset = ctx->myDoc->extSubset;
9480 }
9481 if (ctx->myDoc->URL != NULL) {
9482 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9483 }
9484 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9485 if (newDoc->children == NULL) {
9486 ctxt->sax = oldsax;
9487 xmlFreeParserCtxt(ctxt);
9488 newDoc->intSubset = NULL;
9489 newDoc->extSubset = NULL;
9490 xmlFreeDoc(newDoc);
9491 return(-1);
9492 }
9493 nodePush(ctxt, newDoc->children);
9494 if (ctx->myDoc == NULL) {
9495 ctxt->myDoc = newDoc;
9496 } else {
9497 ctxt->myDoc = ctx->myDoc;
9498 newDoc->children->doc = ctx->myDoc;
9499 }
9500
Daniel Veillard87a764e2001-06-20 17:41:10 +00009501 /*
9502 * Get the 4 first bytes and decode the charset
9503 * if enc != XML_CHAR_ENCODING_NONE
9504 * plug some encoding conversion routines.
9505 */
9506 GROW
9507 start[0] = RAW;
9508 start[1] = NXT(1);
9509 start[2] = NXT(2);
9510 start[3] = NXT(3);
9511 enc = xmlDetectCharEncoding(start, 4);
9512 if (enc != XML_CHAR_ENCODING_NONE) {
9513 xmlSwitchEncoding(ctxt, enc);
9514 }
9515
Owen Taylor3473f882001-02-23 17:55:21 +00009516 /*
9517 * Parse a possible text declaration first
9518 */
Owen Taylor3473f882001-02-23 17:55:21 +00009519 if ((RAW == '<') && (NXT(1) == '?') &&
9520 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9521 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9522 xmlParseTextDecl(ctxt);
9523 }
9524
9525 /*
9526 * Doing validity checking on chunk doesn't make sense
9527 */
9528 ctxt->instate = XML_PARSER_CONTENT;
9529 ctxt->validate = ctx->validate;
9530 ctxt->loadsubset = ctx->loadsubset;
9531 ctxt->depth = ctx->depth + 1;
9532 ctxt->replaceEntities = ctx->replaceEntities;
9533 if (ctxt->validate) {
9534 ctxt->vctxt.error = ctx->vctxt.error;
9535 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009536 } else {
9537 ctxt->vctxt.error = NULL;
9538 ctxt->vctxt.warning = NULL;
9539 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009540 ctxt->vctxt.nodeTab = NULL;
9541 ctxt->vctxt.nodeNr = 0;
9542 ctxt->vctxt.nodeMax = 0;
9543 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009544
9545 xmlParseContent(ctxt);
9546
9547 if ((RAW == '<') && (NXT(1) == '/')) {
9548 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9550 ctxt->sax->error(ctxt->userData,
9551 "chunk is not well balanced\n");
9552 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009553 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009554 } else if (RAW != 0) {
9555 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9557 ctxt->sax->error(ctxt->userData,
9558 "extra content at the end of well balanced chunk\n");
9559 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009560 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009561 }
9562 if (ctxt->node != newDoc->children) {
9563 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9565 ctxt->sax->error(ctxt->userData,
9566 "chunk is not well balanced\n");
9567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009569 }
9570
9571 if (!ctxt->wellFormed) {
9572 if (ctxt->errNo == 0)
9573 ret = 1;
9574 else
9575 ret = ctxt->errNo;
9576 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009577 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009578 xmlNodePtr cur;
9579
9580 /*
9581 * Return the newly created nodeset after unlinking it from
9582 * they pseudo parent.
9583 */
9584 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009585 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009586 while (cur != NULL) {
9587 cur->parent = NULL;
9588 cur = cur->next;
9589 }
9590 newDoc->children->children = NULL;
9591 }
9592 ret = 0;
9593 }
9594 ctxt->sax = oldsax;
9595 xmlFreeParserCtxt(ctxt);
9596 newDoc->intSubset = NULL;
9597 newDoc->extSubset = NULL;
9598 xmlFreeDoc(newDoc);
9599
9600 return(ret);
9601}
9602
9603/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009604 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009605 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009606 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009607 * @sax: the SAX handler bloc (possibly NULL)
9608 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9609 * @depth: Used for loop detection, use 0
9610 * @URL: the URL for the entity to load
9611 * @ID: the System ID for the entity to load
9612 * @list: the return value for the set of parsed nodes
9613 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009614 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009615 *
9616 * Returns 0 if the entity is well formed, -1 in case of args problem and
9617 * the parser error code otherwise
9618 */
9619
Daniel Veillard257d9102001-05-08 10:41:44 +00009620static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009621xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9622 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009623 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009624 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009625 xmlParserCtxtPtr ctxt;
9626 xmlDocPtr newDoc;
9627 xmlSAXHandlerPtr oldsax = NULL;
9628 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009629 xmlChar start[4];
9630 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009631
9632 if (depth > 40) {
9633 return(XML_ERR_ENTITY_LOOP);
9634 }
9635
9636
9637
9638 if (list != NULL)
9639 *list = NULL;
9640 if ((URL == NULL) && (ID == NULL))
9641 return(-1);
9642 if (doc == NULL) /* @@ relax but check for dereferences */
9643 return(-1);
9644
9645
9646 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9647 if (ctxt == NULL) return(-1);
9648 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009649 if (oldctxt != NULL) {
9650 ctxt->_private = oldctxt->_private;
9651 ctxt->loadsubset = oldctxt->loadsubset;
9652 ctxt->validate = oldctxt->validate;
9653 ctxt->external = oldctxt->external;
9654 } else {
9655 /*
9656 * Doing validity checking on chunk without context
9657 * doesn't make sense
9658 */
9659 ctxt->_private = NULL;
9660 ctxt->validate = 0;
9661 ctxt->external = 2;
9662 ctxt->loadsubset = 0;
9663 }
Owen Taylor3473f882001-02-23 17:55:21 +00009664 if (sax != NULL) {
9665 oldsax = ctxt->sax;
9666 ctxt->sax = sax;
9667 if (user_data != NULL)
9668 ctxt->userData = user_data;
9669 }
9670 newDoc = xmlNewDoc(BAD_CAST "1.0");
9671 if (newDoc == NULL) {
9672 xmlFreeParserCtxt(ctxt);
9673 return(-1);
9674 }
9675 if (doc != NULL) {
9676 newDoc->intSubset = doc->intSubset;
9677 newDoc->extSubset = doc->extSubset;
9678 }
9679 if (doc->URL != NULL) {
9680 newDoc->URL = xmlStrdup(doc->URL);
9681 }
9682 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9683 if (newDoc->children == NULL) {
9684 if (sax != NULL)
9685 ctxt->sax = oldsax;
9686 xmlFreeParserCtxt(ctxt);
9687 newDoc->intSubset = NULL;
9688 newDoc->extSubset = NULL;
9689 xmlFreeDoc(newDoc);
9690 return(-1);
9691 }
9692 nodePush(ctxt, newDoc->children);
9693 if (doc == NULL) {
9694 ctxt->myDoc = newDoc;
9695 } else {
9696 ctxt->myDoc = doc;
9697 newDoc->children->doc = doc;
9698 }
9699
Daniel Veillard87a764e2001-06-20 17:41:10 +00009700 /*
9701 * Get the 4 first bytes and decode the charset
9702 * if enc != XML_CHAR_ENCODING_NONE
9703 * plug some encoding conversion routines.
9704 */
9705 GROW;
9706 start[0] = RAW;
9707 start[1] = NXT(1);
9708 start[2] = NXT(2);
9709 start[3] = NXT(3);
9710 enc = xmlDetectCharEncoding(start, 4);
9711 if (enc != XML_CHAR_ENCODING_NONE) {
9712 xmlSwitchEncoding(ctxt, enc);
9713 }
9714
Owen Taylor3473f882001-02-23 17:55:21 +00009715 /*
9716 * Parse a possible text declaration first
9717 */
Owen Taylor3473f882001-02-23 17:55:21 +00009718 if ((RAW == '<') && (NXT(1) == '?') &&
9719 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9720 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9721 xmlParseTextDecl(ctxt);
9722 }
9723
Owen Taylor3473f882001-02-23 17:55:21 +00009724 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009725 ctxt->depth = depth;
9726
9727 xmlParseContent(ctxt);
9728
Daniel Veillard561b7f82002-03-20 21:55:57 +00009729 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009730 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9732 ctxt->sax->error(ctxt->userData,
9733 "chunk is not well balanced\n");
9734 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009736 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009737 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9739 ctxt->sax->error(ctxt->userData,
9740 "extra content at the end of well balanced chunk\n");
9741 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009742 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009743 }
9744 if (ctxt->node != newDoc->children) {
9745 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9747 ctxt->sax->error(ctxt->userData,
9748 "chunk is not well balanced\n");
9749 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009750 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009751 }
9752
9753 if (!ctxt->wellFormed) {
9754 if (ctxt->errNo == 0)
9755 ret = 1;
9756 else
9757 ret = ctxt->errNo;
9758 } else {
9759 if (list != NULL) {
9760 xmlNodePtr cur;
9761
9762 /*
9763 * Return the newly created nodeset after unlinking it from
9764 * they pseudo parent.
9765 */
9766 cur = newDoc->children->children;
9767 *list = cur;
9768 while (cur != NULL) {
9769 cur->parent = NULL;
9770 cur = cur->next;
9771 }
9772 newDoc->children->children = NULL;
9773 }
9774 ret = 0;
9775 }
9776 if (sax != NULL)
9777 ctxt->sax = oldsax;
9778 xmlFreeParserCtxt(ctxt);
9779 newDoc->intSubset = NULL;
9780 newDoc->extSubset = NULL;
9781 xmlFreeDoc(newDoc);
9782
9783 return(ret);
9784}
9785
9786/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009787 * xmlParseExternalEntity:
9788 * @doc: the document the chunk pertains to
9789 * @sax: the SAX handler bloc (possibly NULL)
9790 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9791 * @depth: Used for loop detection, use 0
9792 * @URL: the URL for the entity to load
9793 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009794 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009795 *
9796 * Parse an external general entity
9797 * An external general parsed entity is well-formed if it matches the
9798 * production labeled extParsedEnt.
9799 *
9800 * [78] extParsedEnt ::= TextDecl? content
9801 *
9802 * Returns 0 if the entity is well formed, -1 in case of args problem and
9803 * the parser error code otherwise
9804 */
9805
9806int
9807xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009808 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009809 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009810 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009811}
9812
9813/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009814 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009815 * @doc: the document the chunk pertains to
9816 * @sax: the SAX handler bloc (possibly NULL)
9817 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9818 * @depth: Used for loop detection, use 0
9819 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009820 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009821 *
9822 * Parse a well-balanced chunk of an XML document
9823 * called by the parser
9824 * The allowed sequence for the Well Balanced Chunk is the one defined by
9825 * the content production in the XML grammar:
9826 *
9827 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9828 *
9829 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9830 * the parser error code otherwise
9831 */
9832
9833int
9834xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009835 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +00009836 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
9837 depth, string, lst, 0 );
9838}
9839
9840/**
Daniel Veillard328f48c2002-11-15 15:24:34 +00009841 * xmlParseBalancedChunkMemoryInternal:
9842 * @oldctxt: the existing parsing context
9843 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9844 * @user_data: the user data field for the parser context
9845 * @lst: the return value for the set of parsed nodes
9846 *
9847 *
9848 * Parse a well-balanced chunk of an XML document
9849 * called by the parser
9850 * The allowed sequence for the Well Balanced Chunk is the one defined by
9851 * the content production in the XML grammar:
9852 *
9853 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9854 *
9855 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9856 * the parser error code otherwise
9857 *
9858 * In case recover is set to 1, the nodelist will not be empty even if
9859 * the parsed chunk is not well balanced.
9860 */
9861static int
9862xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
9863 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
9864 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009865 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009866 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009867 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009868 int size;
9869 int ret = 0;
9870
9871 if (oldctxt->depth > 40) {
9872 return(XML_ERR_ENTITY_LOOP);
9873 }
9874
9875
9876 if (lst != NULL)
9877 *lst = NULL;
9878 if (string == NULL)
9879 return(-1);
9880
9881 size = xmlStrlen(string);
9882
9883 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9884 if (ctxt == NULL) return(-1);
9885 if (user_data != NULL)
9886 ctxt->userData = user_data;
9887 else
9888 ctxt->userData = ctxt;
9889
9890 oldsax = ctxt->sax;
9891 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +00009892 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009893 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +00009894 newDoc = xmlNewDoc(BAD_CAST "1.0");
9895 if (newDoc == NULL) {
9896 ctxt->sax = oldsax;
9897 xmlFreeParserCtxt(ctxt);
9898 return(-1);
9899 }
Daniel Veillard328f48c2002-11-15 15:24:34 +00009900 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +00009901 } else {
9902 ctxt->myDoc = oldctxt->myDoc;
9903 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009904 }
Daniel Veillard9bc53102002-11-25 13:20:04 +00009905 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +00009906 BAD_CAST "pseudoroot", NULL);
9907 if (ctxt->myDoc->children == NULL) {
9908 ctxt->sax = oldsax;
9909 xmlFreeParserCtxt(ctxt);
9910 if (newDoc != NULL)
9911 xmlFreeDoc(newDoc);
9912 return(-1);
9913 }
9914 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009915 ctxt->instate = XML_PARSER_CONTENT;
9916 ctxt->depth = oldctxt->depth + 1;
9917
9918 /*
9919 * Doing validity checking on chunk doesn't make sense
9920 */
9921 ctxt->validate = 0;
9922 ctxt->loadsubset = oldctxt->loadsubset;
9923
Daniel Veillard68e9e742002-11-16 15:35:11 +00009924 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009925 if ((RAW == '<') && (NXT(1) == '/')) {
9926 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9928 ctxt->sax->error(ctxt->userData,
9929 "chunk is not well balanced\n");
9930 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009931 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009932 } else if (RAW != 0) {
9933 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9935 ctxt->sax->error(ctxt->userData,
9936 "extra content at the end of well balanced chunk\n");
9937 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009938 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009939 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009940 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +00009941 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9943 ctxt->sax->error(ctxt->userData,
9944 "chunk is not well balanced\n");
9945 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009946 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009947 }
9948
9949 if (!ctxt->wellFormed) {
9950 if (ctxt->errNo == 0)
9951 ret = 1;
9952 else
9953 ret = ctxt->errNo;
9954 } else {
9955 ret = 0;
9956 }
9957
9958 if ((lst != NULL) && (ret == 0)) {
9959 xmlNodePtr cur;
9960
9961 /*
9962 * Return the newly created nodeset after unlinking it from
9963 * they pseudo parent.
9964 */
Daniel Veillard68e9e742002-11-16 15:35:11 +00009965 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009966 *lst = cur;
9967 while (cur != NULL) {
9968 cur->parent = NULL;
9969 cur = cur->next;
9970 }
Daniel Veillard68e9e742002-11-16 15:35:11 +00009971 ctxt->myDoc->children->children = NULL;
9972 }
9973 if (ctxt->myDoc != NULL) {
9974 xmlFreeNode(ctxt->myDoc->children);
9975 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +00009976 }
9977
9978 ctxt->sax = oldsax;
9979 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +00009980 if (newDoc != NULL)
9981 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +00009982
9983 return(ret);
9984}
9985
9986/**
Daniel Veillard58e44c92002-08-02 22:19:49 +00009987 * xmlParseBalancedChunkMemoryRecover:
9988 * @doc: the document the chunk pertains to
9989 * @sax: the SAX handler bloc (possibly NULL)
9990 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9991 * @depth: Used for loop detection, use 0
9992 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9993 * @lst: the return value for the set of parsed nodes
9994 * @recover: return nodes even if the data is broken (use 0)
9995 *
9996 *
9997 * Parse a well-balanced chunk of an XML document
9998 * called by the parser
9999 * The allowed sequence for the Well Balanced Chunk is the one defined by
10000 * the content production in the XML grammar:
10001 *
10002 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10003 *
10004 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10005 * the parser error code otherwise
10006 *
10007 * In case recover is set to 1, the nodelist will not be empty even if
10008 * the parsed chunk is not well balanced.
10009 */
10010int
10011xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10012 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10013 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010014 xmlParserCtxtPtr ctxt;
10015 xmlDocPtr newDoc;
10016 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010017 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010018 int size;
10019 int ret = 0;
10020
10021 if (depth > 40) {
10022 return(XML_ERR_ENTITY_LOOP);
10023 }
10024
10025
Daniel Veillardcda96922001-08-21 10:56:31 +000010026 if (lst != NULL)
10027 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010028 if (string == NULL)
10029 return(-1);
10030
10031 size = xmlStrlen(string);
10032
10033 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10034 if (ctxt == NULL) return(-1);
10035 ctxt->userData = ctxt;
10036 if (sax != NULL) {
10037 oldsax = ctxt->sax;
10038 ctxt->sax = sax;
10039 if (user_data != NULL)
10040 ctxt->userData = user_data;
10041 }
10042 newDoc = xmlNewDoc(BAD_CAST "1.0");
10043 if (newDoc == NULL) {
10044 xmlFreeParserCtxt(ctxt);
10045 return(-1);
10046 }
10047 if (doc != NULL) {
10048 newDoc->intSubset = doc->intSubset;
10049 newDoc->extSubset = doc->extSubset;
10050 }
10051 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10052 if (newDoc->children == NULL) {
10053 if (sax != NULL)
10054 ctxt->sax = oldsax;
10055 xmlFreeParserCtxt(ctxt);
10056 newDoc->intSubset = NULL;
10057 newDoc->extSubset = NULL;
10058 xmlFreeDoc(newDoc);
10059 return(-1);
10060 }
10061 nodePush(ctxt, newDoc->children);
10062 if (doc == NULL) {
10063 ctxt->myDoc = newDoc;
10064 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010065 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010066 newDoc->children->doc = doc;
10067 }
10068 ctxt->instate = XML_PARSER_CONTENT;
10069 ctxt->depth = depth;
10070
10071 /*
10072 * Doing validity checking on chunk doesn't make sense
10073 */
10074 ctxt->validate = 0;
10075 ctxt->loadsubset = 0;
10076
Daniel Veillardb39bc392002-10-26 19:29:51 +000010077 if ( doc != NULL ){
10078 content = doc->children;
10079 doc->children = NULL;
10080 xmlParseContent(ctxt);
10081 doc->children = content;
10082 }
10083 else {
10084 xmlParseContent(ctxt);
10085 }
Owen Taylor3473f882001-02-23 17:55:21 +000010086 if ((RAW == '<') && (NXT(1) == '/')) {
10087 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10089 ctxt->sax->error(ctxt->userData,
10090 "chunk is not well balanced\n");
10091 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010092 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010093 } else if (RAW != 0) {
10094 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10096 ctxt->sax->error(ctxt->userData,
10097 "extra content at the end of well balanced chunk\n");
10098 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010099 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010100 }
10101 if (ctxt->node != newDoc->children) {
10102 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10104 ctxt->sax->error(ctxt->userData,
10105 "chunk is not well balanced\n");
10106 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010107 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010108 }
10109
10110 if (!ctxt->wellFormed) {
10111 if (ctxt->errNo == 0)
10112 ret = 1;
10113 else
10114 ret = ctxt->errNo;
10115 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010116 ret = 0;
10117 }
10118
10119 if (lst != NULL && (ret == 0 || recover == 1)) {
10120 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010121
10122 /*
10123 * Return the newly created nodeset after unlinking it from
10124 * they pseudo parent.
10125 */
10126 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010127 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010128 while (cur != NULL) {
10129 cur->parent = NULL;
10130 cur = cur->next;
10131 }
10132 newDoc->children->children = NULL;
10133 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010134
Owen Taylor3473f882001-02-23 17:55:21 +000010135 if (sax != NULL)
10136 ctxt->sax = oldsax;
10137 xmlFreeParserCtxt(ctxt);
10138 newDoc->intSubset = NULL;
10139 newDoc->extSubset = NULL;
10140 xmlFreeDoc(newDoc);
10141
10142 return(ret);
10143}
10144
10145/**
10146 * xmlSAXParseEntity:
10147 * @sax: the SAX handler block
10148 * @filename: the filename
10149 *
10150 * parse an XML external entity out of context and build a tree.
10151 * It use the given SAX function block to handle the parsing callback.
10152 * If sax is NULL, fallback to the default DOM tree building routines.
10153 *
10154 * [78] extParsedEnt ::= TextDecl? content
10155 *
10156 * This correspond to a "Well Balanced" chunk
10157 *
10158 * Returns the resulting document tree
10159 */
10160
10161xmlDocPtr
10162xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10163 xmlDocPtr ret;
10164 xmlParserCtxtPtr ctxt;
10165 char *directory = NULL;
10166
10167 ctxt = xmlCreateFileParserCtxt(filename);
10168 if (ctxt == NULL) {
10169 return(NULL);
10170 }
10171 if (sax != NULL) {
10172 if (ctxt->sax != NULL)
10173 xmlFree(ctxt->sax);
10174 ctxt->sax = sax;
10175 ctxt->userData = NULL;
10176 }
10177
10178 if ((ctxt->directory == NULL) && (directory == NULL))
10179 directory = xmlParserGetDirectory(filename);
10180
10181 xmlParseExtParsedEnt(ctxt);
10182
10183 if (ctxt->wellFormed)
10184 ret = ctxt->myDoc;
10185 else {
10186 ret = NULL;
10187 xmlFreeDoc(ctxt->myDoc);
10188 ctxt->myDoc = NULL;
10189 }
10190 if (sax != NULL)
10191 ctxt->sax = NULL;
10192 xmlFreeParserCtxt(ctxt);
10193
10194 return(ret);
10195}
10196
10197/**
10198 * xmlParseEntity:
10199 * @filename: the filename
10200 *
10201 * parse an XML external entity out of context and build a tree.
10202 *
10203 * [78] extParsedEnt ::= TextDecl? content
10204 *
10205 * This correspond to a "Well Balanced" chunk
10206 *
10207 * Returns the resulting document tree
10208 */
10209
10210xmlDocPtr
10211xmlParseEntity(const char *filename) {
10212 return(xmlSAXParseEntity(NULL, filename));
10213}
10214
10215/**
10216 * xmlCreateEntityParserCtxt:
10217 * @URL: the entity URL
10218 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010219 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010220 *
10221 * Create a parser context for an external entity
10222 * Automatic support for ZLIB/Compress compressed document is provided
10223 * by default if found at compile-time.
10224 *
10225 * Returns the new parser context or NULL
10226 */
10227xmlParserCtxtPtr
10228xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10229 const xmlChar *base) {
10230 xmlParserCtxtPtr ctxt;
10231 xmlParserInputPtr inputStream;
10232 char *directory = NULL;
10233 xmlChar *uri;
10234
10235 ctxt = xmlNewParserCtxt();
10236 if (ctxt == NULL) {
10237 return(NULL);
10238 }
10239
10240 uri = xmlBuildURI(URL, base);
10241
10242 if (uri == NULL) {
10243 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10244 if (inputStream == NULL) {
10245 xmlFreeParserCtxt(ctxt);
10246 return(NULL);
10247 }
10248
10249 inputPush(ctxt, inputStream);
10250
10251 if ((ctxt->directory == NULL) && (directory == NULL))
10252 directory = xmlParserGetDirectory((char *)URL);
10253 if ((ctxt->directory == NULL) && (directory != NULL))
10254 ctxt->directory = directory;
10255 } else {
10256 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10257 if (inputStream == NULL) {
10258 xmlFree(uri);
10259 xmlFreeParserCtxt(ctxt);
10260 return(NULL);
10261 }
10262
10263 inputPush(ctxt, inputStream);
10264
10265 if ((ctxt->directory == NULL) && (directory == NULL))
10266 directory = xmlParserGetDirectory((char *)uri);
10267 if ((ctxt->directory == NULL) && (directory != NULL))
10268 ctxt->directory = directory;
10269 xmlFree(uri);
10270 }
10271
10272 return(ctxt);
10273}
10274
10275/************************************************************************
10276 * *
10277 * Front ends when parsing from a file *
10278 * *
10279 ************************************************************************/
10280
10281/**
10282 * xmlCreateFileParserCtxt:
10283 * @filename: the filename
10284 *
10285 * Create a parser context for a file content.
10286 * Automatic support for ZLIB/Compress compressed document is provided
10287 * by default if found at compile-time.
10288 *
10289 * Returns the new parser context or NULL
10290 */
10291xmlParserCtxtPtr
10292xmlCreateFileParserCtxt(const char *filename)
10293{
10294 xmlParserCtxtPtr ctxt;
10295 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010296 char *directory = NULL;
Daniel Veillardf4862f02002-09-10 11:13:43 +000010297 xmlChar *normalized;
Owen Taylor3473f882001-02-23 17:55:21 +000010298
Owen Taylor3473f882001-02-23 17:55:21 +000010299 ctxt = xmlNewParserCtxt();
10300 if (ctxt == NULL) {
10301 if (xmlDefaultSAXHandler.error != NULL) {
10302 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10303 }
10304 return(NULL);
10305 }
10306
Daniel Veillardf4862f02002-09-10 11:13:43 +000010307 normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
10308 if (normalized == NULL) {
10309 xmlFreeParserCtxt(ctxt);
10310 return(NULL);
10311 }
10312 inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010313 if (inputStream == NULL) {
10314 xmlFreeParserCtxt(ctxt);
Daniel Veillardf4862f02002-09-10 11:13:43 +000010315 xmlFree(normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010316 return(NULL);
10317 }
10318
Owen Taylor3473f882001-02-23 17:55:21 +000010319 inputPush(ctxt, inputStream);
10320 if ((ctxt->directory == NULL) && (directory == NULL))
Daniel Veillardf4862f02002-09-10 11:13:43 +000010321 directory = xmlParserGetDirectory((char *) normalized);
Owen Taylor3473f882001-02-23 17:55:21 +000010322 if ((ctxt->directory == NULL) && (directory != NULL))
10323 ctxt->directory = directory;
10324
Daniel Veillardf4862f02002-09-10 11:13:43 +000010325 xmlFree(normalized);
10326
Owen Taylor3473f882001-02-23 17:55:21 +000010327 return(ctxt);
10328}
10329
10330/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010331 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010332 * @sax: the SAX handler block
10333 * @filename: the filename
10334 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10335 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010336 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010337 *
10338 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10339 * compressed document is provided by default if found at compile-time.
10340 * It use the given SAX function block to handle the parsing callback.
10341 * If sax is NULL, fallback to the default DOM tree building routines.
10342 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010343 * User data (void *) is stored within the parser context in the
10344 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010345 *
Owen Taylor3473f882001-02-23 17:55:21 +000010346 * Returns the resulting document tree
10347 */
10348
10349xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010350xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10351 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010352 xmlDocPtr ret;
10353 xmlParserCtxtPtr ctxt;
10354 char *directory = NULL;
10355
Daniel Veillard635ef722001-10-29 11:48:19 +000010356 xmlInitParser();
10357
Owen Taylor3473f882001-02-23 17:55:21 +000010358 ctxt = xmlCreateFileParserCtxt(filename);
10359 if (ctxt == NULL) {
10360 return(NULL);
10361 }
10362 if (sax != NULL) {
10363 if (ctxt->sax != NULL)
10364 xmlFree(ctxt->sax);
10365 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010366 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010367 if (data!=NULL) {
10368 ctxt->_private=data;
10369 }
Owen Taylor3473f882001-02-23 17:55:21 +000010370
10371 if ((ctxt->directory == NULL) && (directory == NULL))
10372 directory = xmlParserGetDirectory(filename);
10373 if ((ctxt->directory == NULL) && (directory != NULL))
10374 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10375
Daniel Veillarddad3f682002-11-17 16:47:27 +000010376 ctxt->recovery = recovery;
10377
Owen Taylor3473f882001-02-23 17:55:21 +000010378 xmlParseDocument(ctxt);
10379
10380 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10381 else {
10382 ret = NULL;
10383 xmlFreeDoc(ctxt->myDoc);
10384 ctxt->myDoc = NULL;
10385 }
10386 if (sax != NULL)
10387 ctxt->sax = NULL;
10388 xmlFreeParserCtxt(ctxt);
10389
10390 return(ret);
10391}
10392
10393/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010394 * xmlSAXParseFile:
10395 * @sax: the SAX handler block
10396 * @filename: the filename
10397 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10398 * documents
10399 *
10400 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10401 * compressed document is provided by default if found at compile-time.
10402 * It use the given SAX function block to handle the parsing callback.
10403 * If sax is NULL, fallback to the default DOM tree building routines.
10404 *
10405 * Returns the resulting document tree
10406 */
10407
10408xmlDocPtr
10409xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10410 int recovery) {
10411 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10412}
10413
10414/**
Owen Taylor3473f882001-02-23 17:55:21 +000010415 * xmlRecoverDoc:
10416 * @cur: a pointer to an array of xmlChar
10417 *
10418 * parse an XML in-memory document and build a tree.
10419 * In the case the document is not Well Formed, a tree is built anyway
10420 *
10421 * Returns the resulting document tree
10422 */
10423
10424xmlDocPtr
10425xmlRecoverDoc(xmlChar *cur) {
10426 return(xmlSAXParseDoc(NULL, cur, 1));
10427}
10428
10429/**
10430 * xmlParseFile:
10431 * @filename: the filename
10432 *
10433 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10434 * compressed document is provided by default if found at compile-time.
10435 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010436 * Returns the resulting document tree if the file was wellformed,
10437 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010438 */
10439
10440xmlDocPtr
10441xmlParseFile(const char *filename) {
10442 return(xmlSAXParseFile(NULL, filename, 0));
10443}
10444
10445/**
10446 * xmlRecoverFile:
10447 * @filename: the filename
10448 *
10449 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10450 * compressed document is provided by default if found at compile-time.
10451 * In the case the document is not Well Formed, a tree is built anyway
10452 *
10453 * Returns the resulting document tree
10454 */
10455
10456xmlDocPtr
10457xmlRecoverFile(const char *filename) {
10458 return(xmlSAXParseFile(NULL, filename, 1));
10459}
10460
10461
10462/**
10463 * xmlSetupParserForBuffer:
10464 * @ctxt: an XML parser context
10465 * @buffer: a xmlChar * buffer
10466 * @filename: a file name
10467 *
10468 * Setup the parser context to parse a new buffer; Clears any prior
10469 * contents from the parser context. The buffer parameter must not be
10470 * NULL, but the filename parameter can be
10471 */
10472void
10473xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10474 const char* filename)
10475{
10476 xmlParserInputPtr input;
10477
10478 input = xmlNewInputStream(ctxt);
10479 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010480 xmlGenericError(xmlGenericErrorContext,
10481 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010482 xmlFree(ctxt);
10483 return;
10484 }
10485
10486 xmlClearParserCtxt(ctxt);
10487 if (filename != NULL)
10488 input->filename = xmlMemStrdup(filename);
10489 input->base = buffer;
10490 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010491 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010492 inputPush(ctxt, input);
10493}
10494
10495/**
10496 * xmlSAXUserParseFile:
10497 * @sax: a SAX handler
10498 * @user_data: The user data returned on SAX callbacks
10499 * @filename: a file name
10500 *
10501 * parse an XML file and call the given SAX handler routines.
10502 * Automatic support for ZLIB/Compress compressed document is provided
10503 *
10504 * Returns 0 in case of success or a error number otherwise
10505 */
10506int
10507xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10508 const char *filename) {
10509 int ret = 0;
10510 xmlParserCtxtPtr ctxt;
10511
10512 ctxt = xmlCreateFileParserCtxt(filename);
10513 if (ctxt == NULL) return -1;
10514 if (ctxt->sax != &xmlDefaultSAXHandler)
10515 xmlFree(ctxt->sax);
10516 ctxt->sax = sax;
10517 if (user_data != NULL)
10518 ctxt->userData = user_data;
10519
10520 xmlParseDocument(ctxt);
10521
10522 if (ctxt->wellFormed)
10523 ret = 0;
10524 else {
10525 if (ctxt->errNo != 0)
10526 ret = ctxt->errNo;
10527 else
10528 ret = -1;
10529 }
10530 if (sax != NULL)
10531 ctxt->sax = NULL;
10532 xmlFreeParserCtxt(ctxt);
10533
10534 return ret;
10535}
10536
10537/************************************************************************
10538 * *
10539 * Front ends when parsing from memory *
10540 * *
10541 ************************************************************************/
10542
10543/**
10544 * xmlCreateMemoryParserCtxt:
10545 * @buffer: a pointer to a char array
10546 * @size: the size of the array
10547 *
10548 * Create a parser context for an XML in-memory document.
10549 *
10550 * Returns the new parser context or NULL
10551 */
10552xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010553xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010554 xmlParserCtxtPtr ctxt;
10555 xmlParserInputPtr input;
10556 xmlParserInputBufferPtr buf;
10557
10558 if (buffer == NULL)
10559 return(NULL);
10560 if (size <= 0)
10561 return(NULL);
10562
10563 ctxt = xmlNewParserCtxt();
10564 if (ctxt == NULL)
10565 return(NULL);
10566
10567 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010568 if (buf == NULL) {
10569 xmlFreeParserCtxt(ctxt);
10570 return(NULL);
10571 }
Owen Taylor3473f882001-02-23 17:55:21 +000010572
10573 input = xmlNewInputStream(ctxt);
10574 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010575 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010576 xmlFreeParserCtxt(ctxt);
10577 return(NULL);
10578 }
10579
10580 input->filename = NULL;
10581 input->buf = buf;
10582 input->base = input->buf->buffer->content;
10583 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010584 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010585
10586 inputPush(ctxt, input);
10587 return(ctxt);
10588}
10589
10590/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010591 * xmlSAXParseMemoryWithData:
10592 * @sax: the SAX handler block
10593 * @buffer: an pointer to a char array
10594 * @size: the size of the array
10595 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10596 * documents
10597 * @data: the userdata
10598 *
10599 * parse an XML in-memory block and use the given SAX function block
10600 * to handle the parsing callback. If sax is NULL, fallback to the default
10601 * DOM tree building routines.
10602 *
10603 * User data (void *) is stored within the parser context in the
10604 * context's _private member, so it is available nearly everywhere in libxml
10605 *
10606 * Returns the resulting document tree
10607 */
10608
10609xmlDocPtr
10610xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10611 int size, int recovery, void *data) {
10612 xmlDocPtr ret;
10613 xmlParserCtxtPtr ctxt;
10614
10615 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10616 if (ctxt == NULL) return(NULL);
10617 if (sax != NULL) {
10618 if (ctxt->sax != NULL)
10619 xmlFree(ctxt->sax);
10620 ctxt->sax = sax;
10621 }
10622 if (data!=NULL) {
10623 ctxt->_private=data;
10624 }
10625
10626 xmlParseDocument(ctxt);
10627
10628 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10629 else {
10630 ret = NULL;
10631 xmlFreeDoc(ctxt->myDoc);
10632 ctxt->myDoc = NULL;
10633 }
10634 if (sax != NULL)
10635 ctxt->sax = NULL;
10636 xmlFreeParserCtxt(ctxt);
10637
10638 return(ret);
10639}
10640
10641/**
Owen Taylor3473f882001-02-23 17:55:21 +000010642 * xmlSAXParseMemory:
10643 * @sax: the SAX handler block
10644 * @buffer: an pointer to a char array
10645 * @size: the size of the array
10646 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10647 * documents
10648 *
10649 * parse an XML in-memory block and use the given SAX function block
10650 * to handle the parsing callback. If sax is NULL, fallback to the default
10651 * DOM tree building routines.
10652 *
10653 * Returns the resulting document tree
10654 */
10655xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010656xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10657 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010658 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010659}
10660
10661/**
10662 * xmlParseMemory:
10663 * @buffer: an pointer to a char array
10664 * @size: the size of the array
10665 *
10666 * parse an XML in-memory block and build a tree.
10667 *
10668 * Returns the resulting document tree
10669 */
10670
Daniel Veillard50822cb2001-07-26 20:05:51 +000010671xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010672 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10673}
10674
10675/**
10676 * xmlRecoverMemory:
10677 * @buffer: an pointer to a char array
10678 * @size: the size of the array
10679 *
10680 * parse an XML in-memory block and build a tree.
10681 * In the case the document is not Well Formed, a tree is built anyway
10682 *
10683 * Returns the resulting document tree
10684 */
10685
Daniel Veillard50822cb2001-07-26 20:05:51 +000010686xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010687 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10688}
10689
10690/**
10691 * xmlSAXUserParseMemory:
10692 * @sax: a SAX handler
10693 * @user_data: The user data returned on SAX callbacks
10694 * @buffer: an in-memory XML document input
10695 * @size: the length of the XML document in bytes
10696 *
10697 * A better SAX parsing routine.
10698 * parse an XML in-memory buffer and call the given SAX handler routines.
10699 *
10700 * Returns 0 in case of success or a error number otherwise
10701 */
10702int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010703 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010704 int ret = 0;
10705 xmlParserCtxtPtr ctxt;
10706 xmlSAXHandlerPtr oldsax = NULL;
10707
Daniel Veillard9e923512002-08-14 08:48:52 +000010708 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010709 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10710 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010711 oldsax = ctxt->sax;
10712 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010713 if (user_data != NULL)
10714 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010715
10716 xmlParseDocument(ctxt);
10717
10718 if (ctxt->wellFormed)
10719 ret = 0;
10720 else {
10721 if (ctxt->errNo != 0)
10722 ret = ctxt->errNo;
10723 else
10724 ret = -1;
10725 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010726 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010727 xmlFreeParserCtxt(ctxt);
10728
10729 return ret;
10730}
10731
10732/**
10733 * xmlCreateDocParserCtxt:
10734 * @cur: a pointer to an array of xmlChar
10735 *
10736 * Creates a parser context for an XML in-memory document.
10737 *
10738 * Returns the new parser context or NULL
10739 */
10740xmlParserCtxtPtr
10741xmlCreateDocParserCtxt(xmlChar *cur) {
10742 int len;
10743
10744 if (cur == NULL)
10745 return(NULL);
10746 len = xmlStrlen(cur);
10747 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10748}
10749
10750/**
10751 * xmlSAXParseDoc:
10752 * @sax: the SAX handler block
10753 * @cur: a pointer to an array of xmlChar
10754 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10755 * documents
10756 *
10757 * parse an XML in-memory document and build a tree.
10758 * It use the given SAX function block to handle the parsing callback.
10759 * If sax is NULL, fallback to the default DOM tree building routines.
10760 *
10761 * Returns the resulting document tree
10762 */
10763
10764xmlDocPtr
10765xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10766 xmlDocPtr ret;
10767 xmlParserCtxtPtr ctxt;
10768
10769 if (cur == NULL) return(NULL);
10770
10771
10772 ctxt = xmlCreateDocParserCtxt(cur);
10773 if (ctxt == NULL) return(NULL);
10774 if (sax != NULL) {
10775 ctxt->sax = sax;
10776 ctxt->userData = NULL;
10777 }
10778
10779 xmlParseDocument(ctxt);
10780 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10781 else {
10782 ret = NULL;
10783 xmlFreeDoc(ctxt->myDoc);
10784 ctxt->myDoc = NULL;
10785 }
10786 if (sax != NULL)
10787 ctxt->sax = NULL;
10788 xmlFreeParserCtxt(ctxt);
10789
10790 return(ret);
10791}
10792
10793/**
10794 * xmlParseDoc:
10795 * @cur: a pointer to an array of xmlChar
10796 *
10797 * parse an XML in-memory document and build a tree.
10798 *
10799 * Returns the resulting document tree
10800 */
10801
10802xmlDocPtr
10803xmlParseDoc(xmlChar *cur) {
10804 return(xmlSAXParseDoc(NULL, cur, 0));
10805}
10806
Daniel Veillard8107a222002-01-13 14:10:10 +000010807/************************************************************************
10808 * *
10809 * Specific function to keep track of entities references *
10810 * and used by the XSLT debugger *
10811 * *
10812 ************************************************************************/
10813
10814static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10815
10816/**
10817 * xmlAddEntityReference:
10818 * @ent : A valid entity
10819 * @firstNode : A valid first node for children of entity
10820 * @lastNode : A valid last node of children entity
10821 *
10822 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10823 */
10824static void
10825xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10826 xmlNodePtr lastNode)
10827{
10828 if (xmlEntityRefFunc != NULL) {
10829 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10830 }
10831}
10832
10833
10834/**
10835 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000010836 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000010837 *
10838 * Set the function to call call back when a xml reference has been made
10839 */
10840void
10841xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10842{
10843 xmlEntityRefFunc = func;
10844}
Owen Taylor3473f882001-02-23 17:55:21 +000010845
10846/************************************************************************
10847 * *
10848 * Miscellaneous *
10849 * *
10850 ************************************************************************/
10851
10852#ifdef LIBXML_XPATH_ENABLED
10853#include <libxml/xpath.h>
10854#endif
10855
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010856extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010857static int xmlParserInitialized = 0;
10858
10859/**
10860 * xmlInitParser:
10861 *
10862 * Initialization function for the XML parser.
10863 * This is not reentrant. Call once before processing in case of
10864 * use in multithreaded programs.
10865 */
10866
10867void
10868xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010869 if (xmlParserInitialized != 0)
10870 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010871
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010872 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10873 (xmlGenericError == NULL))
10874 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010875 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010876 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010877 xmlInitCharEncodingHandlers();
10878 xmlInitializePredefinedEntities();
10879 xmlDefaultSAXHandlerInit();
10880 xmlRegisterDefaultInputCallbacks();
10881 xmlRegisterDefaultOutputCallbacks();
10882#ifdef LIBXML_HTML_ENABLED
10883 htmlInitAutoClose();
10884 htmlDefaultSAXHandlerInit();
10885#endif
10886#ifdef LIBXML_XPATH_ENABLED
10887 xmlXPathInit();
10888#endif
10889 xmlParserInitialized = 1;
10890}
10891
10892/**
10893 * xmlCleanupParser:
10894 *
10895 * Cleanup function for the XML parser. It tries to reclaim all
10896 * parsing related global memory allocated for the parser processing.
10897 * It doesn't deallocate any document related memory. Calling this
10898 * function should not prevent reusing the parser.
10899 */
10900
10901void
10902xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010903 xmlCleanupCharEncodingHandlers();
10904 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010905#ifdef LIBXML_CATALOG_ENABLED
10906 xmlCatalogCleanup();
10907#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010908 xmlCleanupThreads();
10909 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010910}